From 2a0d0b9b77ae2298c3f50bc5fcaea2edfb752e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Mariaux?= <35563610+newbisebi@users.noreply.github.com> Date: Thu, 14 Mar 2019 20:57:22 +0100 Subject: [PATCH] Split sitemap (#619) Split sitemap when it is getting too big --- .github/ISSUE_TEMPLATE/documentation.md | 2 +- components/rebuild/src/lib.rs | 1 + components/site/src/lib.rs | 49 +++++++++++++++---- components/templates/src/builtins/sitemap.xml | 22 ++------- .../src/builtins/split_sitemap_index.xml | 7 +++ components/templates/src/lib.rs | 3 +- 6 files changed, 55 insertions(+), 29 deletions(-) create mode 100644 components/templates/src/builtins/split_sitemap_index.xml diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md index d046687..8e455bd 100644 --- a/.github/ISSUE_TEMPLATE/documentation.md +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -10,5 +10,5 @@ What is the issue? Is the documentation unclear? Is it missing information? ## Proposed solution A quick explanation of what you would like to see to solve the issue. -If you want to add content, please explain what you were looking fod and what was +If you want to add content, please explain what you were looking for and what was your process while looking at the current documentation. 
diff --git a/components/rebuild/src/lib.rs b/components/rebuild/src/lib.rs index 32f8e58..41b4fc4 100644 --- a/components/rebuild/src/lib.rs +++ b/components/rebuild/src/lib.rs @@ -369,6 +369,7 @@ pub fn after_template_change(site: &mut Site, path: &Path) -> Result<()> { match filename { "sitemap.xml" => site.render_sitemap(), "rss.xml" => site.render_rss_feed(site.library.read().unwrap().pages_values(), None), + "split_sitemap_index.xml" => site.render_sitemap(), "robots.txt" => site.render_robots(), "single.html" | "list.html" => site.render_taxonomies(), "page.html" => { diff --git a/components/site/src/lib.rs b/components/site/src/lib.rs index 1d3c27e..d8bada6 100644 --- a/components/site/src/lib.rs +++ b/components/site/src/lib.rs @@ -788,8 +788,6 @@ impl Site { pub fn render_sitemap(&self) -> Result<()> { ensure_directory_exists(&self.output_path)?; - let mut context = Context::new(); - let mut pages = self .library .read() @@ -806,7 +804,6 @@ impl Site { }) .collect::<Vec<_>>(); pages.sort_by(|a, b| a.permalink.cmp(&b.permalink)); - context.insert("pages", &pages); let mut sections = self .library @@ -835,7 +832,6 @@ impl Site { } } sections.sort_by(|a, b| a.permalink.cmp(&b.permalink)); - context.insert("sections", &sections); let mut taxonomies = vec![]; for taxonomy in &self.taxonomies { @@ -869,13 +865,46 @@ impl Site { taxonomies.push(terms); } - context.insert("taxonomies", &taxonomies); - context.insert("config", &self.config); - - let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?; - - create_file(&self.output_path.join("sitemap.xml"), sitemap)?; + // Group all sitemap entries in one vector + let mut all_sitemap_entries = Vec::new(); + all_sitemap_entries.append(&mut pages); + all_sitemap_entries.append(&mut sections); + for terms in taxonomies { + let mut terms = terms; + all_sitemap_entries.append(&mut terms); + } + // Count total number of sitemap entries to include in sitemap + let total_number =
all_sitemap_entries.len(); + let sitemap_limit = 30000; + + if total_number < sitemap_limit { + // Create single sitemap + let mut context = Context::new(); + context.insert("sitemap_entries", &all_sitemap_entries); + let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?; + create_file(&self.output_path.join("sitemap.xml"), sitemap)?; + return Ok(()) + } + + // Create multiple sitemaps (max 30000 urls each) + let mut sitemap_index = Vec::new(); + for (i, chunk) in all_sitemap_entries.chunks(sitemap_limit).enumerate() { + let mut context = Context::new(); + context.insert("sitemap_entries", &chunk); + let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?; + let file_name = format!("sitemap{}.xml", i+1); + create_file(&self.output_path.join(&file_name), sitemap)?; + let mut sitemap_url:String = self.config.make_permalink(&file_name); + sitemap_url.pop(); // Remove trailing slash + sitemap_index.push(sitemap_url); + } + // Create main sitemap that reference numbered sitemaps + let mut main_context = Context::new(); + main_context.insert("sitemaps", &sitemap_index); + let sitemap = &render_template("split_sitemap_index.xml", &self.tera, main_context, &self.config.theme)?; + create_file(&self.output_path.join("sitemap.xml"), sitemap)?; + Ok(()) } diff --git a/components/templates/src/builtins/sitemap.xml b/components/templates/src/builtins/sitemap.xml index 6eba3d7..3e92454 100644 --- a/components/templates/src/builtins/sitemap.xml +++ b/components/templates/src/builtins/sitemap.xml @@ -1,22 +1,10 @@ - {% for page in pages %} + {% for sitemap_entry in sitemap_entries %} - {{ page.permalink | safe }} - {% if page.date %} - {{ page.date }} + {{ sitemap_entry.permalink | safe }} + {% if sitemap_entry.date %} + {{ sitemap_entry.date }} {% endif %} {% endfor %} - {% for section in sections %} - - {{ section.permalink | safe }} - - {% endfor %} - {% for taxonomy in taxonomies %} - {% for entry in taxonomy %} 
- - {{ entry.permalink | safe }} - - {% endfor %} - {% endfor %} - + \ No newline at end of file diff --git a/components/templates/src/builtins/split_sitemap_index.xml b/components/templates/src/builtins/split_sitemap_index.xml new file mode 100644 index 0000000..1b883e4 --- /dev/null +++ b/components/templates/src/builtins/split_sitemap_index.xml @@ -0,0 +1,7 @@ + + {% for sitemap in sitemaps %} + + {{ sitemap }} + + {% endfor %} + \ No newline at end of file diff --git a/components/templates/src/lib.rs b/components/templates/src/lib.rs index 05f782b..eec3b1f 100644 --- a/components/templates/src/lib.rs +++ b/components/templates/src/lib.rs @@ -35,7 +35,8 @@ lazy_static! { ("__zola_builtins/rss.xml", include_str!("builtins/rss.xml")), ("__zola_builtins/sitemap.xml", include_str!("builtins/sitemap.xml")), ("__zola_builtins/robots.txt", include_str!("builtins/robots.txt")), - ("anchor-link.html", include_str!("builtins/anchor-link.html")), + ("__zola_builtins/split_sitemap_index.xml", include_str!("builtins/split_sitemap_index.xml")), + ("__zola_builtins/anchor-link.html", include_str!("builtins/anchor-link.html")), ( "__zola_builtins/shortcodes/youtube.html", include_str!("builtins/shortcodes/youtube.html"),