Browse Source

Split sitemap (#619)

Split sitemap when it is getting too big
index-subcmd
Sébastien Mariaux Vincent Prouillet 5 years ago
parent
commit
2a0d0b9b77
6 changed files with 55 additions and 29 deletions
  1. +1
    -1
      .github/ISSUE_TEMPLATE/documentation.md
  2. +1
    -0
      components/rebuild/src/lib.rs
  3. +39
    -10
      components/site/src/lib.rs
  4. +5
    -17
      components/templates/src/builtins/sitemap.xml
  5. +7
    -0
      components/templates/src/builtins/split_sitemap_index.xml
  6. +2
    -1
      components/templates/src/lib.rs

+ 1
- 1
.github/ISSUE_TEMPLATE/documentation.md View File

@@ -10,5 +10,5 @@ What is the issue? Is the documentation unclear? Is it missing information?

## Proposed solution
A quick explanation of what you would like to see to solve the issue.
If you want to add content, please explain what you were looking fod and what was
If you want to add content, please explain what you were looking for and what was
your process while looking at the current documentation.

+ 1
- 0
components/rebuild/src/lib.rs View File

@@ -369,6 +369,7 @@ pub fn after_template_change(site: &mut Site, path: &Path) -> Result<()> {
match filename {
"sitemap.xml" => site.render_sitemap(),
"rss.xml" => site.render_rss_feed(site.library.read().unwrap().pages_values(), None),
"split_sitemap_index.xml" => site.render_sitemap(),
"robots.txt" => site.render_robots(),
"single.html" | "list.html" => site.render_taxonomies(),
"page.html" => {


+ 39
- 10
components/site/src/lib.rs View File

@@ -788,8 +788,6 @@ impl Site {
pub fn render_sitemap(&self) -> Result<()> {
ensure_directory_exists(&self.output_path)?;

let mut context = Context::new();

let mut pages = self
.library
.read()
@@ -806,7 +804,6 @@ impl Site {
})
.collect::<Vec<_>>();
pages.sort_by(|a, b| a.permalink.cmp(&b.permalink));
context.insert("pages", &pages);

let mut sections = self
.library
@@ -835,7 +832,6 @@ impl Site {
}
}
sections.sort_by(|a, b| a.permalink.cmp(&b.permalink));
context.insert("sections", &sections);

let mut taxonomies = vec![];
for taxonomy in &self.taxonomies {
@@ -869,13 +865,46 @@ impl Site {
taxonomies.push(terms);
}

context.insert("taxonomies", &taxonomies);
context.insert("config", &self.config);

let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?;

create_file(&self.output_path.join("sitemap.xml"), sitemap)?;
// Group all sitemap entries in one vector
let mut all_sitemap_entries = Vec::new();
all_sitemap_entries.append(&mut pages);
all_sitemap_entries.append(&mut sections);
for terms in taxonomies {
let mut terms = terms;
all_sitemap_entries.append(&mut terms);
}

// Count total number of sitemap entries to include in sitemap
let total_number = all_sitemap_entries.len();
let sitemap_limit = 30000;

if total_number < sitemap_limit {
// Create single sitemap
let mut context = Context::new();
context.insert("sitemap_entries", &all_sitemap_entries);
let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?;
create_file(&self.output_path.join("sitemap.xml"), sitemap)?;
return Ok(())
}
// Create multiple sitemaps (max 30000 urls each)
let mut sitemap_index = Vec::new();
for (i, chunk) in all_sitemap_entries.chunks(sitemap_limit).enumerate() {
let mut context = Context::new();
context.insert("sitemap_entries", &chunk);
let sitemap = &render_template("sitemap.xml", &self.tera, context, &self.config.theme)?;
let file_name = format!("sitemap{}.xml", i+1);
create_file(&self.output_path.join(&file_name), sitemap)?;
let mut sitemap_url:String = self.config.make_permalink(&file_name);
sitemap_url.pop(); // Remove trailing slash
sitemap_index.push(sitemap_url);
}
// Create the main sitemap index that references the numbered sitemaps
let mut main_context = Context::new();
main_context.insert("sitemaps", &sitemap_index);
let sitemap = &render_template("split_sitemap_index.xml", &self.tera, main_context, &self.config.theme)?;
create_file(&self.output_path.join("sitemap.xml"), sitemap)?;
Ok(())
}



+ 5
- 17
components/templates/src/builtins/sitemap.xml View File

@@ -1,22 +1,10 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for page in pages %}
{% for sitemap_entry in sitemap_entries %}
<url>
<loc>{{ page.permalink | safe }}</loc>
{% if page.date %}
<lastmod>{{ page.date }}</lastmod>
<loc>{{ sitemap_entry.permalink | safe }}</loc>
{% if sitemap_entry.date %}
<lastmod>{{ sitemap_entry.date }}</lastmod>
{% endif %}
</url>
{% endfor %}
{% for section in sections %}
<url>
<loc>{{ section.permalink | safe }}</loc>
</url>
{% endfor %}
{% for taxonomy in taxonomies %}
{% for entry in taxonomy %}
<url>
<loc>{{ entry.permalink | safe }}</loc>
</url>
{% endfor %}
{% endfor %}
</urlset>
</urlset>

+ 7
- 0
components/templates/src/builtins/split_sitemap_index.xml View File

@@ -0,0 +1,7 @@
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">{#
  Sitemap index template. It is rendered into the top-level sitemap.xml when the
  site's entries are split across several numbered sitemap files.
  Context: `sitemaps` is a list of URL strings, one per numbered sitemap file
  (sitemap1.xml, sitemap2.xml, ...), each emitted below as one <sitemap> entry.
#}
{% for sitemap in sitemaps %}
<sitemap>
<loc>{{ sitemap }}</loc>{# NOTE(review): sitemap.xml renders its permalinks with `| safe`; this URL has no filter. Confirm whether escaping the URL here is intended. #}
</sitemap>
{% endfor %}
</sitemapindex>

+ 2
- 1
components/templates/src/lib.rs View File

@@ -35,7 +35,8 @@ lazy_static! {
("__zola_builtins/rss.xml", include_str!("builtins/rss.xml")),
("__zola_builtins/sitemap.xml", include_str!("builtins/sitemap.xml")),
("__zola_builtins/robots.txt", include_str!("builtins/robots.txt")),
("anchor-link.html", include_str!("builtins/anchor-link.html")),
("__zola_builtins/split_sitemap_index.xml", include_str!("builtins/split_sitemap_index.xml")),
("__zola_builtins/anchor-link.html", include_str!("builtins/anchor-link.html")),
(
"__zola_builtins/shortcodes/youtube.html",
include_str!("builtins/shortcodes/youtube.html"),


Loading…
Cancel
Save