diff --git a/CHANGELOG.md b/CHANGELOG.md index f95ed95..4dfaace 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,11 +18,12 @@ rendering it and not anymore on the `page`/`section` variable - Add Dracula syntax highlighting theme - Fix using inline styles in headers - Fix sections with render=false being shown in sitemap -- Sitemap is now split when there are more than 30 000 links in it +- Sitemap is now split when there are more than 30 000 links in it - Add link to sitemap in robots.txt - Markdown rendering is now fully CommonMark compliant - `load_data` now defaults to loading file as plain text, unless `format` is passed or the extension matches csv/toml/json +- Sitemap entries get an additional `extra` field for pages only ## 0.5.1 (2018-12-14) diff --git a/components/site/src/lib.rs b/components/site/src/lib.rs index bd41dae..ea2467a 100644 --- a/components/site/src/lib.rs +++ b/components/site/src/lib.rs @@ -19,7 +19,10 @@ extern crate utils; #[cfg(test)] extern crate tempfile; -use std::collections::{HashMap, HashSet}; + +mod sitemap; + +use std::collections::{HashMap}; use std::fs::{copy, create_dir_all, remove_dir_all}; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex, RwLock}; @@ -40,20 +43,6 @@ use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_ use utils::net::get_available_port; use utils::templates::{render_template, rewrite_theme_paths}; -/// The sitemap only needs links and potentially date so we trim down -/// all pages to only that -#[derive(Debug, Serialize, Eq, PartialEq, Hash)] -struct SitemapEntry { - permalink: String, - date: Option, -} - -impl SitemapEntry { - pub fn new(permalink: String, date: Option) -> SitemapEntry { - SitemapEntry { permalink, date } - } -} - #[derive(Debug)] pub struct Site { /// The base path of the zola site @@ -787,98 +776,15 @@ impl Site { pub fn render_sitemap(&self) -> Result<()> { ensure_directory_exists(&self.output_path)?; - let pages = self - .library - .read() - .unwrap() - .pages_values() - .iter() - .filter(|p| !p.is_draft()) - .map(|p| { - let date = match p.meta.date { - Some(ref d) => Some(d.to_string()), - None => None, - }; - SitemapEntry::new(p.permalink.clone(), date) - }) - .collect::>(); - - let mut sections = self - .library - .read() - .unwrap() - .sections_values() - .iter() - .filter(|s| s.meta.render) - .map(|s| SitemapEntry::new(s.permalink.clone(), None)) - .collect::>(); - for section in self - .library - .read() - .unwrap() - .sections_values() - .iter() - .filter(|s| s.meta.paginate_by.is_some()) - { - let number_pagers = (section.pages.len() as f64 - / section.meta.paginate_by.unwrap() as f64) - .ceil() as isize; - for i in 1..=number_pagers { - let permalink = - format!("{}{}/{}/", section.permalink, section.meta.paginate_path, i); - sections.push(SitemapEntry::new(permalink, None)) - } - } - - let mut taxonomies = vec![]; - for taxonomy in &self.taxonomies { - let name = &taxonomy.kind.name; - let mut terms = vec![]; - terms.push(SitemapEntry::new(self.config.make_permalink(name), None)); - for item in &taxonomy.items { - terms.push(SitemapEntry::new( - self.config.make_permalink(&format!("{}/{}", &name, item.slug)), - None, - )); - - if taxonomy.kind.is_paginated() { - let number_pagers = (item.pages.len() as f64 - / taxonomy.kind.paginate_by.unwrap() as f64) - .ceil() as isize; - for i in 1..=number_pagers { - let permalink = self.config.make_permalink(&format!( - "{}/{}/{}/{}", - name, - item.slug, - taxonomy.kind.paginate_path(), - i - )); - terms.push(SitemapEntry::new(permalink, None)) - } - } - } - - taxonomies.push(terms); - } - - let mut all_sitemap_entries = HashSet::new(); - for p in pages { - all_sitemap_entries.insert(p); - } - for s in sections { - all_sitemap_entries.insert(s); - } - for terms in taxonomies { - for term in terms { - all_sitemap_entries.insert(term); - } - } - - // Count total number of sitemap entries to include in sitemap - let total_number = all_sitemap_entries.len(); + let library = self.library.read().unwrap(); + let all_sitemap_entries = sitemap::find_entries( + &library, + &self.taxonomies[..], + &self.config, + ); let sitemap_limit = 30000; - if total_number < sitemap_limit { + if all_sitemap_entries.len() < sitemap_limit { // Create single sitemap let mut context = Context::new(); context.insert("entries", &all_sitemap_entries); diff --git a/components/site/src/sitemap.rs b/components/site/src/sitemap.rs new file mode 100644 index 0000000..c38ed96 --- /dev/null +++ b/components/site/src/sitemap.rs @@ -0,0 +1,127 @@ +use std::borrow::Cow; +use std::hash::{Hash, Hasher}; +use std::collections::{HashSet}; + +use tera::{Map, Value}; +use config::{Config}; +use library::{Library, Taxonomy}; + +/// The sitemap only needs links, potentially date and extra for pages in case of updates +/// for examples so we trim down all entries to only that +#[derive(Debug, Serialize)] +pub struct SitemapEntry<'a> { + permalink: Cow<'a, str>, + date: Option, + extra: Option<&'a Map>, +} + +// Hash/Eq is not implemented for tera::Map but in our case we only care about the permalink +// when comparing/hashing so we implement it manually +impl<'a> Hash for SitemapEntry<'a> { + fn hash(&self, state: &mut H) { + self.permalink.hash(state); + } +} +impl<'a> PartialEq for SitemapEntry<'a> { + fn eq(&self, other: &SitemapEntry) -> bool { + self.permalink == other.permalink + } +} +impl<'a> Eq for SitemapEntry<'a> {} + +impl<'a> SitemapEntry<'a> { + pub fn new(permalink: Cow<'a, str>, date: Option) -> Self { + SitemapEntry { permalink, date, extra: None } + } + + pub fn add_extra(&mut self, extra: &'a Map) { + self.extra = Some(extra); + } +} + +/// Finds out all the links to put in a sitemap from the pages/sections/taxonomies +/// There are no duplicate permalinks in the output vec +pub fn find_entries<'a>(library: &'a Library, taxonomies: &'a [Taxonomy], config: &'a Config) -> Vec> { + let pages = library + .pages_values() + .iter() + .filter(|p| !p.is_draft()) + .map(|p| { + let date = match p.meta.date { + Some(ref d) => Some(d.to_string()), + None => None, + }; + let mut entry = SitemapEntry::new(Cow::Borrowed(&p.permalink), date); + entry.add_extra(&p.meta.extra); + entry + }) + .collect::>(); + + let mut sections = library + .sections_values() + .iter() + .filter(|s| s.meta.render) + .map(|s| SitemapEntry::new(Cow::Borrowed(&s.permalink), None)) + .collect::>(); + + for section in library + .sections_values() + .iter() + .filter(|s| s.meta.paginate_by.is_some()) + { + let number_pagers = (section.pages.len() as f64 + / section.meta.paginate_by.unwrap() as f64) + .ceil() as isize; + for i in 1..=number_pagers { + let permalink = + format!("{}{}/{}/", section.permalink, section.meta.paginate_path, i); + sections.push(SitemapEntry::new(Cow::Owned(permalink), None)) + } + } + + let mut taxonomies_entries = vec![]; + for taxonomy in taxonomies { + let name = &taxonomy.kind.name; + let mut terms = vec![]; + terms.push(SitemapEntry::new(Cow::Owned(config.make_permalink(name)), None)); + for item in &taxonomy.items { + terms.push(SitemapEntry::new( + Cow::Owned(config.make_permalink(&format!("{}/{}", name, item.slug))), + None, + )); + + if taxonomy.kind.is_paginated() { + let number_pagers = (item.pages.len() as f64 + / taxonomy.kind.paginate_by.unwrap() as f64) + .ceil() as isize; + for i in 1..=number_pagers { + let permalink = config.make_permalink(&format!( + "{}/{}/{}/{}", + name, + item.slug, + taxonomy.kind.paginate_path(), + i + )); + terms.push(SitemapEntry::new(Cow::Owned(permalink), None)) + } + } + } + + taxonomies_entries.push(terms); + } + + let mut all_sitemap_entries = HashSet::new(); + for p in pages { + all_sitemap_entries.insert(p); + } + for s in sections { + all_sitemap_entries.insert(s); + } + for terms in taxonomies_entries { + for term in terms { + all_sitemap_entries.insert(term); + } + } + + all_sitemap_entries.into_iter().collect::>() +} diff --git a/docs/content/documentation/templates/sitemap.md b/docs/content/documentation/templates/sitemap.md index 13119c8..74f0f5a 100644 --- a/docs/content/documentation/templates/sitemap.md +++ b/docs/content/documentation/templates/sitemap.md @@ -25,6 +25,7 @@ A `SitemapEntry` has the following fields: ```ts permalink: String; date: String?; +extra: Hashmap?; ``` The `split_sitemap_index.xml` also gets a single variable: