You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

539 lines
20KB

  1. use std::collections::{HashMap, HashSet};
  2. use std::path::{Path, PathBuf};
  3. use slotmap::{DefaultKey, DenseSlotMap};
  4. use front_matter::SortBy;
  5. use crate::content::{Page, Section};
  6. use crate::sorting::{find_siblings, sort_pages_by_date, sort_pages_by_weight};
  7. use config::Config;
  /// Like `vec!` but for `HashSet`: builds a set from a comma-separated list of
  /// expressions. A trailing comma is accepted, as with `vec!`.
  ///
  /// The set type is named by its full path so the macro also works in modules
  /// that have not imported `HashSet`.
  macro_rules! set {
      ( $( $x:expr ),* $(,)? ) => {
          {
              // `mut` is only unused when the macro is invoked without elements.
              #[allow(unused_mut)]
              let mut s = ::std::collections::HashSet::new();
              $(
                  s.insert($x);
              )*
              s
          }
      };
  }
  20. /// Houses everything about pages and sections
  21. /// Think of it as a database where each page and section has an id (Key here)
  22. /// that can be used to find the actual value
  23. /// Sections and pages can then refer to other elements by those keys, which are very cheap to
  24. /// copy.
  25. /// We can assume the keys are always existing as removing a page/section deletes all references
  26. /// to that key.
  27. #[derive(Debug)]
  28. pub struct Library {
  29. /// All the pages of the site
  30. pages: DenseSlotMap<DefaultKey, Page>,
  31. /// All the sections of the site
  32. sections: DenseSlotMap<DefaultKey, Section>,
  33. /// A mapping path -> key for pages so we can easily get their key
  34. pub paths_to_pages: HashMap<PathBuf, DefaultKey>,
  35. /// A mapping path -> key for sections so we can easily get their key
  36. pub paths_to_sections: HashMap<PathBuf, DefaultKey>,
  37. /// Whether we need to look for translations
  38. is_multilingual: bool,
  39. }
  40. impl Library {
  41. pub fn new(cap_pages: usize, cap_sections: usize, is_multilingual: bool) -> Self {
  42. Library {
  43. pages: DenseSlotMap::with_capacity(cap_pages),
  44. sections: DenseSlotMap::with_capacity(cap_sections),
  45. paths_to_pages: HashMap::with_capacity(cap_pages),
  46. paths_to_sections: HashMap::with_capacity(cap_sections),
  47. is_multilingual,
  48. }
  49. }
  50. /// Add a section and return its Key
  51. pub fn insert_section(&mut self, section: Section) -> DefaultKey {
  52. let path = section.file.path.clone();
  53. let key = self.sections.insert(section);
  54. self.paths_to_sections.insert(path, key);
  55. key
  56. }
  57. /// Add a page and return its Key
  58. pub fn insert_page(&mut self, page: Page) -> DefaultKey {
  59. let path = page.file.path.clone();
  60. let key = self.pages.insert(page);
  61. self.paths_to_pages.insert(path, key);
  62. key
  63. }
  64. pub fn pages(&self) -> &DenseSlotMap<DefaultKey, Page> {
  65. &self.pages
  66. }
  67. pub fn pages_mut(&mut self) -> &mut DenseSlotMap<DefaultKey, Page> {
  68. &mut self.pages
  69. }
  70. pub fn pages_values(&self) -> Vec<&Page> {
  71. self.pages.values().collect::<Vec<_>>()
  72. }
  73. pub fn sections(&self) -> &DenseSlotMap<DefaultKey, Section> {
  74. &self.sections
  75. }
  76. pub fn sections_mut(&mut self) -> &mut DenseSlotMap<DefaultKey, Section> {
  77. &mut self.sections
  78. }
  79. pub fn sections_values(&self) -> Vec<&Section> {
  80. self.sections.values().collect::<Vec<_>>()
  81. }
  82. /// Find out the direct subsections of each subsection if there are some
  83. /// as well as the pages for each section
  84. pub fn populate_sections(&mut self, config: &Config) {
  85. let root_path =
  86. self.sections.values().find(|s| s.is_index()).map(|s| s.file.parent.clone()).unwrap();
  87. // We are going to get both the ancestors and grandparents for each section in one go
  88. let mut ancestors: HashMap<PathBuf, Vec<_>> = HashMap::new();
  89. let mut subsections: HashMap<PathBuf, Vec<_>> = HashMap::new();
  90. for section in self.sections.values_mut() {
  91. // Make sure the pages of a section are empty since we can call that many times on `serve`
  92. section.pages = vec![];
  93. section.ignored_pages = vec![];
  94. if let Some(ref grand_parent) = section.file.grand_parent {
  95. subsections
  96. // Using the original filename to work for multi-lingual sections
  97. .entry(grand_parent.join(&section.file.filename))
  98. .or_insert_with(|| vec![])
  99. .push(section.file.path.clone());
  100. }
  101. // Index has no ancestors, no need to go through it
  102. if section.is_index() {
  103. ancestors.insert(section.file.path.clone(), vec![]);
  104. continue;
  105. }
  106. let mut path = root_path.clone();
  107. let root_key = self.paths_to_sections[&root_path.join(&section.file.filename)];
  108. // Index section is the first ancestor of every single section
  109. let mut parents = vec![root_key];
  110. for component in &section.file.components {
  111. path = path.join(component);
  112. // Skip itself
  113. if path == section.file.parent {
  114. continue;
  115. }
  116. if let Some(section_key) =
  117. self.paths_to_sections.get(&path.join(&section.file.filename))
  118. {
  119. parents.push(*section_key);
  120. }
  121. }
  122. ancestors.insert(section.file.path.clone(), parents);
  123. }
  124. for (key, page) in &mut self.pages {
  125. let parent_filename = if page.lang != config.default_language {
  126. format!("_index.{}.md", page.lang)
  127. } else {
  128. "_index.md".to_string()
  129. };
  130. let mut parent_section_path = page.file.parent.join(&parent_filename);
  131. while let Some(section_key) = self.paths_to_sections.get(&parent_section_path) {
  132. let parent_is_transparent;
  133. // We need to get a reference to a section later so keep the scope of borrowing small
  134. {
  135. let section = self.sections.get_mut(*section_key).unwrap();
  136. section.pages.push(key);
  137. parent_is_transparent = section.meta.transparent;
  138. }
  139. page.ancestors =
  140. ancestors.get(&parent_section_path).cloned().unwrap_or_else(|| vec![]);
  141. // Don't forget to push the actual parent
  142. page.ancestors.push(*section_key);
  143. // Find the page template if one of a parent has page_template set
  144. // Stops after the first one found, keep in mind page.ancestors
  145. // is [index, ..., parent] so we need to reverse it first
  146. if page.meta.template.is_none() {
  147. for ancestor in page.ancestors.iter().rev() {
  148. let s = self.sections.get(*ancestor).unwrap();
  149. if s.meta.page_template.is_some() {
  150. page.meta.template = s.meta.page_template.clone();
  151. break;
  152. }
  153. }
  154. }
  155. if !parent_is_transparent {
  156. break;
  157. }
  158. // We've added `_index(.{LANG})?.md` so if we are here so we need to go up twice
  159. match parent_section_path.clone().parent().unwrap().parent() {
  160. Some(parent) => parent_section_path = parent.join(&parent_filename),
  161. None => break,
  162. }
  163. }
  164. }
  165. self.populate_translations();
  166. self.sort_sections_pages();
  167. let sections = self.paths_to_sections.clone();
  168. let mut sections_weight = HashMap::new();
  169. for (key, section) in &self.sections {
  170. sections_weight.insert(key, section.meta.weight);
  171. }
  172. for section in self.sections.values_mut() {
  173. if let Some(ref children) = subsections.get(&section.file.path) {
  174. let mut children: Vec<_> = children.iter().map(|p| sections[p]).collect();
  175. children.sort_by(|a, b| sections_weight[a].cmp(&sections_weight[b]));
  176. section.subsections = children;
  177. }
  178. section.ancestors =
  179. ancestors.get(&section.file.path).cloned().unwrap_or_else(|| vec![]);
  180. }
  181. }
  182. /// Sort all sections pages according to sorting method given
  183. /// Pages that cannot be sorted are set to the section.ignored_pages instead
  184. pub fn sort_sections_pages(&mut self) {
  185. let mut updates = HashMap::new();
  186. for (key, section) in &self.sections {
  187. let (sorted_pages, cannot_be_sorted_pages) = match section.meta.sort_by {
  188. SortBy::None => continue,
  189. SortBy::Date => {
  190. let data = section
  191. .pages
  192. .iter()
  193. .map(|k| {
  194. if let Some(page) = self.pages.get(*k) {
  195. (k, page.meta.datetime, page.permalink.as_ref())
  196. } else {
  197. unreachable!("Sorting got an unknown page")
  198. }
  199. })
  200. .collect();
  201. sort_pages_by_date(data)
  202. }
  203. SortBy::Weight => {
  204. let data = section
  205. .pages
  206. .iter()
  207. .map(|k| {
  208. if let Some(page) = self.pages.get(*k) {
  209. (k, page.meta.weight, page.permalink.as_ref())
  210. } else {
  211. unreachable!("Sorting got an unknown page")
  212. }
  213. })
  214. .collect();
  215. sort_pages_by_weight(data)
  216. }
  217. };
  218. updates.insert(key, (sorted_pages, cannot_be_sorted_pages, section.meta.sort_by));
  219. }
  220. for (key, (sorted, cannot_be_sorted, sort_by)) in updates {
  221. // Find sibling between sorted pages first
  222. let with_siblings = find_siblings(&sorted);
  223. for (k2, val1, val2) in with_siblings {
  224. if let Some(page) = self.pages.get_mut(k2) {
  225. match sort_by {
  226. SortBy::Date => {
  227. page.earlier = val2;
  228. page.later = val1;
  229. }
  230. SortBy::Weight => {
  231. page.lighter = val1;
  232. page.heavier = val2;
  233. }
  234. SortBy::None => unreachable!("Impossible to find siblings in SortBy::None"),
  235. }
  236. } else {
  237. unreachable!("Sorting got an unknown page")
  238. }
  239. }
  240. if let Some(s) = self.sections.get_mut(key) {
  241. s.pages = sorted;
  242. s.ignored_pages = cannot_be_sorted;
  243. }
  244. }
  245. }
  246. /// Finds all the translations for each section/page and set the `translations`
  247. /// field of each as needed
  248. /// A no-op for sites without multiple languages
  249. fn populate_translations(&mut self) {
  250. if !self.is_multilingual {
  251. return;
  252. }
  253. // Sections first
  254. let mut sections_translations = HashMap::new();
  255. for (key, section) in &self.sections {
  256. sections_translations
  257. .entry(section.file.canonical.clone()) // TODO: avoid this clone
  258. .or_insert_with(Vec::new)
  259. .push(key);
  260. }
  261. for (key, section) in self.sections.iter_mut() {
  262. let translations = &sections_translations[&section.file.canonical];
  263. if translations.len() == 1 {
  264. section.translations = vec![];
  265. continue;
  266. }
  267. section.translations = translations.iter().filter(|k| **k != key).cloned().collect();
  268. }
  269. // Same thing for pages
  270. let mut pages_translations = HashMap::new();
  271. for (key, page) in &self.pages {
  272. pages_translations
  273. .entry(page.file.canonical.clone()) // TODO: avoid this clone
  274. .or_insert_with(Vec::new)
  275. .push(key);
  276. }
  277. for (key, page) in self.pages.iter_mut() {
  278. let translations = &pages_translations[&page.file.canonical];
  279. if translations.len() == 1 {
  280. page.translations = vec![];
  281. continue;
  282. }
  283. page.translations = translations.iter().filter(|k| **k != key).cloned().collect();
  284. }
  285. }
  286. /// Find all the orphan pages: pages that are in a folder without an `_index.md`
  287. pub fn get_all_orphan_pages(&self) -> Vec<&Page> {
  288. let pages_in_sections =
  289. self.sections.values().flat_map(|s| &s.pages).collect::<HashSet<_>>();
  290. self.pages
  291. .iter()
  292. .filter(|(key, _)| !pages_in_sections.contains(&key))
  293. .map(|(_, page)| page)
  294. .collect()
  295. }
  296. /// Find the parent section & all grandparents section that have transparent=true
  297. /// Only used in rebuild.
  298. pub fn find_parent_sections<P: AsRef<Path>>(&self, path: P) -> Vec<&Section> {
  299. let mut parents = vec![];
  300. let page = self.get_page(path.as_ref()).unwrap();
  301. for ancestor in page.ancestors.iter().rev() {
  302. let section = self.get_section_by_key(*ancestor);
  303. if parents.is_empty() || section.meta.transparent {
  304. parents.push(section);
  305. }
  306. }
  307. parents
  308. }
  309. /// Only used in tests
  310. pub fn get_section_key<P: AsRef<Path>>(&self, path: P) -> Option<&DefaultKey> {
  311. self.paths_to_sections.get(path.as_ref())
  312. }
  313. pub fn get_section<P: AsRef<Path>>(&self, path: P) -> Option<&Section> {
  314. self.sections.get(self.paths_to_sections.get(path.as_ref()).cloned().unwrap_or_default())
  315. }
  316. pub fn get_section_mut<P: AsRef<Path>>(&mut self, path: P) -> Option<&mut Section> {
  317. self.sections
  318. .get_mut(self.paths_to_sections.get(path.as_ref()).cloned().unwrap_or_default())
  319. }
  320. pub fn get_section_by_key(&self, key: DefaultKey) -> &Section {
  321. self.sections.get(key).unwrap()
  322. }
  323. pub fn get_section_mut_by_key(&mut self, key: DefaultKey) -> &mut Section {
  324. self.sections.get_mut(key).unwrap()
  325. }
  326. pub fn get_section_path_by_key(&self, key: DefaultKey) -> &str {
  327. &self.get_section_by_key(key).file.relative
  328. }
  329. pub fn get_page<P: AsRef<Path>>(&self, path: P) -> Option<&Page> {
  330. self.pages.get(self.paths_to_pages.get(path.as_ref()).cloned().unwrap_or_default())
  331. }
  332. pub fn get_page_by_key(&self, key: DefaultKey) -> &Page {
  333. self.pages.get(key).unwrap()
  334. }
  335. pub fn get_page_mut_by_key(&mut self, key: DefaultKey) -> &mut Page {
  336. self.pages.get_mut(key).unwrap()
  337. }
  338. pub fn remove_section<P: AsRef<Path>>(&mut self, path: P) -> Option<Section> {
  339. if let Some(k) = self.paths_to_sections.remove(path.as_ref()) {
  340. self.sections.remove(k)
  341. } else {
  342. None
  343. }
  344. }
  345. pub fn remove_page<P: AsRef<Path>>(&mut self, path: P) -> Option<Page> {
  346. if let Some(k) = self.paths_to_pages.remove(path.as_ref()) {
  347. self.pages.remove(k)
  348. } else {
  349. None
  350. }
  351. }
  352. /// Used in rebuild, to check if we know it already
  353. pub fn contains_section<P: AsRef<Path>>(&self, path: P) -> bool {
  354. self.paths_to_sections.contains_key(path.as_ref())
  355. }
  356. /// Used in rebuild, to check if we know it already
  357. pub fn contains_page<P: AsRef<Path>>(&self, path: P) -> bool {
  358. self.paths_to_pages.contains_key(path.as_ref())
  359. }
  360. /// This will check every section/page paths + the aliases and ensure none of them
  361. /// are colliding.
  362. /// Returns (path colliding, [list of files causing that collision])
  363. pub fn check_for_path_collisions(&self) -> Vec<(&str, Vec<String>)> {
  364. let mut paths: HashMap<&str, HashSet<DefaultKey>> = HashMap::new();
  365. for (key, page) in &self.pages {
  366. paths
  367. .entry(&page.path)
  368. .and_modify(|s| {
  369. s.insert(key);
  370. })
  371. .or_insert_with(|| set!(key));
  372. for alias in &page.meta.aliases {
  373. paths
  374. .entry(&alias)
  375. .and_modify(|s| {
  376. s.insert(key);
  377. })
  378. .or_insert_with(|| set!(key));
  379. }
  380. }
  381. for (key, section) in &self.sections {
  382. if !section.meta.render {
  383. continue;
  384. }
  385. paths
  386. .entry(&section.path)
  387. .and_modify(|s| {
  388. s.insert(key);
  389. })
  390. .or_insert_with(|| set!(key));
  391. }
  392. let mut collisions = vec![];
  393. for (p, keys) in paths {
  394. if keys.len() > 1 {
  395. let file_paths: Vec<String> = keys
  396. .iter()
  397. .map(|k| {
  398. self.pages.get(*k).map(|p| p.file.relative.clone()).unwrap_or_else(|| {
  399. self.sections.get(*k).map(|s| s.file.relative.clone()).unwrap()
  400. })
  401. })
  402. .collect();
  403. collisions.push((p, file_paths));
  404. }
  405. }
  406. collisions
  407. }
  408. }
  409. #[cfg(test)]
  410. mod tests {
  411. use super::*;
  // Two pages and a section with three distinct paths: nothing to report.
  #[test]
  fn can_find_no_collisions() {
      let mut first = Page::default();
      first.path = "hello".to_string();
      let mut second = Page::default();
      second.path = "hello-world".to_string();
      let mut blog = Section::default();
      blog.path = "blog".to_string();

      let mut library = Library::new(10, 10, false);
      library.insert_page(first);
      library.insert_page(second);
      library.insert_section(blog);

      assert!(library.check_for_path_collisions().is_empty());
  }
  // Two pages sharing the path "hello" must be reported together.
  #[test]
  fn can_find_collisions_between_pages() {
      let mut first = Page::default();
      first.path = "hello".to_string();
      first.file.relative = "hello".to_string();
      let mut second = Page::default();
      second.path = "hello".to_string();
      second.file.relative = "hello-world".to_string();
      let mut blog = Section::default();
      blog.path = "blog".to_string();
      blog.file.relative = "hello-world".to_string();

      let mut library = Library::new(10, 10, false);
      library.insert_page(first);
      library.insert_page(second);
      library.insert_section(blog);

      let collisions = library.check_for_path_collisions();
      assert_eq!(collisions.len(), 1);
      let (colliding_path, files) = &collisions[0];
      assert_eq!(*colliding_path, "hello");
      assert!(files.contains(&"hello".to_string()));
      assert!(files.contains(&"hello-world".to_string()));
  }
  // An alias of one page equal to the path of another page is a collision too.
  #[test]
  fn can_find_collisions_with_an_alias() {
      let mut first = Page::default();
      first.path = "hello".to_string();
      first.file.relative = "hello".to_string();
      let mut second = Page::default();
      second.path = "hello-world".to_string();
      second.file.relative = "hello-world".to_string();
      second.meta.aliases = vec!["hello".to_string()];
      let mut blog = Section::default();
      blog.path = "blog".to_string();
      blog.file.relative = "hello-world".to_string();

      let mut library = Library::new(10, 10, false);
      library.insert_page(first);
      library.insert_page(second);
      library.insert_section(blog);

      let collisions = library.check_for_path_collisions();
      assert_eq!(collisions.len(), 1);
      let (colliding_path, files) = &collisions[0];
      assert_eq!(*colliding_path, "hello");
      assert!(files.contains(&"hello".to_string()));
      assert!(files.contains(&"hello-world".to_string()));
  }
  470. }