You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

663 lines
24KB

  1. use std::collections::HashMap;
  2. use std::fs::{remove_dir_all, copy, create_dir_all};
  3. use std::path::{Path, PathBuf};
  4. use glob::glob;
  5. use tera::{Tera, Context};
  6. use walkdir::WalkDir;
  7. use errors::{Result, ResultExt};
  8. use config::{Config, get_config};
  9. use fs::{create_file, create_directory, ensure_directory_exists};
  10. use content::{Page, Section, Paginator, SortBy, Taxonomy, populate_previous_and_next_pages, sort_pages};
  11. use templates::{GUTENBERG_TERA, global_fns, render_redirect_template};
  12. use front_matter::InsertAnchor;
  13. use rayon::prelude::*;
  14. #[derive(Debug)]
  15. pub struct Site {
  16. /// The base path of the gutenberg site
  17. pub base_path: PathBuf,
  18. /// The parsed config for the site
  19. pub config: Config,
  20. pub pages: HashMap<PathBuf, Page>,
  21. pub sections: HashMap<PathBuf, Section>,
  22. pub tera: Tera,
  23. live_reload: bool,
  24. output_path: PathBuf,
  25. static_path: PathBuf,
  26. pub tags: Option<Taxonomy>,
  27. pub categories: Option<Taxonomy>,
  28. /// A map of all .md files (section and pages) and their permalink
  29. /// We need that if there are relative links in the content that need to be resolved
  30. pub permalinks: HashMap<String, String>,
  31. }
  32. impl Site {
  33. /// Parse a site at the given path. Defaults to the current dir
  34. /// Passing in a path is only used in tests
  35. pub fn new<P: AsRef<Path>>(path: P, config_file: &str) -> Result<Site> {
  36. let path = path.as_ref();
  37. let tpl_glob = format!("{}/{}", path.to_string_lossy().replace("\\", "/"), "templates/**/*.*ml");
  38. let mut tera = Tera::new(&tpl_glob).chain_err(|| "Error parsing templates")?;
  39. tera.extend(&GUTENBERG_TERA)?;
  40. let site = Site {
  41. base_path: path.to_path_buf(),
  42. config: get_config(path, config_file),
  43. pages: HashMap::new(),
  44. sections: HashMap::new(),
  45. tera: tera,
  46. live_reload: false,
  47. output_path: path.join("public"),
  48. static_path: path.join("static"),
  49. tags: None,
  50. categories: None,
  51. permalinks: HashMap::new(),
  52. };
  53. Ok(site)
  54. }
  55. /// What the function name says
  56. pub fn enable_live_reload(&mut self) {
  57. self.live_reload = true;
  58. }
  59. /// Get all the orphan (== without section) pages in the site
  60. pub fn get_all_orphan_pages(&self) -> Vec<&Page> {
  61. let mut pages_in_sections = vec![];
  62. let mut orphans = vec![];
  63. for s in self.sections.values() {
  64. pages_in_sections.extend(s.all_pages_path());
  65. }
  66. for page in self.pages.values() {
  67. if !pages_in_sections.contains(&page.file.path) {
  68. orphans.push(page);
  69. }
  70. }
  71. orphans
  72. }
  73. /// Used by tests to change the output path to a tmp dir
  74. #[doc(hidden)]
  75. pub fn set_output_path<P: AsRef<Path>>(&mut self, path: P) {
  76. self.output_path = path.as_ref().to_path_buf();
  77. }
  78. /// Reads all .md files in the `content` directory and create pages/sections
  79. /// out of them
  80. pub fn load(&mut self) -> Result<()> {
  81. let base_path = self.base_path.to_string_lossy().replace("\\", "/");
  82. let content_glob = format!("{}/{}", base_path, "content/**/*.md");
  83. for entry in glob(&content_glob).unwrap().filter_map(|e| e.ok()) {
  84. let path = entry.as_path();
  85. if path.file_name().unwrap() == "_index.md" {
  86. self.add_section(path, false)?;
  87. } else {
  88. self.add_page(path, false)?;
  89. }
  90. }
  91. // Insert a default index section if necessary so we don't need to create
  92. // a _index.md to render the index page
  93. let index_path = self.base_path.join("content").join("_index.md");
  94. if !self.sections.contains_key(&index_path) {
  95. let mut index_section = Section::default();
  96. index_section.permalink = self.config.make_permalink("");
  97. self.sections.insert(index_path, index_section);
  98. }
  99. // Silly thing needed to make the borrow checker happy
  100. let mut pages_insert_anchors = HashMap::new();
  101. for page in self.pages.values() {
  102. pages_insert_anchors.insert(page.file.path.clone(), self.find_parent_section_insert_anchor(&page.file.parent.clone()));
  103. }
  104. {
  105. // Another silly thing needed to not borrow &self in parallel and
  106. // make the borrow checker happy
  107. let permalinks = &self.permalinks;
  108. let tera = &self.tera;
  109. let config = &self.config;
  110. self.pages.par_iter_mut()
  111. .map(|(_, page)| page)
  112. .map(|page| {
  113. let insert_anchor = pages_insert_anchors[&page.file.path];
  114. page.render_markdown(&permalinks, &tera, &config, insert_anchor)
  115. })
  116. .fold(|| Ok(()), Result::and)
  117. .reduce(|| Ok(()), Result::and)?;
  118. self.sections.par_iter_mut()
  119. .map(|(_, section)| section)
  120. .map(|section| section.render_markdown(permalinks, tera, config))
  121. .fold(|| Ok(()), Result::and)
  122. .reduce(|| Ok(()), Result::and)?;
  123. }
  124. self.populate_sections();
  125. self.populate_tags_and_categories();
  126. self.tera.register_global_function("get_page", global_fns::make_get_page(&self.pages));
  127. self.tera.register_global_function("get_section", global_fns::make_get_section(&self.sections));
  128. self.register_get_url_fn();
  129. Ok(())
  130. }
  131. /// Separate fn as it can be called in the serve command
  132. pub fn register_get_url_fn(&mut self) {
  133. self.tera.register_global_function("get_url", global_fns::make_get_url(self.permalinks.clone()));
  134. }
  135. /// Add a page to the site
  136. /// The `render` parameter is used in the serve command, when rebuilding a page.
  137. /// If `true`, it will also render the markdown for that page
  138. /// Returns the previous page struct if there was one
  139. pub fn add_page(&mut self, path: &Path, render: bool) -> Result<Option<Page>> {
  140. let page = Page::from_file(&path, &self.config)?;
  141. self.permalinks.insert(page.file.relative.clone(), page.permalink.clone());
  142. let prev = self.pages.insert(page.file.path.clone(), page);
  143. if render {
  144. let insert_anchor = self.find_parent_section_insert_anchor(&self.pages[path].file.parent);
  145. let mut page = self.pages.get_mut(path).unwrap();
  146. page.render_markdown(&self.permalinks, &self.tera, &self.config, insert_anchor)?;
  147. }
  148. Ok(prev)
  149. }
  150. /// Add a section to the site
  151. /// The `render` parameter is used in the serve command, when rebuilding a page.
  152. /// If `true`, it will also render the markdown for that page
  153. /// Returns the previous section struct if there was one
  154. pub fn add_section(&mut self, path: &Path, render: bool) -> Result<Option<Section>> {
  155. let section = Section::from_file(path, &self.config)?;
  156. self.permalinks.insert(section.file.relative.clone(), section.permalink.clone());
  157. let prev = self.sections.insert(section.file.path.clone(), section);
  158. if render {
  159. let mut section = self.sections.get_mut(path).unwrap();
  160. section.render_markdown(&self.permalinks, &self.tera, &self.config)?;
  161. }
  162. Ok(prev)
  163. }
  164. /// Finds the insert_anchor for the parent section of the directory at `path`.
  165. /// Defaults to `AnchorInsert::None` if no parent section found
  166. pub fn find_parent_section_insert_anchor(&self, parent_path: &PathBuf) -> InsertAnchor {
  167. match self.sections.get(&parent_path.join("_index.md")) {
  168. Some(s) => s.meta.insert_anchor.unwrap(),
  169. None => InsertAnchor::None
  170. }
  171. }
  172. /// Find out the direct subsections of each subsection if there are some
  173. /// as well as the pages for each section
  174. pub fn populate_sections(&mut self) {
  175. let mut grandparent_paths = HashMap::new();
  176. for section in self.sections.values_mut() {
  177. if let Some(ref grand_parent) = section.file.grand_parent {
  178. grandparent_paths.entry(grand_parent.to_path_buf()).or_insert_with(|| vec![]).push(section.clone());
  179. }
  180. // Make sure the pages of a section are empty since we can call that many times on `serve`
  181. section.pages = vec![];
  182. section.ignored_pages = vec![];
  183. }
  184. for page in self.pages.values() {
  185. let parent_section_path = page.file.parent.join("_index.md");
  186. if self.sections.contains_key(&parent_section_path) {
  187. self.sections.get_mut(&parent_section_path).unwrap().pages.push(page.clone());
  188. }
  189. }
  190. for section in self.sections.values_mut() {
  191. match grandparent_paths.get(&section.file.parent) {
  192. Some(paths) => section.subsections.extend(paths.clone()),
  193. None => continue,
  194. };
  195. }
  196. self.sort_sections_pages(None);
  197. }
  198. /// Sorts the pages of the section at the given path
  199. /// By default will sort all sections but can be made to only sort a single one by providing a path
  200. pub fn sort_sections_pages(&mut self, only: Option<&Path>) {
  201. for (path, section) in &mut self.sections {
  202. if let Some(p) = only {
  203. if p != path {
  204. continue;
  205. }
  206. }
  207. let (sorted_pages, cannot_be_sorted_pages) = sort_pages(section.pages.clone(), section.meta.sort_by());
  208. section.pages = populate_previous_and_next_pages(&sorted_pages);
  209. section.ignored_pages = cannot_be_sorted_pages;
  210. }
  211. }
  212. /// Find all the tags and categories if it's asked in the config
  213. pub fn populate_tags_and_categories(&mut self) {
  214. let generate_tags_pages = self.config.generate_tags_pages.unwrap();
  215. let generate_categories_pages = self.config.generate_categories_pages.unwrap();
  216. if !generate_tags_pages && !generate_categories_pages {
  217. return;
  218. }
  219. // TODO: can we pass a reference?
  220. let (tags, categories) = Taxonomy::find_tags_and_categories(
  221. self.pages.values().cloned().collect::<Vec<_>>()
  222. );
  223. if generate_tags_pages {
  224. self.tags = Some(tags);
  225. }
  226. if generate_categories_pages {
  227. self.categories = Some(categories);
  228. }
  229. }
  230. /// Inject live reload script tag if in live reload mode
  231. fn inject_livereload(&self, html: String) -> String {
  232. if self.live_reload {
  233. return html.replace(
  234. "</body>",
  235. r#"<script src="/livereload.js?port=1112&mindelay=10"></script></body>"#
  236. );
  237. }
  238. html
  239. }
  240. /// Copy static file to public directory.
  241. pub fn copy_static_file<P: AsRef<Path>>(&self, path: P) -> Result<()> {
  242. let relative_path = path.as_ref().strip_prefix(&self.static_path).unwrap();
  243. let target_path = self.output_path.join(relative_path);
  244. if let Some(parent_directory) = target_path.parent() {
  245. create_dir_all(parent_directory)?;
  246. }
  247. copy(path.as_ref(), &target_path)?;
  248. Ok(())
  249. }
  250. /// Copy the content of the `static` folder into the `public` folder
  251. pub fn copy_static_directory(&self) -> Result<()> {
  252. for entry in WalkDir::new(&self.static_path).into_iter().filter_map(|e| e.ok()) {
  253. let relative_path = entry.path().strip_prefix(&self.static_path).unwrap();
  254. let target_path = self.output_path.join(relative_path);
  255. if entry.path().is_dir() {
  256. if !target_path.exists() {
  257. create_directory(&target_path)?;
  258. }
  259. } else {
  260. let entry_fullpath = self.base_path.join(entry.path());
  261. self.copy_static_file(entry_fullpath)?;
  262. }
  263. }
  264. Ok(())
  265. }
  266. /// Deletes the `public` directory if it exists
  267. pub fn clean(&self) -> Result<()> {
  268. if self.output_path.exists() {
  269. // Delete current `public` directory so we can start fresh
  270. remove_dir_all(&self.output_path).chain_err(|| "Couldn't delete `public` directory")?;
  271. }
  272. Ok(())
  273. }
  274. /// Renders a single content page
  275. pub fn render_page(&self, page: &Page, section: Option<&Section>) -> Result<()> {
  276. ensure_directory_exists(&self.output_path)?;
  277. // Copy the nesting of the content directory if we have sections for that page
  278. let mut current_path = self.output_path.to_path_buf();
  279. for component in page.path.split('/') {
  280. current_path.push(component);
  281. if !current_path.exists() {
  282. create_directory(&current_path)?;
  283. }
  284. }
  285. // Make sure the folder exists
  286. create_directory(&current_path)?;
  287. // Finally, create a index.html file there with the page rendered
  288. let output = page.render_html(&self.tera, &self.config, section)?;
  289. create_file(&current_path.join("index.html"), &self.inject_livereload(output))?;
  290. // Copy any asset we found previously into the same directory as the index.html
  291. for asset in &page.assets {
  292. let asset_path = asset.as_path();
  293. copy(&asset_path, &current_path.join(asset_path.file_name().unwrap()))?;
  294. }
  295. Ok(())
  296. }
  297. /// Deletes the `public` directory and builds the site
  298. pub fn build(&self) -> Result<()> {
  299. self.clean()?;
  300. // Render aliases first to allow overwriting
  301. self.render_aliases()?;
  302. self.render_sections()?;
  303. self.render_orphan_pages()?;
  304. self.render_sitemap()?;
  305. if self.config.generate_rss.unwrap() {
  306. self.render_rss_feed()?;
  307. }
  308. self.render_robots()?;
  309. // `render_categories` and `render_tags` will check whether the config allows
  310. // them to render or not
  311. self.render_categories()?;
  312. self.render_tags()?;
  313. self.copy_static_directory()
  314. }
  315. pub fn render_aliases(&self) -> Result<()> {
  316. for page in self.pages.values() {
  317. if let Some(ref aliases) = page.meta.aliases {
  318. for alias in aliases {
  319. let mut output_path = self.output_path.to_path_buf();
  320. for component in alias.split("/") {
  321. output_path.push(&component);
  322. if !output_path.exists() {
  323. create_directory(&output_path)?;
  324. }
  325. }
  326. create_file(&output_path.join("index.html"), &render_redirect_template(&page.permalink, &self.tera)?)?;
  327. }
  328. }
  329. }
  330. Ok(())
  331. }
  332. /// Renders robots.txt
  333. pub fn render_robots(&self) -> Result<()> {
  334. ensure_directory_exists(&self.output_path)?;
  335. create_file(
  336. &self.output_path.join("robots.txt"),
  337. &self.tera.render("robots.txt", &Context::new())?
  338. )
  339. }
  340. /// Renders all categories and the single category pages if there are some
  341. pub fn render_categories(&self) -> Result<()> {
  342. if let Some(ref categories) = self.categories {
  343. self.render_taxonomy(categories)?;
  344. }
  345. Ok(())
  346. }
  347. /// Renders all tags and the single tag pages if there are some
  348. pub fn render_tags(&self) -> Result<()> {
  349. if let Some(ref tags) = self.tags {
  350. self.render_taxonomy(tags)?;
  351. }
  352. Ok(())
  353. }
  354. fn render_taxonomy(&self, taxonomy: &Taxonomy) -> Result<()> {
  355. if taxonomy.items.is_empty() {
  356. return Ok(())
  357. }
  358. ensure_directory_exists(&self.output_path)?;
  359. let output_path = self.output_path.join(&taxonomy.get_list_name());
  360. let list_output = taxonomy.render_list(&self.tera, &self.config)?;
  361. create_directory(&output_path)?;
  362. create_file(&output_path.join("index.html"), &self.inject_livereload(list_output))?;
  363. for item in &taxonomy.items {
  364. let single_output = taxonomy.render_single_item(item, &self.tera, &self.config)?;
  365. create_directory(&output_path.join(&item.slug))?;
  366. create_file(
  367. &output_path.join(&item.slug).join("index.html"),
  368. &self.inject_livereload(single_output)
  369. )?;
  370. }
  371. Ok(())
  372. }
  373. /// What it says on the tin
  374. pub fn render_sitemap(&self) -> Result<()> {
  375. ensure_directory_exists(&self.output_path)?;
  376. let mut context = Context::new();
  377. context.add("pages", &self.pages.values().collect::<Vec<&Page>>());
  378. context.add("sections", &self.sections.values().collect::<Vec<&Section>>());
  379. let mut categories = vec![];
  380. if let Some(ref c) = self.categories {
  381. let name = c.get_list_name();
  382. categories.push(self.config.make_permalink(&name));
  383. for item in &c.items {
  384. categories.push(
  385. self.config.make_permalink(&format!("{}/{}", &name, item.slug))
  386. );
  387. }
  388. }
  389. context.add("categories", &categories);
  390. let mut tags = vec![];
  391. if let Some(ref t) = self.tags {
  392. let name = t.get_list_name();
  393. tags.push(self.config.make_permalink(&name));
  394. for item in &t.items {
  395. tags.push(
  396. self.config.make_permalink(&format!("{}/{}", &name, item.slug))
  397. );
  398. }
  399. }
  400. context.add("tags", &tags);
  401. let sitemap = self.tera.render("sitemap.xml", &context)?;
  402. create_file(&self.output_path.join("sitemap.xml"), &sitemap)?;
  403. Ok(())
  404. }
  405. pub fn render_rss_feed(&self) -> Result<()> {
  406. ensure_directory_exists(&self.output_path)?;
  407. let mut context = Context::new();
  408. let pages = self.pages.values()
  409. .filter(|p| p.meta.date.is_some())
  410. .take(self.config.rss_limit.unwrap()) // limit to the last n elements
  411. .cloned()
  412. .collect::<Vec<Page>>();
  413. // Don't generate a RSS feed if none of the pages has a date
  414. if pages.is_empty() {
  415. return Ok(());
  416. }
  417. context.add("last_build_date", &pages[0].meta.date);
  418. let (sorted_pages, _) = sort_pages(pages, SortBy::Date);
  419. context.add("pages", &sorted_pages);
  420. context.add("config", &self.config);
  421. let rss_feed_url = if self.config.base_url.ends_with('/') {
  422. format!("{}{}", self.config.base_url, "rss.xml")
  423. } else {
  424. format!("{}/{}", self.config.base_url, "rss.xml")
  425. };
  426. context.add("feed_url", &rss_feed_url);
  427. let sitemap = self.tera.render("rss.xml", &context)?;
  428. create_file(&self.output_path.join("rss.xml"), &sitemap)?;
  429. Ok(())
  430. }
  431. /// Create a hashmap of paths to section
  432. /// For example `content/posts/_index.md` key will be `posts`
  433. /// The index section will always be called `index` so don't use a path such as
  434. /// `content/index/_index.md` yourself
  435. fn get_sections_map(&self) -> HashMap<String, Section> {
  436. self.sections
  437. .values()
  438. .map(|s| (if s.is_index() { "index".to_string() } else { s.file.components.join("/") }, s.clone()))
  439. .collect()
  440. }
  441. /// Renders a single section
  442. pub fn render_section(&self, section: &Section, render_pages: bool) -> Result<()> {
  443. ensure_directory_exists(&self.output_path)?;
  444. let public = self.output_path.clone();
  445. let mut output_path = public.to_path_buf();
  446. for component in &section.file.components {
  447. output_path.push(component);
  448. if !output_path.exists() {
  449. create_directory(&output_path)?;
  450. }
  451. }
  452. if render_pages {
  453. for page in &section.pages {
  454. self.render_page(page, Some(section))?;
  455. }
  456. }
  457. if !section.meta.should_render() {
  458. return Ok(());
  459. }
  460. if section.meta.is_paginated() {
  461. self.render_paginated(&output_path, section)?;
  462. } else {
  463. let output = section.render_html(
  464. if section.is_index() { self.get_sections_map() } else { HashMap::new() },
  465. &self.tera,
  466. &self.config,
  467. )?;
  468. create_file(&output_path.join("index.html"), &self.inject_livereload(output))?;
  469. }
  470. Ok(())
  471. }
  472. pub fn render_index(&self) -> Result<()> {
  473. self.render_section(&self.sections[&self.base_path.join("content").join("_index.md")], false)
  474. }
  475. /// Renders all sections
  476. pub fn render_sections(&self) -> Result<()> {
  477. for section in self.sections.values() {
  478. self.render_section(section, true)?;
  479. }
  480. Ok(())
  481. }
  482. /// Renders all pages that do not belong to any sections
  483. pub fn render_orphan_pages(&self) -> Result<()> {
  484. ensure_directory_exists(&self.output_path)?;
  485. for page in self.get_all_orphan_pages() {
  486. self.render_page(page, None)?;
  487. }
  488. Ok(())
  489. }
  490. /// Renders a list of pages when the section/index is wanting pagination.
  491. fn render_paginated(&self, output_path: &Path, section: &Section) -> Result<()> {
  492. ensure_directory_exists(&self.output_path)?;
  493. let paginate_path = match section.meta.paginate_path {
  494. Some(ref s) => s.clone(),
  495. None => unreachable!()
  496. };
  497. let paginator = Paginator::new(&section.pages, section);
  498. for (i, pager) in paginator.pagers.iter().enumerate() {
  499. let folder_path = output_path.join(&paginate_path);
  500. let page_path = folder_path.join(&format!("{}", i + 1));
  501. create_directory(&folder_path)?;
  502. create_directory(&page_path)?;
  503. let output = paginator.render_pager(pager, self)?;
  504. if i > 0 {
  505. create_file(&page_path.join("index.html"), &self.inject_livereload(output))?;
  506. } else {
  507. create_file(&output_path.join("index.html"), &self.inject_livereload(output))?;
  508. create_file(&page_path.join("index.html"), &render_redirect_template(&section.permalink, &self.tera)?)?;
  509. }
  510. }
  511. Ok(())
  512. }
  513. }
  514. /// Resolves an internal link (of the `./posts/something.md#hey` sort) to its absolute link
  515. pub fn resolve_internal_link(link: &str, permalinks: &HashMap<String, String>) -> Result<String> {
  516. // First we remove the ./ since that's gutenberg specific
  517. let clean_link = link.replacen("./", "", 1);
  518. // Then we remove any potential anchor
  519. // parts[0] will be the file path and parts[1] the anchor if present
  520. let parts = clean_link.split('#').collect::<Vec<_>>();
  521. match permalinks.get(parts[0]) {
  522. Some(p) => {
  523. if parts.len() > 1 {
  524. Ok(format!("{}#{}", p, parts[1]))
  525. } else {
  526. Ok(p.to_string())
  527. }
  528. },
  529. None => bail!(format!("Relative link {} not found.", link)),
  530. }
  531. }
  #[cfg(test)]
  mod tests {
      use std::collections::HashMap;

      use super::resolve_internal_link;

      /// Permalinks containing a single page, shared by the tests below
      fn about_permalinks() -> HashMap<String, String> {
          let mut permalinks = HashMap::new();
          permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
          permalinks
      }

      #[test]
      fn can_resolve_valid_internal_link() {
          let resolved = resolve_internal_link("./pages/about.md", &about_permalinks()).unwrap();
          assert_eq!(resolved, "https://vincent.is/about");
      }

      #[test]
      fn can_resolve_internal_links_with_anchors() {
          let resolved = resolve_internal_link("./pages/about.md#hello", &about_permalinks()).unwrap();
          assert_eq!(resolved, "https://vincent.is/about#hello");
      }

      #[test]
      fn errors_resolve_inexistant_internal_link() {
          let no_permalinks = HashMap::new();
          let resolved = resolve_internal_link("./pages/about.md#hello", &no_permalinks);
          assert!(resolved.is_err());
      }
  }
  555. }