You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

482 lines
15KB

//! A page: either a blog post or a basic standalone page.
  2. use std::cmp::Ordering;
  3. use std::fs::File;
  4. use std::io::prelude::*;
  5. use std::path::Path;
  6. use std::result::Result as StdResult;
  7. use regex::Regex;
  8. use tera::{Tera, Context};
  9. use serde::ser::{SerializeStruct, self};
  10. use slug::slugify;
  11. use errors::{Result, ResultExt};
  12. use config::Config;
  13. use front_matter::{FrontMatter};
  14. use markdown::markdown_to_html;
  15. lazy_static! {
  16. static ref PAGE_RE: Regex = Regex::new(r"^\n?\+\+\+\n((?s).*(?-s))\+\+\+\n((?s).*(?-s))$").unwrap();
  17. static ref SUMMARY_RE: Regex = Regex::new(r"<!-- more -->").unwrap();
  18. static ref CODE_BLOCK_RE: Regex = Regex::new(r"```").unwrap();
  19. }
/// A single page of the site: the parsed front matter, the markdown body and
/// everything derived from them (slug, URLs, rendered HTML, summary).
///
/// NOTE(review): `Serialize` is implemented by hand for `Page` in this file, so the
/// `#[serde(skip_serializing)]` markers below appear to be informational only —
/// confirm before relying on them.
#[derive(Clone, Debug, PartialEq, Deserialize)]
pub struct Page {
    /// .md filepath, excluding the content/ bit
    #[serde(skip_serializing)]
    pub filepath: String,
    /// The name of the .md file, without its extension (taken from the file stem)
    #[serde(skip_serializing)]
    pub filename: String,
    /// The directories above our .md file are called sections
    /// for example a file at content/kb/solutions/blabla.md will have 2 sections:
    /// `kb` and `solutions`.
    /// Left empty when the front matter sets a custom `url`.
    #[serde(skip_serializing)]
    pub sections: Vec<String>,
    /// The actual content of the page, in markdown (front matter stripped)
    #[serde(skip_serializing)]
    pub raw_content: String,
    /// The page content rendered to HTML
    pub content: String,
    /// The front matter meta-data
    pub meta: FrontMatter,
    /// The slug of that page.
    /// First tries to find the slug in the meta and defaults to the slugified
    /// filename otherwise
    pub slug: String,
    /// The relative URL of the page, stored without a leading `/`
    pub url: String,
    /// The full URL for that page: the configured base URL joined with `url`
    pub permalink: String,
    /// The summary for the article rendered to HTML, defaults to empty string
    /// When <!-- more --> is found in the text, will take the content up to that part
    /// as summary
    pub summary: String,
    /// The previous page, by date
    pub previous: Option<Box<Page>>,
    /// The next page, by date
    pub next: Option<Box<Page>>,
}
  56. impl Page {
  57. pub fn new(meta: FrontMatter) -> Page {
  58. Page {
  59. filepath: "".to_string(),
  60. filename: "".to_string(),
  61. sections: vec![],
  62. raw_content: "".to_string(),
  63. content: "".to_string(),
  64. slug: "".to_string(),
  65. url: "".to_string(),
  66. permalink: "".to_string(),
  67. summary: "".to_string(),
  68. meta: meta,
  69. previous: None,
  70. next: None,
  71. }
  72. }
  73. // Get word count and estimated reading time
  74. pub fn get_reading_analytics(&self) -> (usize, usize) {
  75. // Only works for latin language but good enough for a start
  76. let word_count: usize = self.raw_content.split_whitespace().count();
  77. // https://help.medium.com/hc/en-us/articles/214991667-Read-time
  78. // 275 seems a bit too high though
  79. (word_count, (word_count / 200))
  80. }
  81. // Parse a page given the content of the .md file
  82. // Files without front matter or with invalid front matter are considered
  83. // erroneous
  84. pub fn parse(filepath: &str, content: &str, config: &Config) -> Result<Page> {
  85. // 1. separate front matter from content
  86. if !PAGE_RE.is_match(content) {
  87. bail!("Couldn't find front matter in `{}`. Did you forget to add `+++`?", filepath);
  88. }
  89. // 2. extract the front matter and the content
  90. let caps = PAGE_RE.captures(content).unwrap();
  91. // caps[0] is the full match
  92. let front_matter = &caps[1];
  93. let content = &caps[2];
  94. // 3. create our page, parse front matter and assign all of that
  95. let meta = FrontMatter::parse(&front_matter)
  96. .chain_err(|| format!("Error when parsing front matter of file `{}`", filepath))?;
  97. let mut page = Page::new(meta);
  98. page.filepath = filepath.to_string();
  99. page.raw_content = content.to_string();
  100. // We try to be smart about highlighting code as it can be time-consuming
  101. // If the global config disables it, then we do nothing. However,
  102. // if we see a code block in the content, we assume that this page needs
  103. // to be highlighted. It could potentially have false positive if the content
  104. // has ``` in it but that seems kind of unlikely
  105. let mut should_highlight = config.highlight_code.unwrap();
  106. if should_highlight {
  107. should_highlight = CODE_BLOCK_RE.is_match(&page.raw_content);
  108. }
  109. page.content = markdown_to_html(&page.raw_content, should_highlight);
  110. if page.raw_content.contains("<!-- more -->") {
  111. page.summary = {
  112. let summary = SUMMARY_RE.split(&page.raw_content).collect::<Vec<&str>>()[0];
  113. markdown_to_html(summary, should_highlight)
  114. }
  115. }
  116. let path = Path::new(filepath);
  117. page.filename = path.file_stem().expect("Couldn't get filename").to_string_lossy().to_string();
  118. page.slug = {
  119. if let Some(ref slug) = page.meta.slug {
  120. slug.trim().to_string()
  121. } else {
  122. slugify(page.filename.clone())
  123. }
  124. };
  125. // 4. Find sections
  126. // Pages with custom urls exists outside of sections
  127. if let Some(ref u) = page.meta.url {
  128. page.url = u.trim().to_string();
  129. } else {
  130. // find out if we have sections
  131. for section in path.parent().unwrap().components() {
  132. page.sections.push(section.as_ref().to_string_lossy().to_string());
  133. }
  134. if !page.sections.is_empty() {
  135. page.url = format!("{}/{}", page.sections.join("/"), page.slug);
  136. } else {
  137. page.url = format!("{}", page.slug);
  138. }
  139. }
  140. page.permalink = if config.base_url.ends_with("/") {
  141. format!("{}{}", config.base_url, page.url)
  142. } else {
  143. format!("{}/{}", config.base_url, page.url)
  144. };
  145. Ok(page)
  146. }
  147. pub fn from_file<P: AsRef<Path>>(path: P, config: &Config) -> Result<Page> {
  148. let path = path.as_ref();
  149. let mut content = String::new();
  150. File::open(path)
  151. .chain_err(|| format!("Failed to open '{:?}'", path.display()))?
  152. .read_to_string(&mut content)?;
  153. // Remove the content string from name
  154. // Maybe get a path as an arg instead and use strip_prefix?
  155. Page::parse(&path.strip_prefix("content").unwrap().to_string_lossy(), &content, config)
  156. }
  157. /// Renders the page using the default layout, unless specified in front-matter
  158. pub fn render_html(&self, tera: &Tera, config: &Config) -> Result<String> {
  159. let tpl_name = match self.meta.template {
  160. Some(ref l) => l.to_string(),
  161. None => "page.html".to_string()
  162. };
  163. // TODO: create a helper to create context to ensure all contexts
  164. // have the same names
  165. let mut context = Context::new();
  166. context.add("config", config);
  167. context.add("page", self);
  168. tera.render(&tpl_name, &context)
  169. .chain_err(|| format!("Failed to render page '{}'", self.filename))
  170. }
  171. }
  172. impl ser::Serialize for Page {
  173. fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error> where S: ser::Serializer {
  174. let mut state = serializer.serialize_struct("page", 13)?;
  175. state.serialize_field("content", &self.content)?;
  176. state.serialize_field("title", &self.meta.title)?;
  177. state.serialize_field("description", &self.meta.description)?;
  178. state.serialize_field("date", &self.meta.date)?;
  179. state.serialize_field("slug", &self.slug)?;
  180. state.serialize_field("url", &format!("/{}", self.url))?;
  181. state.serialize_field("permalink", &self.permalink)?;
  182. state.serialize_field("tags", &self.meta.tags)?;
  183. state.serialize_field("draft", &self.meta.draft)?;
  184. state.serialize_field("category", &self.meta.category)?;
  185. state.serialize_field("extra", &self.meta.extra)?;
  186. let (word_count, reading_time) = self.get_reading_analytics();
  187. state.serialize_field("word_count", &word_count)?;
  188. state.serialize_field("reading_time", &reading_time)?;
  189. state.end()
  190. }
  191. }
  192. impl PartialOrd for Page {
  193. fn partial_cmp(&self, other: &Page) -> Option<Ordering> {
  194. if self.meta.date.is_none() {
  195. return Some(Ordering::Less);
  196. }
  197. if other.meta.date.is_none() {
  198. return Some(Ordering::Greater);
  199. }
  200. let this_date = self.meta.parse_date().unwrap();
  201. let other_date = other.meta.parse_date().unwrap();
  202. if this_date > other_date {
  203. return Some(Ordering::Less);
  204. }
  205. if this_date < other_date {
  206. return Some(Ordering::Greater);
  207. }
  208. Some(Ordering::Equal)
  209. }
  210. }
// Unit tests for `Page::parse` and `Page::get_reading_analytics`.
// Every fixture is a raw string that starts with a newline followed by a `+++`
// delimited front matter block; the fixtures are whitespace-sensitive, so their
// lines must stay flush with the left margin.
#[cfg(test)]
mod tests {
    use super::{Page};
    use config::Config;

    // A well-formed file yields the front matter values, the raw markdown and
    // the rendered HTML.
    #[test]
    fn test_can_parse_a_valid_page() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
slug = "hello-world"
+++
Hello world"#;
        let res = Page::parse("post.md", content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();

        assert_eq!(page.meta.title, "Hello".to_string());
        assert_eq!(page.meta.slug.unwrap(), "hello-world".to_string());
        assert_eq!(page.raw_content, "Hello world".to_string());
        assert_eq!(page.content, "<p>Hello world</p>\n".to_string());
    }

    // One directory above the file gives exactly one section.
    #[test]
    fn test_can_find_one_parent_directory() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
slug = "hello-world"
+++
Hello world"#;
        let res = Page::parse("posts/intro.md", content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.sections, vec!["posts".to_string()]);
    }

    // Nested directories become sections in outermost-to-innermost order.
    #[test]
    fn test_can_find_multiple_parent_directories() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
slug = "hello-world"
+++
Hello world"#;
        let res = Page::parse("posts/intro/start.md", content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.sections, vec!["posts".to_string(), "intro".to_string()]);
    }

    // The URL is `sections/slug`; a base URL ending in `/` is not given a second one.
    #[test]
    fn test_can_make_url_from_sections_and_slug() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
slug = "hello-world"
+++
Hello world"#;
        let mut conf = Config::default();
        conf.base_url = "http://hello.com/".to_string();
        let res = Page::parse("posts/intro/start.md", content, &conf);
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.url, "posts/intro/hello-world");
        assert_eq!(page.permalink, "http://hello.com/posts/intro/hello-world");
    }

    // A base URL without a trailing `/` gets one inserted before the page URL.
    #[test]
    fn test_can_make_permalink_with_non_trailing_slash_base_url() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
slug = "hello-world"
+++
Hello world"#;
        let mut conf = Config::default();
        conf.base_url = "http://hello.com".to_string();
        let res = Page::parse("posts/intro/start.md", content, &conf);
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.url, "posts/intro/hello-world");
        assert_eq!(page.permalink, format!("{}{}", conf.base_url, "/posts/intro/hello-world"));
    }

    // A file directly under the content root has no sections: the URL is the slug.
    // NOTE(review): the permalink assertion relies on the default base URL ending
    // with `/` — confirm against `Config::default()`.
    #[test]
    fn test_can_make_url_from_slug_only() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
slug = "hello-world"
+++
Hello world"#;
        let res = Page::parse("start.md", content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.url, "hello-world");
        assert_eq!(page.permalink, format!("{}{}", Config::default().base_url, "hello-world"));
    }

    // The opening `+++` line is missing, so parsing must fail.
    #[test]
    fn test_errors_on_invalid_front_matter_format() {
        let content = r#"
title = "Hello"
description = "hey there"
slug = "hello-world"
+++
Hello world"#;
        let res = Page::parse("start.md", content, &Config::default());
        assert!(res.is_err());
    }

    // With no slug in the front matter, the slug is the slugified file name.
    #[test]
    fn test_can_make_slug_from_non_slug_filename() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
+++
Hello world"#;
        let res = Page::parse("file with space.md", content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.slug, "file-with-space");
        assert_eq!(page.permalink, format!("{}{}", Config::default().base_url, "file-with-space"));
    }

    // A leading space in the file name must not leak into the slug.
    #[test]
    fn test_trim_slug_if_needed() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
+++
Hello world"#;
        let res = Page::parse(" file with space.md", content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.slug, "file-with-space");
        assert_eq!(page.permalink, format!("{}{}", Config::default().base_url, "file-with-space"));
    }

    // Two words: the reading time rounds down to zero.
    #[test]
    fn test_reading_analytics_short() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
+++
Hello world"#;
        let res = Page::parse("file with space.md", content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        let (word_count, reading_time) = page.get_reading_analytics();
        assert_eq!(word_count, 2);
        assert_eq!(reading_time, 0);
    }

    // 2 + 1000 * 2 = 2002 words, and 2002 / 200 rounds down to 10.
    #[test]
    fn test_reading_analytics_long() {
        let mut content = r#"
+++
title = "Hello"
description = "hey there"
+++
Hello world"#.to_string();
        for _ in 0..1000 {
            content.push_str(" Hello world");
        }
        let res = Page::parse("hello.md", &content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        let (word_count, reading_time) = page.get_reading_analytics();
        assert_eq!(word_count, 2002);
        assert_eq!(reading_time, 10);
    }

    // Without a summary marker in the body the summary stays empty.
    #[test]
    fn test_automatic_summary_is_empty_string() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
+++
Hello world"#.to_string();
        let res = Page::parse("hello.md", &content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.summary, "");
    }

    // Everything before the summary marker is rendered to HTML as the summary.
    #[test]
    fn test_can_specify_summary() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
+++
Hello world
<!-- more -->
"#.to_string();
        let res = Page::parse("hello.md", &content, &Config::default());
        assert!(res.is_ok());
        let page = res.unwrap();
        assert_eq!(page.summary, "<p>Hello world</p>\n");
    }

    // With highlighting enabled and a fenced block in the body, the rendered
    // output starts with a `<pre` element.
    #[test]
    fn test_can_auto_detect_when_highlighting_needed() {
        let content = r#"
+++
title = "Hello"
description = "hey there"
+++
```
Hey there
```
"#.to_string();
        let mut config = Config::default();
        config.highlight_code = Some(true);
        let res = Page::parse("hello.md", &content, &config);
        assert!(res.is_ok());
        let page = res.unwrap();
        assert!(page.content.starts_with("<pre"));
    }
}