You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

552 lines
23KB

  1. use std::borrow::Cow::Owned;
  2. use pulldown_cmark as cmark;
  3. use self::cmark::{Parser, Event, Tag, Options, OPTION_ENABLE_TABLES, OPTION_ENABLE_FOOTNOTES};
  4. use slug::slugify;
  5. use syntect::easy::HighlightLines;
  6. use syntect::html::{start_coloured_html_snippet, styles_to_coloured_html, IncludeBackground};
  7. use tera::{Context as TeraContext};
  8. use errors::Result;
  9. use utils::site::resolve_internal_link;
  10. use front_matter::InsertAnchor;
  11. use context::Context;
  12. use highlighting::{SYNTAX_SET, THEME_SET};
  13. use short_code::{SHORTCODE_RE, ShortCode, parse_shortcode, render_simple_shortcode};
  14. use table_of_contents::{TempHeader, Header, make_table_of_contents};
  15. pub fn markdown_to_html(content: &str, context: &Context) -> Result<(String, Vec<Header>)> {
  16. // We try to be smart about highlighting code as it can be time-consuming
  17. // If the global config disables it, then we do nothing. However,
  18. // if we see a code block in the content, we assume that this page needs
  19. // to be highlighted. It could potentially have false positive if the content
  20. // has ``` in it but that seems kind of unlikely
  21. let should_highlight = if context.highlight_code {
  22. content.contains("```")
  23. } else {
  24. false
  25. };
  26. // Set while parsing
  27. let mut error = None;
  28. let mut highlighter: Option<HighlightLines> = None;
  29. let mut shortcode_block = None;
  30. // shortcodes live outside of paragraph so we need to ensure we don't close
  31. // a paragraph that has already been closed
  32. let mut added_shortcode = false;
  33. // Don't transform things that look like shortcodes in code blocks
  34. let mut in_code_block = false;
  35. // If we get text in header, we need to insert the id and a anchor
  36. let mut in_header = false;
  37. // pulldown_cmark can send several text events for a title if there are markdown
  38. // specific characters like `!` in them. We only want to insert the anchor the first time
  39. let mut header_already_inserted = false;
  40. let mut anchors: Vec<String> = vec![];
  41. // the rendered html
  42. let mut html = String::new();
  43. // We might have cases where the slug is already present in our list of anchor
  44. // for example an article could have several titles named Example
  45. // We add a counter after the slug if the slug is already present, which
  46. // means we will have example, example-1, example-2 etc
  47. fn find_anchor(anchors: &[String], name: String, level: u8) -> String {
  48. if level == 0 && !anchors.contains(&name) {
  49. return name.to_string();
  50. }
  51. let new_anchor = format!("{}-{}", name, level + 1);
  52. if !anchors.contains(&new_anchor) {
  53. return new_anchor;
  54. }
  55. find_anchor(anchors, name, level + 1)
  56. }
  57. let mut headers = vec![];
  58. // Defaults to a 0 level so not a real header
  59. // It should be an Option ideally but not worth the hassle to update
  60. let mut temp_header = TempHeader::default();
  61. let mut opts = Options::empty();
  62. opts.insert(OPTION_ENABLE_TABLES);
  63. opts.insert(OPTION_ENABLE_FOOTNOTES);
  64. {
  65. let parser = Parser::new_ext(content, opts).map(|event| match event {
  66. Event::Text(text) => {
  67. // if we are in the middle of a code block
  68. if let Some(ref mut highlighter) = highlighter {
  69. let highlighted = &highlighter.highlight(&text);
  70. let html = styles_to_coloured_html(highlighted, IncludeBackground::Yes);
  71. return Event::Html(Owned(html));
  72. }
  73. if in_code_block {
  74. return Event::Text(text);
  75. }
  76. // Shortcode without body
  77. if shortcode_block.is_none() && text.starts_with("{{") && text.ends_with("}}") && SHORTCODE_RE.is_match(&text) {
  78. let (name, args) = parse_shortcode(&text);
  79. added_shortcode = true;
  80. match render_simple_shortcode(context.tera, &name, &args) {
  81. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  82. Err(e) => {
  83. error = Some(e);
  84. return Event::Html(Owned("".to_string()));
  85. }
  86. }
  87. // non-matching will be returned normally below
  88. }
  89. // Shortcode with a body
  90. if shortcode_block.is_none() && text.starts_with("{%") && text.ends_with("%}") {
  91. if SHORTCODE_RE.is_match(&text) {
  92. let (name, args) = parse_shortcode(&text);
  93. shortcode_block = Some(ShortCode::new(&name, args));
  94. }
  95. // Don't return anything
  96. return Event::Text(Owned("".to_string()));
  97. }
  98. // If we have some text while in a shortcode, it's either the body
  99. // or the end tag
  100. if shortcode_block.is_some() {
  101. if let Some(ref mut shortcode) = shortcode_block {
  102. if text.trim() == "{% end %}" {
  103. added_shortcode = true;
  104. match shortcode.render(context.tera) {
  105. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  106. Err(e) => {
  107. error = Some(e);
  108. return Event::Html(Owned("".to_string()));
  109. }
  110. }
  111. } else {
  112. shortcode.append(&text);
  113. return Event::Html(Owned("".to_string()));
  114. }
  115. }
  116. }
  117. if in_header {
  118. if header_already_inserted {
  119. return Event::Text(text);
  120. }
  121. let id = find_anchor(&anchors, slugify(&text), 0);
  122. anchors.push(id.clone());
  123. let anchor_link = if context.should_insert_anchor() {
  124. let mut c = TeraContext::new();
  125. c.add("id", &id);
  126. context.tera.render("anchor-link.html", &c).unwrap()
  127. } else {
  128. String::new()
  129. };
  130. // update the header and add it to the list
  131. temp_header.id = id.clone();
  132. temp_header.title = text.clone().into_owned();
  133. temp_header.permalink = format!("{}#{}", context.current_page_permalink, id);
  134. headers.push(temp_header.clone());
  135. temp_header = TempHeader::default();
  136. header_already_inserted = true;
  137. let event = match context.insert_anchor {
  138. InsertAnchor::Left => Event::Html(Owned(format!(r#"id="{}">{}{}"#, id, anchor_link, text))),
  139. InsertAnchor::Right => Event::Html(Owned(format!(r#"id="{}">{}{}"#, id, text, anchor_link))),
  140. InsertAnchor::None => Event::Html(Owned(format!(r#"id="{}">{}"#, id, text)))
  141. };
  142. return event;
  143. }
  144. // Business as usual
  145. Event::Text(text)
  146. },
  147. Event::Start(Tag::CodeBlock(ref info)) => {
  148. in_code_block = true;
  149. if !should_highlight {
  150. return Event::Html(Owned("<pre><code>".to_owned()));
  151. }
  152. let theme = &THEME_SET.themes[&context.highlight_theme];
  153. highlighter = SYNTAX_SET.with(|ss| {
  154. let syntax = info
  155. .split(' ')
  156. .next()
  157. .and_then(|lang| ss.find_syntax_by_token(lang))
  158. .unwrap_or_else(|| ss.find_syntax_plain_text());
  159. Some(HighlightLines::new(syntax, theme))
  160. });
  161. let snippet = start_coloured_html_snippet(theme);
  162. Event::Html(Owned(snippet))
  163. },
  164. Event::End(Tag::CodeBlock(_)) => {
  165. in_code_block = false;
  166. if !should_highlight{
  167. return Event::Html(Owned("</code></pre>\n".to_owned()))
  168. }
  169. // reset highlight and close the code block
  170. highlighter = None;
  171. Event::Html(Owned("</pre>".to_owned()))
  172. },
  173. // Need to handle relative links
  174. Event::Start(Tag::Link(ref link, ref title)) => {
  175. if in_header {
  176. return Event::Html(Owned("".to_owned()));
  177. }
  178. if link.starts_with("./") {
  179. match resolve_internal_link(link, context.permalinks) {
  180. Ok(url) => {
  181. return Event::Start(Tag::Link(Owned(url), title.clone()));
  182. },
  183. Err(_) => {
  184. error = Some(format!("Relative link {} not found.", link).into());
  185. return Event::Html(Owned("".to_string()));
  186. }
  187. };
  188. }
  189. Event::Start(Tag::Link(link.clone(), title.clone()))
  190. },
  191. Event::End(Tag::Link(_, _)) => {
  192. if in_header {
  193. return Event::Html(Owned("".to_owned()));
  194. }
  195. event
  196. }
  197. // need to know when we are in a code block to disable shortcodes in them
  198. Event::Start(Tag::Code) => {
  199. in_code_block = true;
  200. event
  201. },
  202. Event::End(Tag::Code) => {
  203. in_code_block = false;
  204. event
  205. },
  206. Event::Start(Tag::Header(num)) => {
  207. in_header = true;
  208. temp_header = TempHeader::new(num);
  209. // ugly eh
  210. Event::Html(Owned(format!("<h{} ", num)))
  211. },
  212. Event::End(Tag::Header(_)) => {
  213. in_header = false;
  214. header_already_inserted = false;
  215. event
  216. },
  217. // If we added shortcodes, don't close a paragraph since there's none
  218. Event::End(Tag::Paragraph) => {
  219. if added_shortcode {
  220. added_shortcode = false;
  221. return Event::Html(Owned("".to_owned()));
  222. }
  223. event
  224. },
  225. // Ignore softbreaks inside shortcodes
  226. Event::SoftBreak => {
  227. if shortcode_block.is_some() {
  228. return Event::Html(Owned("".to_owned()));
  229. }
  230. event
  231. },
  232. _ => {
  233. // println!("event = {:?}", event);
  234. event
  235. },
  236. });
  237. cmark::html::push_html(&mut html, parser);
  238. }
  239. match error {
  240. Some(e) => Err(e),
  241. None => Ok((html.replace("<p></p>", ""), make_table_of_contents(headers))),
  242. }
  243. }
  244. #[cfg(test)]
  245. mod tests {
  246. use std::collections::HashMap;
  247. use tera::Tera;
  248. use front_matter::InsertAnchor;
  249. use templates::GUTENBERG_TERA;
  250. use context::Context;
  251. use super::markdown_to_html;
  252. #[test]
  253. fn can_do_markdown_to_html_simple() {
  254. let tera_ctx = Tera::default();
  255. let permalinks_ctx = HashMap::new();
  256. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  257. let res = markdown_to_html("hello", &context).unwrap();
  258. assert_eq!(res.0, "<p>hello</p>\n");
  259. }
  260. #[test]
  261. fn doesnt_highlight_code_block_with_highlighting_off() {
  262. let tera_ctx = Tera::default();
  263. let permalinks_ctx = HashMap::new();
  264. let mut context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  265. context.highlight_code = false;
  266. let res = markdown_to_html("```\n$ gutenberg server\n```", &context).unwrap();
  267. assert_eq!(
  268. res.0,
  269. "<pre><code>$ gutenberg server\n</code></pre>\n"
  270. );
  271. }
  272. #[test]
  273. fn can_highlight_code_block_no_lang() {
  274. let tera_ctx = Tera::default();
  275. let permalinks_ctx = HashMap::new();
  276. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  277. let res = markdown_to_html("```\n$ gutenberg server\n$ ping\n```", &context).unwrap();
  278. assert_eq!(
  279. res.0,
  280. "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">$ gutenberg server\n</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">$ ping\n</span></pre>"
  281. );
  282. }
  283. #[test]
  284. fn can_highlight_code_block_with_lang() {
  285. let tera_ctx = Tera::default();
  286. let permalinks_ctx = HashMap::new();
  287. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  288. let res = markdown_to_html("```python\nlist.append(1)\n```", &context).unwrap();
  289. assert_eq!(
  290. res.0,
  291. "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">.</span><span style=\"background-color:#2b303b;color:#bf616a;\">append</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">(</span><span style=\"background-color:#2b303b;color:#d08770;\">1</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">)</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">\n</span></pre>"
  292. );
  293. }
  294. #[test]
  295. fn can_higlight_code_block_with_unknown_lang() {
  296. let tera_ctx = Tera::default();
  297. let permalinks_ctx = HashMap::new();
  298. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  299. let res = markdown_to_html("```yolo\nlist.append(1)\n```", &context).unwrap();
  300. // defaults to plain text
  301. assert_eq!(
  302. res.0,
  303. "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list.append(1)\n</span></pre>"
  304. );
  305. }
  306. #[test]
  307. fn can_render_shortcode() {
  308. let permalinks_ctx = HashMap::new();
  309. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  310. let res = markdown_to_html(r#"
  311. Hello
  312. {{ youtube(id="ub36ffWAqgQ") }}
  313. "#, &context).unwrap();
  314. assert!(res.0.contains("<p>Hello</p>\n<div >"));
  315. assert!(res.0.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
  316. }
  317. #[test]
  318. fn can_render_several_shortcode_in_row() {
  319. let permalinks_ctx = HashMap::new();
  320. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  321. let res = markdown_to_html(r#"
  322. Hello
  323. {{ youtube(id="ub36ffWAqgQ") }}
  324. {{ youtube(id="ub36ffWAqgQ", autoplay=true) }}
  325. {{ vimeo(id="210073083") }}
  326. {{ streamable(id="c0ic") }}
  327. {{ gist(url="https://gist.github.com/Keats/32d26f699dcc13ebd41b") }}
  328. "#, &context).unwrap();
  329. assert!(res.0.contains("<p>Hello</p>\n<div >"));
  330. assert!(res.0.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
  331. assert!(res.0.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ?autoplay=1""#));
  332. assert!(res.0.contains(r#"<iframe src="https://www.streamable.com/e/c0ic""#));
  333. assert!(res.0.contains(r#"//player.vimeo.com/video/210073083""#));
  334. }
  335. #[test]
  336. fn doesnt_render_shortcode_in_code_block() {
  337. let permalinks_ctx = HashMap::new();
  338. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  339. let res = markdown_to_html(r#"```{{ youtube(id="w7Ft2ymGmfc") }}```"#, &context).unwrap();
  340. assert_eq!(res.0, "<p><code>{{ youtube(id=&quot;w7Ft2ymGmfc&quot;) }}</code></p>\n");
  341. }
  342. #[test]
  343. fn can_render_shortcode_with_body() {
  344. let mut tera = Tera::default();
  345. tera.extend(&GUTENBERG_TERA).unwrap();
  346. tera.add_raw_template("shortcodes/quote.html", "<blockquote>{{ body }} - {{ author}}</blockquote>").unwrap();
  347. let permalinks_ctx = HashMap::new();
  348. let context = Context::new(&tera, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  349. let res = markdown_to_html(r#"
  350. Hello
  351. {% quote(author="Keats") %}
  352. A quote
  353. {% end %}
  354. "#, &context).unwrap();
  355. assert_eq!(res.0, "<p>Hello\n</p><blockquote>A quote - Keats</blockquote>");
  356. }
  357. #[test]
  358. fn errors_rendering_unknown_shortcode() {
  359. let tera_ctx = Tera::default();
  360. let permalinks_ctx = HashMap::new();
  361. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  362. let res = markdown_to_html("{{ hello(flash=true) }}", &context);
  363. assert!(res.is_err());
  364. }
  365. #[test]
  366. fn can_make_valid_relative_link() {
  367. let mut permalinks = HashMap::new();
  368. permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
  369. let tera_ctx = Tera::default();
  370. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks, InsertAnchor::None);
  371. let res = markdown_to_html(
  372. r#"[rel link](./pages/about.md), [abs link](https://vincent.is/about)"#,
  373. &context
  374. ).unwrap();
  375. assert!(
  376. res.0.contains(r#"<p><a href="https://vincent.is/about">rel link</a>, <a href="https://vincent.is/about">abs link</a></p>"#)
  377. );
  378. }
  379. #[test]
  380. fn can_make_relative_links_with_anchors() {
  381. let mut permalinks = HashMap::new();
  382. permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
  383. let tera_ctx = Tera::default();
  384. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks, InsertAnchor::None);
  385. let res = markdown_to_html(r#"[rel link](./pages/about.md#cv)"#, &context).unwrap();
  386. assert!(
  387. res.0.contains(r#"<p><a href="https://vincent.is/about#cv">rel link</a></p>"#)
  388. );
  389. }
  390. #[test]
  391. fn errors_relative_link_inexistant() {
  392. let tera_ctx = Tera::default();
  393. let permalinks_ctx = HashMap::new();
  394. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  395. let res = markdown_to_html("[rel link](./pages/about.md)", &context);
  396. assert!(res.is_err());
  397. }
  398. #[test]
  399. fn can_add_id_to_headers() {
  400. let tera_ctx = Tera::default();
  401. let permalinks_ctx = HashMap::new();
  402. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  403. let res = markdown_to_html(r#"# Hello"#, &context).unwrap();
  404. assert_eq!(res.0, "<h1 id=\"hello\">Hello</h1>\n");
  405. }
  406. #[test]
  407. fn can_add_id_to_headers_same_slug() {
  408. let tera_ctx = Tera::default();
  409. let permalinks_ctx = HashMap::new();
  410. let context = Context::new(&tera_ctx, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::None);
  411. let res = markdown_to_html("# Hello\n# Hello", &context).unwrap();
  412. assert_eq!(res.0, "<h1 id=\"hello\">Hello</h1>\n<h1 id=\"hello-1\">Hello</h1>\n");
  413. }
  414. #[test]
  415. fn can_insert_anchor_left() {
  416. let permalinks_ctx = HashMap::new();
  417. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::Left);
  418. let res = markdown_to_html("# Hello", &context).unwrap();
  419. assert_eq!(
  420. res.0,
  421. "<h1 id=\"hello\"><a class=\"gutenberg-anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello</h1>\n"
  422. );
  423. }
  424. #[test]
  425. fn can_insert_anchor_right() {
  426. let permalinks_ctx = HashMap::new();
  427. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::Right);
  428. let res = markdown_to_html("# Hello", &context).unwrap();
  429. assert_eq!(
  430. res.0,
  431. "<h1 id=\"hello\">Hello<a class=\"gutenberg-anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\n</h1>\n"
  432. );
  433. }
  434. // See https://github.com/Keats/gutenberg/issues/42
  435. #[test]
  436. fn can_insert_anchor_with_exclamation_mark() {
  437. let permalinks_ctx = HashMap::new();
  438. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::Left);
  439. let res = markdown_to_html("# Hello!", &context).unwrap();
  440. assert_eq!(
  441. res.0,
  442. "<h1 id=\"hello\"><a class=\"gutenberg-anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello!</h1>\n"
  443. );
  444. }
  445. // See https://github.com/Keats/gutenberg/issues/53
  446. #[test]
  447. fn can_insert_anchor_with_link() {
  448. let permalinks_ctx = HashMap::new();
  449. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::Left);
  450. let res = markdown_to_html("## [](#xresources)Xresources", &context).unwrap();
  451. assert_eq!(
  452. res.0,
  453. "<h2 id=\"xresources\"><a class=\"gutenberg-anchor\" href=\"#xresources\" aria-label=\"Anchor link for: xresources\">🔗</a>\nXresources</h2>\n"
  454. );
  455. }
  456. #[test]
  457. fn can_insert_anchor_with_other_special_chars() {
  458. let permalinks_ctx = HashMap::new();
  459. let context = Context::new(&GUTENBERG_TERA, true, "base16-ocean-dark".to_string(), "", &permalinks_ctx, InsertAnchor::Left);
  460. let res = markdown_to_html("# Hello*_()", &context).unwrap();
  461. assert_eq!(
  462. res.0,
  463. "<h1 id=\"hello\"><a class=\"gutenberg-anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello*_()</h1>\n"
  464. );
  465. }
  466. #[test]
  467. fn can_make_toc() {
  468. let permalinks_ctx = HashMap::new();
  469. let context = Context::new(
  470. &GUTENBERG_TERA,
  471. true,
  472. "base16-ocean-dark".to_string(),
  473. "https://mysite.com/something",
  474. &permalinks_ctx,
  475. InsertAnchor::Left
  476. );
  477. let res = markdown_to_html(r#"
  478. # Header 1
  479. ## Header 2
  480. ## Another Header 2
  481. ### Last one
  482. "#, &context).unwrap();
  483. let toc = res.1;
  484. assert_eq!(toc.len(), 1);
  485. assert_eq!(toc[0].children.len(), 2);
  486. assert_eq!(toc[0].children[1].children.len(), 1);
  487. }
  488. }