You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

498 lines
19KB

  1. use std::borrow::Cow::Owned;
  2. use std::collections::HashMap;
  3. use pulldown_cmark as cmark;
  4. use self::cmark::{Parser, Event, Tag, Options, OPTION_ENABLE_TABLES, OPTION_ENABLE_FOOTNOTES};
  5. use regex::Regex;
  6. use slug::slugify;
  7. use syntect::dumps::from_binary;
  8. use syntect::easy::HighlightLines;
  9. use syntect::parsing::SyntaxSet;
  10. use syntect::html::{start_coloured_html_snippet, styles_to_coloured_html, IncludeBackground};
  11. use tera::{Tera, Context};
  12. use config::Config;
  13. use errors::{Result};
  14. use site::resolve_internal_link;
  15. use rendering::highlighting::THEME_SET;
  16. use rendering::short_code::{ShortCode, parse_shortcode, render_simple_shortcode};
  17. // We need to put those in a struct to impl Send and sync
  18. pub struct Setup {
  19. pub syntax_set: SyntaxSet,
  20. }
  21. unsafe impl Send for Setup {}
  22. unsafe impl Sync for Setup {}
  23. lazy_static!{
  24. static ref SHORTCODE_RE: Regex = Regex::new(r#"\{(?:%|\{)\s+([[:alnum:]]+?)\(([[:alnum:]]+?="?.+?"?)\)\s+(?:%|\})\}"#).unwrap();
  25. pub static ref SETUP: Setup = Setup {
  26. syntax_set: {
  27. let mut ps: SyntaxSet = from_binary(include_bytes!("../../sublime_syntaxes/newlines.packdump"));
  28. ps.link_syntaxes();
  29. ps
  30. },
  31. };
  32. }
  33. pub fn markdown_to_html(content: &str, permalinks: &HashMap<String, String>, tera: &Tera, config: &Config) -> Result<String> {
  34. // We try to be smart about highlighting code as it can be time-consuming
  35. // If the global config disables it, then we do nothing. However,
  36. // if we see a code block in the content, we assume that this page needs
  37. // to be highlighted. It could potentially have false positive if the content
  38. // has ``` in it but that seems kind of unlikely
  39. let should_highlight = if config.highlight_code.unwrap() {
  40. content.contains("```")
  41. } else {
  42. false
  43. };
  44. let highlight_theme = config.highlight_theme.clone().unwrap();
  45. // Set while parsing
  46. let mut error = None;
  47. let mut highlighter: Option<HighlightLines> = None;
  48. let mut shortcode_block = None;
  49. // shortcodes live outside of paragraph so we need to ensure we don't close
  50. // a paragraph that has already been closed
  51. let mut added_shortcode = false;
  52. // Don't transform things that look like shortcodes in code blocks
  53. let mut in_code_block = false;
  54. // If we get text in header, we need to insert the id and a anchor
  55. let mut in_header = false;
  56. // pulldown_cmark can send several text events for a title if there are markdown
  57. // specific characters like `!` in them. We only want to insert the anchor the first time
  58. let mut header_already_inserted = false;
  59. // the rendered html
  60. let mut html = String::new();
  61. let mut anchors: Vec<String> = vec![];
  62. // We might have cases where the slug is already present in our list of anchor
  63. // for example an article could have several titles named Example
  64. // We add a counter after the slug if the slug is already present, which
  65. // means we will have example, example-1, example-2 etc
  66. fn find_anchor(anchors: &[String], name: String, level: u8) -> String {
  67. if level == 0 && !anchors.contains(&name) {
  68. return name.to_string();
  69. }
  70. let new_anchor = format!("{}-{}", name, level + 1);
  71. if !anchors.contains(&new_anchor) {
  72. return new_anchor;
  73. }
  74. find_anchor(anchors, name, level + 1)
  75. }
  76. let mut opts = Options::empty();
  77. opts.insert(OPTION_ENABLE_TABLES);
  78. opts.insert(OPTION_ENABLE_FOOTNOTES);
  79. {
  80. let parser = Parser::new_ext(content, opts).map(|event| match event {
  81. Event::Text(text) => {
  82. // if we are in the middle of a code block
  83. if let Some(ref mut highlighter) = highlighter {
  84. let highlighted = &highlighter.highlight(&text);
  85. let html = styles_to_coloured_html(highlighted, IncludeBackground::Yes);
  86. return Event::Html(Owned(html));
  87. }
  88. if in_code_block {
  89. return Event::Text(text);
  90. }
  91. // Shortcode without body
  92. if shortcode_block.is_none() && text.starts_with("{{") && text.ends_with("}}") && SHORTCODE_RE.is_match(&text) {
  93. let (name, args) = parse_shortcode(&text);
  94. added_shortcode = true;
  95. match render_simple_shortcode(tera, &name, &args) {
  96. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  97. Err(e) => {
  98. error = Some(e);
  99. return Event::Html(Owned("".to_string()));
  100. }
  101. }
  102. // non-matching will be returned normally below
  103. }
  104. // Shortcode with a body
  105. if shortcode_block.is_none() && text.starts_with("{%") && text.ends_with("%}") {
  106. if SHORTCODE_RE.is_match(&text) {
  107. let (name, args) = parse_shortcode(&text);
  108. shortcode_block = Some(ShortCode::new(&name, args));
  109. }
  110. // Don't return anything
  111. return Event::Text(Owned("".to_string()));
  112. }
  113. // If we have some text while in a shortcode, it's either the body
  114. // or the end tag
  115. if shortcode_block.is_some() {
  116. if let Some(ref mut shortcode) = shortcode_block {
  117. if text.trim() == "{% end %}" {
  118. added_shortcode = true;
  119. match shortcode.render(tera) {
  120. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  121. Err(e) => {
  122. error = Some(e);
  123. return Event::Html(Owned("".to_string()));
  124. }
  125. }
  126. } else {
  127. shortcode.append(&text);
  128. return Event::Html(Owned("".to_string()));
  129. }
  130. }
  131. }
  132. if in_header {
  133. if header_already_inserted {
  134. return Event::Text(text);
  135. }
  136. let id = find_anchor(&anchors, slugify(&text), 0);
  137. anchors.push(id.clone());
  138. let anchor_link = if config.insert_anchor_links.unwrap() {
  139. let mut context = Context::new();
  140. context.add("id", &id);
  141. tera.render("anchor-link.html", &context).unwrap()
  142. } else {
  143. String::new()
  144. };
  145. header_already_inserted = true;
  146. return Event::Html(Owned(format!(r#"id="{}">{}{}"#, id, anchor_link, text)));
  147. }
  148. // Business as usual
  149. Event::Text(text)
  150. },
  151. Event::Start(Tag::CodeBlock(ref info)) => {
  152. in_code_block = true;
  153. if !should_highlight {
  154. return Event::Html(Owned("<pre><code>".to_owned()));
  155. }
  156. let theme = &THEME_SET.themes[&highlight_theme];
  157. let syntax = info
  158. .split(' ')
  159. .next()
  160. .and_then(|lang| SETUP.syntax_set.find_syntax_by_token(lang))
  161. .unwrap_or_else(|| SETUP.syntax_set.find_syntax_plain_text());
  162. highlighter = Some(HighlightLines::new(syntax, theme));
  163. let snippet = start_coloured_html_snippet(theme);
  164. Event::Html(Owned(snippet))
  165. },
  166. Event::End(Tag::CodeBlock(_)) => {
  167. in_code_block = false;
  168. if !should_highlight{
  169. return Event::Html(Owned("</code></pre>\n".to_owned()))
  170. }
  171. // reset highlight and close the code block
  172. highlighter = None;
  173. Event::Html(Owned("</pre>".to_owned()))
  174. },
  175. // Need to handle relative links
  176. Event::Start(Tag::Link(ref link, ref title)) => {
  177. if in_header {
  178. return Event::Html(Owned("".to_owned()));
  179. }
  180. if link.starts_with("./") {
  181. match resolve_internal_link(link, permalinks) {
  182. Ok(url) => {
  183. return Event::Start(Tag::Link(Owned(url), title.clone()));
  184. },
  185. Err(_) => {
  186. error = Some(format!("Relative link {} not found.", link).into());
  187. return Event::Html(Owned("".to_string()));
  188. }
  189. };
  190. }
  191. Event::Start(Tag::Link(link.clone(), title.clone()))
  192. },
  193. Event::End(Tag::Link(_, _)) => {
  194. if in_header {
  195. return Event::Html(Owned("".to_owned()));
  196. }
  197. event
  198. }
  199. // need to know when we are in a code block to disable shortcodes in them
  200. Event::Start(Tag::Code) => {
  201. in_code_block = true;
  202. event
  203. },
  204. Event::End(Tag::Code) => {
  205. in_code_block = false;
  206. event
  207. },
  208. Event::Start(Tag::Header(num)) => {
  209. in_header = true;
  210. // ugly eh
  211. Event::Html(Owned(format!("<h{} ", num)))
  212. },
  213. Event::End(Tag::Header(_)) => {
  214. in_header = false;
  215. header_already_inserted = false;
  216. event
  217. },
  218. // If we added shortcodes, don't close a paragraph since there's none
  219. Event::End(Tag::Paragraph) => {
  220. if added_shortcode {
  221. added_shortcode = false;
  222. return Event::Html(Owned("".to_owned()));
  223. }
  224. event
  225. },
  226. // Ignore softbreaks inside shortcodes
  227. Event::SoftBreak => {
  228. if shortcode_block.is_some() {
  229. return Event::Html(Owned("".to_owned()));
  230. }
  231. event
  232. },
  233. _ => {
  234. // println!("event = {:?}", event);
  235. event
  236. },
  237. });
  238. cmark::html::push_html(&mut html, parser);
  239. }
  240. match error {
  241. Some(e) => Err(e),
  242. None => Ok(html.replace("<p></p>", "")),
  243. }
  244. }
  /// Tests for shortcode parsing and for the markdown to HTML rendering.
  #[cfg(test)]
  mod tests {
      use std::collections::HashMap;

      use templates::GUTENBERG_TERA;
      use tera::Tera;

      use config::Config;
      use super::{markdown_to_html, parse_shortcode};

      #[test]
      fn can_parse_simple_shortcode_one_arg() {
          let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc") }}"#);
          assert_eq!(name, "youtube");
          assert_eq!(args["id"], "w7Ft2ymGmfc");
      }

      #[test]
      fn can_parse_simple_shortcode_several_arg() {
          let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc", autoplay=true) }}"#);
          assert_eq!(name, "youtube");
          assert_eq!(args["id"], "w7Ft2ymGmfc");
          assert_eq!(args["autoplay"], "true");
      }

      #[test]
      fn can_parse_block_shortcode_several_arg() {
          let (name, args) = parse_shortcode(r#"{% youtube(id="w7Ft2ymGmfc", autoplay=true) %}"#);
          assert_eq!(name, "youtube");
          assert_eq!(args["id"], "w7Ft2ymGmfc");
          assert_eq!(args["autoplay"], "true");
      }

      #[test]
      fn can_do_markdown_to_html_simple() {
          let res = markdown_to_html("hello", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
          assert_eq!(res, "<p>hello</p>\n");
      }

      #[test]
      fn doesnt_highlight_code_block_with_highlighting_off() {
          let mut config = Config::default();
          config.highlight_code = Some(false);
          let res = markdown_to_html("```\n$ gutenberg server\n```", &HashMap::new(), &Tera::default(), &config).unwrap();
          assert_eq!(
              res,
              "<pre><code>$ gutenberg server\n</code></pre>\n"
          );
      }

      #[test]
      fn can_highlight_code_block_no_lang() {
          let res = markdown_to_html("```\n$ gutenberg server\n$ ping\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
          assert_eq!(
              res,
              "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">$ gutenberg server\n</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">$ ping\n</span></pre>"
          );
      }

      #[test]
      fn can_highlight_code_block_with_lang() {
          let res = markdown_to_html("```python\nlist.append(1)\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
          assert_eq!(
              res,
              "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">.</span><span style=\"background-color:#2b303b;color:#bf616a;\">append</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">(</span><span style=\"background-color:#2b303b;color:#d08770;\">1</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">)</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">\n</span></pre>"
          );
      }

      #[test]
      fn can_highlight_code_block_with_unknown_lang() {
          let res = markdown_to_html("```yolo\nlist.append(1)\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
          // defaults to plain text
          assert_eq!(
              res,
              "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list.append(1)\n</span></pre>"
          );
      }

      #[test]
      fn can_render_shortcode() {
          // The blank line matters: it puts the shortcode in its own paragraph so
          // that the paragraph before it is closed normally
          let content = "\nHello\n\n{{ youtube(id=\"ub36ffWAqgQ\") }}\n";
          let res = markdown_to_html(content, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
          assert!(res.contains("<p>Hello</p>\n<div >"));
          assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
      }

      #[test]
      fn can_render_several_shortcode_in_row() {
          // Every shortcode is separated by a blank line so each one lives in its
          // own paragraph
          let content = "\nHello\n\n\
              {{ youtube(id=\"ub36ffWAqgQ\") }}\n\n\
              {{ youtube(id=\"ub36ffWAqgQ\", autoplay=true) }}\n\n\
              {{ vimeo(id=\"210073083\") }}\n\n\
              {{ gist(url=\"https://gist.github.com/Keats/32d26f699dcc13ebd41b\") }}\n";
          let res = markdown_to_html(content, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
          assert!(res.contains("<p>Hello</p>\n<div >"));
          assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
          assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ?autoplay=1""#));
          assert!(res.contains(r#"//player.vimeo.com/video/210073083""#));
      }

      #[test]
      fn doesnt_render_shortcode_in_code_block() {
          // Triple backticks on a single line are rendered as an inline code span,
          // as the `<p><code>` in the expected output shows
          let res = markdown_to_html(r#"```{{ youtube(id="w7Ft2ymGmfc") }}```"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
          assert_eq!(res, "<p><code>{{ youtube(id=&quot;w7Ft2ymGmfc&quot;) }}</code></p>\n");
      }

      #[test]
      fn can_render_shortcode_with_body() {
          let mut tera = Tera::default();
          tera.extend(&GUTENBERG_TERA).unwrap();
          tera.add_raw_template("shortcodes/quote.html", "<blockquote>{{ body }} - {{ author}}</blockquote>").unwrap();
          // No blank line here: the shortcode shares the paragraph of `Hello`
          let content = "\nHello\n{% quote(author=\"Keats\") %}\nA quote\n{% end %}\n";
          let res = markdown_to_html(content, &HashMap::new(), &tera, &Config::default()).unwrap();
          assert_eq!(res, "<p>Hello\n</p><blockquote>A quote - Keats</blockquote>");
      }

      #[test]
      fn errors_rendering_unknown_shortcode() {
          let res = markdown_to_html("{{ hello(flash=true) }}", &HashMap::new(), &Tera::default(), &Config::default());
          assert!(res.is_err());
      }

      #[test]
      fn can_make_valid_relative_link() {
          let mut permalinks = HashMap::new();
          permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
          let res = markdown_to_html(
              r#"[rel link](./pages/about.md), [abs link](https://vincent.is/about)"#,
              &permalinks,
              &GUTENBERG_TERA,
              &Config::default()
          ).unwrap();

          assert!(
              res.contains(r#"<p><a href="https://vincent.is/about">rel link</a>, <a href="https://vincent.is/about">abs link</a></p>"#)
          );
      }

      #[test]
      fn can_make_relative_links_with_anchors() {
          let mut permalinks = HashMap::new();
          permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
          let res = markdown_to_html(
              r#"[rel link](./pages/about.md#cv)"#,
              &permalinks,
              &GUTENBERG_TERA,
              &Config::default()
          ).unwrap();

          assert!(
              res.contains(r#"<p><a href="https://vincent.is/about#cv">rel link</a></p>"#)
          );
      }

      #[test]
      fn errors_relative_link_inexistant() {
          let res = markdown_to_html("[rel link](./pages/about.md)", &HashMap::new(), &Tera::default(), &Config::default());
          assert!(res.is_err());
      }

      #[test]
      fn can_add_id_to_headers() {
          let res = markdown_to_html(r#"# Hello"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
          assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n");
      }

      #[test]
      fn can_add_id_to_headers_same_slug() {
          let res = markdown_to_html("# Hello\n# Hello", &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
          assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n<h1 id=\"hello-1\">Hello</h1>\n");
      }

      #[test]
      fn can_insert_anchor() {
          let mut config = Config::default();
          config.insert_anchor_links = Some(true);
          let res = markdown_to_html("# Hello", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
          assert_eq!(
              res,
              "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello</h1>\n"
          );
      }

      // See https://github.com/Keats/gutenberg/issues/42
      #[test]
      fn can_insert_anchor_with_exclamation_mark() {
          let mut config = Config::default();
          config.insert_anchor_links = Some(true);
          let res = markdown_to_html("# Hello!", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
          assert_eq!(
              res,
              "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello!</h1>\n"
          );
      }

      // See https://github.com/Keats/gutenberg/issues/53
      #[test]
      fn can_insert_anchor_with_link() {
          let mut config = Config::default();
          config.insert_anchor_links = Some(true);
          let res = markdown_to_html("## [](#xresources)Xresources", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
          assert_eq!(
              res,
              "<h2 id=\"xresources\"><a class=\"anchor\" href=\"#xresources\" aria-label=\"Anchor link for: xresources\">🔗</a>\nXresources</h2>\n"
          );
      }

      #[test]
      fn can_insert_anchor_with_other_special_chars() {
          let mut config = Config::default();
          config.insert_anchor_links = Some(true);
          let res = markdown_to_html("# Hello*_()", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
          assert_eq!(
              res,
              "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello*_()</h1>\n"
          );
      }
  }