You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

561 lines
21KB

  1. use std::borrow::Cow::Owned;
  2. use std::collections::HashMap;
  3. use pulldown_cmark as cmark;
  4. use self::cmark::{Parser, Event, Tag, Options, OPTION_ENABLE_TABLES, OPTION_ENABLE_FOOTNOTES};
  5. use regex::Regex;
  6. use slug::slugify;
  7. use syntect::dumps::from_binary;
  8. use syntect::easy::HighlightLines;
  9. use syntect::parsing::SyntaxSet;
  10. use syntect::highlighting::ThemeSet;
  11. use syntect::html::{start_coloured_html_snippet, styles_to_coloured_html, IncludeBackground};
  12. use tera::{Tera, Context};
  13. use config::Config;
  14. use errors::{Result, ResultExt};
  15. use site::resolve_internal_link;
/// Bundles the syntect syntax definitions and colour themes used for code highlighting.
///
/// We need to put those in a struct to be able to impl `Send` and `Sync` on them.
pub struct Setup {
    /// Syntax definitions; queried by language token when a fenced code block starts.
    pub syntax_set: SyntaxSet,
    /// Available highlighting themes, looked up by the theme name set in the config.
    pub theme_set: ThemeSet,
}

// NOTE(review): these impls assert that `SyntaxSet` and `ThemeSet` may be shared
// across threads. Nothing in this file establishes that invariant — confirm against
// the syntect version in use before relying on it.
unsafe impl Send for Setup {}
unsafe impl Sync for Setup {}
lazy_static!{
    // Matches a shortcode call: `{{ name(key="value", ...) }}` or `{% name(key=value) %}`.
    // Capture group 1 is the shortcode name (alphanumeric only), capture group 2 is the
    // raw argument list between the parentheses.
    static ref SHORTCODE_RE: Regex = Regex::new(r#"\{(?:%|\{)\s+([[:alnum:]]+?)\(([[:alnum:]]+?="?.+?"?)\)\s+(?:%|\})\}"#).unwrap();

    // Syntaxes and themes are deserialized once from dumps embedded in the binary.
    pub static ref SETUP: Setup = Setup {
        syntax_set: {
            let mut ps: SyntaxSet = from_binary(include_bytes!("../sublime_syntaxes/newlines.packdump"));
            // NOTE(review): presumably required after loading a dump so that syntaxes
            // referencing each other are resolved — confirm against the syntect docs.
            ps.link_syntaxes();
            ps
        },
        theme_set: from_binary(include_bytes!("../sublime_themes/all.themedump"))
    };
}
  34. /// A shortcode that has a body
  35. /// Called by having some content like {% ... %} body {% end %}
  36. /// We need the struct to hold the data while we're processing the markdown
  37. #[derive(Debug)]
  38. struct ShortCode {
  39. name: String,
  40. args: HashMap<String, String>,
  41. body: String,
  42. }
  43. impl ShortCode {
  44. pub fn new(name: &str, args: HashMap<String, String>) -> ShortCode {
  45. ShortCode {
  46. name: name.to_string(),
  47. args: args,
  48. body: String::new(),
  49. }
  50. }
  51. pub fn append(&mut self, text: &str) {
  52. self.body.push_str(text)
  53. }
  54. pub fn render(&self, tera: &Tera) -> Result<String> {
  55. let mut context = Context::new();
  56. for (key, value) in &self.args {
  57. context.add(key, value);
  58. }
  59. context.add("body", &self.body);
  60. let tpl_name = format!("shortcodes/{}.html", self.name);
  61. tera.render(&tpl_name, &context)
  62. .chain_err(|| format!("Failed to render {} shortcode", self.name))
  63. }
  64. }
  65. /// Parse a shortcode without a body
  66. fn parse_shortcode(input: &str) -> (String, HashMap<String, String>) {
  67. let mut args = HashMap::new();
  68. let caps = SHORTCODE_RE.captures(input).unwrap();
  69. // caps[0] is the full match
  70. let name = &caps[1];
  71. let arg_list = &caps[2];
  72. for arg in arg_list.split(',') {
  73. let bits = arg.split('=').collect::<Vec<_>>();
  74. args.insert(bits[0].trim().to_string(), bits[1].replace("\"", ""));
  75. }
  76. (name.to_string(), args)
  77. }
  78. /// Renders a shortcode or return an error
  79. fn render_simple_shortcode(tera: &Tera, name: &str, args: &HashMap<String, String>) -> Result<String> {
  80. let mut context = Context::new();
  81. for (key, value) in args.iter() {
  82. context.add(key, value);
  83. }
  84. let tpl_name = format!("shortcodes/{}.html", name);
  85. tera.render(&tpl_name, &context).chain_err(|| format!("Failed to render {} shortcode", name))
  86. }
  87. pub fn markdown_to_html(content: &str, permalinks: &HashMap<String, String>, tera: &Tera, config: &Config) -> Result<String> {
  88. // We try to be smart about highlighting code as it can be time-consuming
  89. // If the global config disables it, then we do nothing. However,
  90. // if we see a code block in the content, we assume that this page needs
  91. // to be highlighted. It could potentially have false positive if the content
  92. // has ``` in it but that seems kind of unlikely
  93. let should_highlight = if config.highlight_code.unwrap() {
  94. content.contains("```")
  95. } else {
  96. false
  97. };
  98. let highlight_theme = config.highlight_theme.clone().unwrap();
  99. // Set while parsing
  100. let mut error = None;
  101. let mut highlighter: Option<HighlightLines> = None;
  102. let mut shortcode_block = None;
  103. // shortcodes live outside of paragraph so we need to ensure we don't close
  104. // a paragraph that has already been closed
  105. let mut added_shortcode = false;
  106. // Don't transform things that look like shortcodes in code blocks
  107. let mut in_code_block = false;
  108. // If we get text in header, we need to insert the id and a anchor
  109. let mut in_header = false;
  110. // pulldown_cmark can send several text events for a title if there are markdown
  111. // specific characters like `!` in them. We only want to insert the anchor the first time
  112. let mut header_already_inserted = false;
  113. // the rendered html
  114. let mut html = String::new();
  115. let mut anchors: Vec<String> = vec![];
  116. // We might have cases where the slug is already present in our list of anchor
  117. // for example an article could have several titles named Example
  118. // We add a counter after the slug if the slug is already present, which
  119. // means we will have example, example-1, example-2 etc
  120. fn find_anchor(anchors: &[String], name: String, level: u8) -> String {
  121. if level == 0 && !anchors.contains(&name) {
  122. return name.to_string();
  123. }
  124. let new_anchor = format!("{}-{}", name, level + 1);
  125. if !anchors.contains(&new_anchor) {
  126. return new_anchor;
  127. }
  128. find_anchor(anchors, name, level + 1)
  129. }
  130. let mut opts = Options::empty();
  131. opts.insert(OPTION_ENABLE_TABLES);
  132. opts.insert(OPTION_ENABLE_FOOTNOTES);
  133. {
  134. let parser = Parser::new_ext(content, opts).map(|event| match event {
  135. Event::Text(text) => {
  136. // if we are in the middle of a code block
  137. if let Some(ref mut highlighter) = highlighter {
  138. let highlighted = &highlighter.highlight(&text);
  139. let html = styles_to_coloured_html(highlighted, IncludeBackground::Yes);
  140. return Event::Html(Owned(html));
  141. }
  142. if in_code_block {
  143. return Event::Text(text);
  144. }
  145. // Shortcode without body
  146. if shortcode_block.is_none() && text.starts_with("{{") && text.ends_with("}}") && SHORTCODE_RE.is_match(&text) {
  147. let (name, args) = parse_shortcode(&text);
  148. added_shortcode = true;
  149. match render_simple_shortcode(tera, &name, &args) {
  150. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  151. Err(e) => {
  152. error = Some(e);
  153. return Event::Html(Owned("".to_string()));
  154. }
  155. }
  156. // non-matching will be returned normally below
  157. }
  158. // Shortcode with a body
  159. if shortcode_block.is_none() && text.starts_with("{%") && text.ends_with("%}") {
  160. if SHORTCODE_RE.is_match(&text) {
  161. let (name, args) = parse_shortcode(&text);
  162. shortcode_block = Some(ShortCode::new(&name, args));
  163. }
  164. // Don't return anything
  165. return Event::Text(Owned("".to_string()));
  166. }
  167. // If we have some text while in a shortcode, it's either the body
  168. // or the end tag
  169. if shortcode_block.is_some() {
  170. if let Some(ref mut shortcode) = shortcode_block {
  171. if text.trim() == "{% end %}" {
  172. added_shortcode = true;
  173. match shortcode.render(tera) {
  174. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  175. Err(e) => {
  176. error = Some(e);
  177. return Event::Html(Owned("".to_string()));
  178. }
  179. }
  180. } else {
  181. shortcode.append(&text);
  182. return Event::Html(Owned("".to_string()));
  183. }
  184. }
  185. }
  186. if in_header {
  187. if header_already_inserted {
  188. return Event::Text(text);
  189. }
  190. let id = find_anchor(&anchors, slugify(&text), 0);
  191. anchors.push(id.clone());
  192. let anchor_link = if config.insert_anchor_links.unwrap() {
  193. let mut context = Context::new();
  194. context.add("id", &id);
  195. tera.render("anchor-link.html", &context).unwrap()
  196. } else {
  197. String::new()
  198. };
  199. header_already_inserted = true;
  200. return Event::Html(Owned(format!(r#"id="{}">{}{}"#, id, anchor_link, text)));
  201. }
  202. // Business as usual
  203. Event::Text(text)
  204. },
  205. Event::Start(Tag::CodeBlock(ref info)) => {
  206. in_code_block = true;
  207. if !should_highlight {
  208. return Event::Html(Owned("<pre><code>".to_owned()));
  209. }
  210. let theme = &SETUP.theme_set.themes[&highlight_theme];
  211. let syntax = info
  212. .split(' ')
  213. .next()
  214. .and_then(|lang| SETUP.syntax_set.find_syntax_by_token(lang))
  215. .unwrap_or_else(|| SETUP.syntax_set.find_syntax_plain_text());
  216. highlighter = Some(HighlightLines::new(syntax, theme));
  217. let snippet = start_coloured_html_snippet(theme);
  218. Event::Html(Owned(snippet))
  219. },
  220. Event::End(Tag::CodeBlock(_)) => {
  221. in_code_block = false;
  222. if !should_highlight{
  223. return Event::Html(Owned("</code></pre>\n".to_owned()))
  224. }
  225. // reset highlight and close the code block
  226. highlighter = None;
  227. Event::Html(Owned("</pre>".to_owned()))
  228. },
  229. // Need to handle relative links
  230. Event::Start(Tag::Link(ref link, ref title)) => {
  231. if in_header {
  232. return Event::Html(Owned("".to_owned()));
  233. }
  234. if link.starts_with("./") {
  235. match resolve_internal_link(link, permalinks) {
  236. Ok(url) => {
  237. return Event::Start(Tag::Link(Owned(url), title.clone()));
  238. },
  239. Err(_) => {
  240. error = Some(format!("Relative link {} not found.", link).into());
  241. return Event::Html(Owned("".to_string()));
  242. }
  243. };
  244. }
  245. Event::Start(Tag::Link(link.clone(), title.clone()))
  246. },
  247. Event::End(Tag::Link(_, _)) => {
  248. if in_header {
  249. return Event::Html(Owned("".to_owned()));
  250. }
  251. event
  252. }
  253. // need to know when we are in a code block to disable shortcodes in them
  254. Event::Start(Tag::Code) => {
  255. in_code_block = true;
  256. event
  257. },
  258. Event::End(Tag::Code) => {
  259. in_code_block = false;
  260. event
  261. },
  262. Event::Start(Tag::Header(num)) => {
  263. in_header = true;
  264. // ugly eh
  265. Event::Html(Owned(format!("<h{} ", num)))
  266. },
  267. Event::End(Tag::Header(_)) => {
  268. in_header = false;
  269. header_already_inserted = false;
  270. event
  271. },
  272. // If we added shortcodes, don't close a paragraph since there's none
  273. Event::End(Tag::Paragraph) => {
  274. if added_shortcode {
  275. added_shortcode = false;
  276. return Event::Html(Owned("".to_owned()));
  277. }
  278. event
  279. },
  280. // Ignore softbreaks inside shortcodes
  281. Event::SoftBreak => {
  282. if shortcode_block.is_some() {
  283. return Event::Html(Owned("".to_owned()));
  284. }
  285. event
  286. },
  287. _ => {
  288. // println!("event = {:?}", event);
  289. event
  290. },
  291. });
  292. cmark::html::push_html(&mut html, parser);
  293. }
  294. match error {
  295. Some(e) => Err(e),
  296. None => Ok(html.replace("<p></p>", "")),
  297. }
  298. }
// Unit tests for shortcode parsing and for the markdown rendering: code
// highlighting, shortcodes, relative links and header ids/anchors.
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use templates::GUTENBERG_TERA;
    use tera::Tera;

    use config::Config;
    use super::{markdown_to_html, parse_shortcode};

    // --- Shortcode parsing ---

    #[test]
    fn can_parse_simple_shortcode_one_arg() {
        let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc") }}"#);
        assert_eq!(name, "youtube");
        assert_eq!(args["id"], "w7Ft2ymGmfc");
    }

    #[test]
    fn can_parse_simple_shortcode_several_arg() {
        let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc", autoplay=true) }}"#);
        assert_eq!(name, "youtube");
        assert_eq!(args["id"], "w7Ft2ymGmfc");
        assert_eq!(args["autoplay"], "true");
    }

    #[test]
    fn can_parse_block_shortcode_several_arg() {
        let (name, args) = parse_shortcode(r#"{% youtube(id="w7Ft2ymGmfc", autoplay=true) %}"#);
        assert_eq!(name, "youtube");
        assert_eq!(args["id"], "w7Ft2ymGmfc");
        assert_eq!(args["autoplay"], "true");
    }

    // --- Plain markdown and code highlighting ---

    #[test]
    fn can_do_markdown_to_html_simple() {
        let res = markdown_to_html("hello", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
        assert_eq!(res, "<p>hello</p>\n");
    }

    #[test]
    fn doesnt_highlight_code_block_with_highlighting_off() {
        let mut config = Config::default();
        config.highlight_code = Some(false);
        let res = markdown_to_html("```\n$ gutenberg server\n```", &HashMap::new(), &Tera::default(), &config).unwrap();
        assert_eq!(
            res,
            "<pre><code>$ gutenberg server\n</code></pre>\n"
        );
    }

    #[test]
    fn can_highlight_code_block_no_lang() {
        let res = markdown_to_html("```\n$ gutenberg server\n$ ping\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
        assert_eq!(
            res,
            "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">$ gutenberg server\n</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">$ ping\n</span></pre>"
        );
    }

    #[test]
    fn can_highlight_code_block_with_lang() {
        let res = markdown_to_html("```python\nlist.append(1)\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
        assert_eq!(
            res,
            "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">.</span><span style=\"background-color:#2b303b;color:#bf616a;\">append</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">(</span><span style=\"background-color:#2b303b;color:#d08770;\">1</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">)</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">\n</span></pre>"
        );
    }

    #[test]
    fn can_higlight_code_block_with_unknown_lang() {
        let res = markdown_to_html("```yolo\nlist.append(1)\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
        // defaults to plain text
        assert_eq!(
            res,
            "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list.append(1)\n</span></pre>"
        );
    }

    // --- Shortcode rendering ---

    #[test]
    fn can_render_shortcode() {
        let res = markdown_to_html(r#"
Hello
{{ youtube(id="ub36ffWAqgQ") }}
"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
        assert!(res.contains("<p>Hello</p>\n<div >"));
        assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
    }

    #[test]
    fn can_render_several_shortcode_in_row() {
        let res = markdown_to_html(r#"
Hello
{{ youtube(id="ub36ffWAqgQ") }}
{{ youtube(id="ub36ffWAqgQ", autoplay=true) }}
{{ vimeo(id="210073083") }}
{{ gist(url="https://gist.github.com/Keats/32d26f699dcc13ebd41b") }}
"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
        assert!(res.contains("<p>Hello</p>\n<div >"));
        assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
        assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ?autoplay=1""#));
        assert!(res.contains(r#"//player.vimeo.com/video/210073083""#));
    }

    #[test]
    fn doesnt_render_shortcode_in_code_block() {
        let res = markdown_to_html(r#"```{{ youtube(id="w7Ft2ymGmfc") }}```"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
        assert_eq!(res, "<p><code>{{ youtube(id=&quot;w7Ft2ymGmfc&quot;) }}</code></p>\n");
    }

    #[test]
    fn can_render_shortcode_with_body() {
        // The built-in templates are extended with a custom shortcode that uses its body
        let mut tera = Tera::default();
        tera.extend(&GUTENBERG_TERA).unwrap();
        tera.add_raw_template("shortcodes/quote.html", "<blockquote>{{ body }} - {{ author}}</blockquote>").unwrap();
        let res = markdown_to_html(r#"
Hello
{% quote(author="Keats") %}
A quote
{% end %}
"#, &HashMap::new(), &tera, &Config::default()).unwrap();
        assert_eq!(res, "<p>Hello\n</p><blockquote>A quote - Keats</blockquote>");
    }

    #[test]
    fn errors_rendering_unknown_shortcode() {
        // `Tera::default()` has no shortcode templates registered
        let res = markdown_to_html("{{ hello(flash=true) }}", &HashMap::new(), &Tera::default(), &Config::default());
        assert!(res.is_err());
    }

    // --- Relative links ---

    #[test]
    fn can_make_valid_relative_link() {
        let mut permalinks = HashMap::new();
        permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
        let res = markdown_to_html(
            r#"[rel link](./pages/about.md), [abs link](https://vincent.is/about)"#,
            &permalinks,
            &GUTENBERG_TERA,
            &Config::default()
        ).unwrap();
        assert!(
            res.contains(r#"<p><a href="https://vincent.is/about">rel link</a>, <a href="https://vincent.is/about">abs link</a></p>"#)
        );
    }

    #[test]
    fn can_make_relative_links_with_anchors() {
        let mut permalinks = HashMap::new();
        permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
        let res = markdown_to_html(
            r#"[rel link](./pages/about.md#cv)"#,
            &permalinks,
            &GUTENBERG_TERA,
            &Config::default()
        ).unwrap();
        assert!(
            res.contains(r#"<p><a href="https://vincent.is/about#cv">rel link</a></p>"#)
        );
    }

    #[test]
    fn errors_relative_link_inexistant() {
        // No permalinks are known, so the relative link cannot be resolved
        let res = markdown_to_html("[rel link](./pages/about.md)", &HashMap::new(), &Tera::default(), &Config::default());
        assert!(res.is_err());
    }

    // --- Header ids and anchor links ---

    #[test]
    fn can_add_id_to_headers() {
        let res = markdown_to_html(r#"# Hello"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
        assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n");
    }

    #[test]
    fn can_add_id_to_headers_same_slug() {
        // The second header with the same slug gets a numeric suffix
        let res = markdown_to_html("# Hello\n# Hello", &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
        assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n<h1 id=\"hello-1\">Hello</h1>\n");
    }

    #[test]
    fn can_insert_anchor() {
        let mut config = Config::default();
        config.insert_anchor_links = Some(true);
        let res = markdown_to_html("# Hello", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
        assert_eq!(
            res,
            "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello</h1>\n"
        );
    }

    // See https://github.com/Keats/gutenberg/issues/42
    #[test]
    fn can_insert_anchor_with_exclamation_mark() {
        let mut config = Config::default();
        config.insert_anchor_links = Some(true);
        let res = markdown_to_html("# Hello!", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
        assert_eq!(
            res,
            "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello!</h1>\n"
        );
    }

    // See https://github.com/Keats/gutenberg/issues/53
    #[test]
    fn can_insert_anchor_with_link() {
        let mut config = Config::default();
        config.insert_anchor_links = Some(true);
        let res = markdown_to_html("## [](#xresources)Xresources", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
        assert_eq!(
            res,
            "<h2 id=\"xresources\"><a class=\"anchor\" href=\"#xresources\" aria-label=\"Anchor link for: xresources\">🔗</a>\nXresources</h2>\n"
        );
    }

    #[test]
    fn can_insert_anchor_with_other_special_chars() {
        let mut config = Config::default();
        config.insert_anchor_links = Some(true);
        let res = markdown_to_html("# Hello*_()", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
        assert_eq!(
            res,
            "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello*_()</h1>\n"
        );
    }
}