You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

570 lines
22KB

  1. use std::borrow::Cow::Owned;
  2. use std::collections::HashMap;
  3. use pulldown_cmark as cmark;
  4. use self::cmark::{Parser, Event, Tag, Options, OPTION_ENABLE_TABLES, OPTION_ENABLE_FOOTNOTES};
  5. use regex::Regex;
  6. use slug::slugify;
  7. use syntect::dumps::from_binary;
  8. use syntect::easy::HighlightLines;
  9. use syntect::parsing::SyntaxSet;
  10. use syntect::highlighting::ThemeSet;
  11. use syntect::html::{start_coloured_html_snippet, styles_to_coloured_html, IncludeBackground};
  12. use tera::{Tera, Context};
  13. use config::Config;
  14. use errors::{Result, ResultExt};
  15. // We need to put those in a struct to impl Send and sync
  16. pub struct Setup {
  17. pub syntax_set: SyntaxSet,
  18. pub theme_set: ThemeSet,
  19. }
  20. unsafe impl Send for Setup {}
  21. unsafe impl Sync for Setup {}
  22. lazy_static!{
  23. static ref SHORTCODE_RE: Regex = Regex::new(r#"\{(?:%|\{)\s+([[:alnum:]]+?)\(([[:alnum:]]+?="?.+?"?)\)\s+(?:%|\})\}"#).unwrap();
  24. pub static ref SETUP: Setup = Setup {
  25. syntax_set: {
  26. let mut ps: SyntaxSet = from_binary(include_bytes!("../sublime_syntaxes/newlines.packdump"));
  27. ps.link_syntaxes();
  28. ps
  29. },
  30. theme_set: from_binary(include_bytes!("../sublime_themes/all.themedump"))
  31. };
  32. }
  33. /// A shortcode that has a body
  34. /// Called by having some content like {% ... %} body {% end %}
  35. /// We need the struct to hold the data while we're processing the markdown
  36. #[derive(Debug)]
  37. struct ShortCode {
  38. name: String,
  39. args: HashMap<String, String>,
  40. body: String,
  41. }
  42. impl ShortCode {
  43. pub fn new(name: &str, args: HashMap<String, String>) -> ShortCode {
  44. ShortCode {
  45. name: name.to_string(),
  46. args: args,
  47. body: String::new(),
  48. }
  49. }
  50. pub fn append(&mut self, text: &str) {
  51. self.body.push_str(text)
  52. }
  53. pub fn render(&self, tera: &Tera) -> Result<String> {
  54. let mut context = Context::new();
  55. for (key, value) in &self.args {
  56. context.add(key, value);
  57. }
  58. context.add("body", &self.body);
  59. let tpl_name = format!("shortcodes/{}.html", self.name);
  60. tera.render(&tpl_name, &context)
  61. .chain_err(|| format!("Failed to render {} shortcode", self.name))
  62. }
  63. }
  64. /// Parse a shortcode without a body
  65. fn parse_shortcode(input: &str) -> (String, HashMap<String, String>) {
  66. let mut args = HashMap::new();
  67. let caps = SHORTCODE_RE.captures(input).unwrap();
  68. // caps[0] is the full match
  69. let name = &caps[1];
  70. let arg_list = &caps[2];
  71. for arg in arg_list.split(',') {
  72. let bits = arg.split('=').collect::<Vec<_>>();
  73. args.insert(bits[0].trim().to_string(), bits[1].replace("\"", ""));
  74. }
  75. (name.to_string(), args)
  76. }
  77. /// Renders a shortcode or return an error
  78. fn render_simple_shortcode(tera: &Tera, name: &str, args: &HashMap<String, String>) -> Result<String> {
  79. let mut context = Context::new();
  80. for (key, value) in args.iter() {
  81. context.add(key, value);
  82. }
  83. let tpl_name = format!("shortcodes/{}.html", name);
  84. tera.render(&tpl_name, &context).chain_err(|| format!("Failed to render {} shortcode", name))
  85. }
  86. pub fn markdown_to_html(content: &str, permalinks: &HashMap<String, String>, tera: &Tera, config: &Config) -> Result<String> {
  87. // We try to be smart about highlighting code as it can be time-consuming
  88. // If the global config disables it, then we do nothing. However,
  89. // if we see a code block in the content, we assume that this page needs
  90. // to be highlighted. It could potentially have false positive if the content
  91. // has ``` in it but that seems kind of unlikely
  92. let should_highlight = if config.highlight_code.unwrap() {
  93. content.contains("```")
  94. } else {
  95. false
  96. };
  97. let highlight_theme = config.highlight_theme.clone().unwrap();
  98. // Set while parsing
  99. let mut error = None;
  100. let mut highlighter: Option<HighlightLines> = None;
  101. let mut shortcode_block = None;
  102. // shortcodes live outside of paragraph so we need to ensure we don't close
  103. // a paragraph that has already been closed
  104. let mut added_shortcode = false;
  105. // Don't transform things that look like shortcodes in code blocks
  106. let mut in_code_block = false;
  107. // If we get text in header, we need to insert the id and a anchor
  108. let mut in_header = false;
  109. // pulldown_cmark can send several text events for a title if there are markdown
  110. // specific characters like `!` in them. We only want to insert the anchor the first time
  111. let mut header_already_inserted = false;
  112. // the rendered html
  113. let mut html = String::new();
  114. let mut anchors: Vec<String> = vec![];
  115. // We might have cases where the slug is already present in our list of anchor
  116. // for example an article could have several titles named Example
  117. // We add a counter after the slug if the slug is already present, which
  118. // means we will have example, example-1, example-2 etc
  119. fn find_anchor(anchors: &[String], name: String, level: u8) -> String {
  120. if level == 0 && !anchors.contains(&name) {
  121. return name.to_string();
  122. }
  123. let new_anchor = format!("{}-{}", name, level + 1);
  124. if !anchors.contains(&new_anchor) {
  125. return new_anchor;
  126. }
  127. find_anchor(anchors, name, level + 1)
  128. }
  129. let mut opts = Options::empty();
  130. opts.insert(OPTION_ENABLE_TABLES);
  131. opts.insert(OPTION_ENABLE_FOOTNOTES);
  132. {
  133. let parser = Parser::new_ext(content, opts).map(|event| match event {
  134. Event::Text(text) => {
  135. // if we are in the middle of a code block
  136. if let Some(ref mut highlighter) = highlighter {
  137. let highlighted = &highlighter.highlight(&text);
  138. let html = styles_to_coloured_html(highlighted, IncludeBackground::Yes);
  139. return Event::Html(Owned(html));
  140. }
  141. if in_code_block {
  142. return Event::Text(text);
  143. }
  144. // Shortcode without body
  145. if shortcode_block.is_none() && text.starts_with("{{") && text.ends_with("}}") && SHORTCODE_RE.is_match(&text) {
  146. let (name, args) = parse_shortcode(&text);
  147. added_shortcode = true;
  148. match render_simple_shortcode(tera, &name, &args) {
  149. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  150. Err(e) => {
  151. error = Some(e);
  152. return Event::Html(Owned("".to_string()));
  153. }
  154. }
  155. // non-matching will be returned normally below
  156. }
  157. // Shortcode with a body
  158. if shortcode_block.is_none() && text.starts_with("{%") && text.ends_with("%}") {
  159. if SHORTCODE_RE.is_match(&text) {
  160. let (name, args) = parse_shortcode(&text);
  161. shortcode_block = Some(ShortCode::new(&name, args));
  162. }
  163. // Don't return anything
  164. return Event::Text(Owned("".to_string()));
  165. }
  166. // If we have some text while in a shortcode, it's either the body
  167. // or the end tag
  168. if shortcode_block.is_some() {
  169. if let Some(ref mut shortcode) = shortcode_block {
  170. if text.trim() == "{% end %}" {
  171. added_shortcode = true;
  172. match shortcode.render(tera) {
  173. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  174. Err(e) => {
  175. error = Some(e);
  176. return Event::Html(Owned("".to_string()));
  177. }
  178. }
  179. } else {
  180. shortcode.append(&text);
  181. return Event::Html(Owned("".to_string()));
  182. }
  183. }
  184. }
  185. if in_header {
  186. if header_already_inserted {
  187. return Event::Text(text);
  188. }
  189. let id = find_anchor(&anchors, slugify(&text), 0);
  190. anchors.push(id.clone());
  191. let anchor_link = if config.insert_anchor_links.unwrap() {
  192. let mut context = Context::new();
  193. context.add("id", &id);
  194. tera.render("anchor-link.html", &context).unwrap()
  195. } else {
  196. String::new()
  197. };
  198. header_already_inserted = true;
  199. return Event::Html(Owned(format!(r#"id="{}">{}{}"#, id, anchor_link, text)));
  200. }
  201. // Business as usual
  202. Event::Text(text)
  203. },
  204. Event::Start(Tag::CodeBlock(ref info)) => {
  205. in_code_block = true;
  206. if !should_highlight {
  207. return Event::Html(Owned("<pre><code>".to_owned()));
  208. }
  209. let theme = &SETUP.theme_set.themes[&highlight_theme];
  210. let syntax = info
  211. .split(' ')
  212. .next()
  213. .and_then(|lang| SETUP.syntax_set.find_syntax_by_token(lang))
  214. .unwrap_or_else(|| SETUP.syntax_set.find_syntax_plain_text());
  215. highlighter = Some(HighlightLines::new(syntax, theme));
  216. let snippet = start_coloured_html_snippet(theme);
  217. Event::Html(Owned(snippet))
  218. },
  219. Event::End(Tag::CodeBlock(_)) => {
  220. in_code_block = false;
  221. if !should_highlight{
  222. return Event::Html(Owned("</code></pre>\n".to_owned()))
  223. }
  224. // reset highlight and close the code block
  225. highlighter = None;
  226. Event::Html(Owned("</pre>".to_owned()))
  227. },
  228. // Need to handle relative links
  229. Event::Start(Tag::Link(ref link, ref title)) => {
  230. if in_header {
  231. return Event::Html(Owned("".to_owned()));
  232. }
  233. if link.starts_with("./") {
  234. // First we remove the ./ since that's gutenberg specific
  235. let clean_link = link.replacen("./", "", 1);
  236. // Then we remove any potential anchor
  237. // parts[0] will be the file path and parts[1] the anchor if present
  238. let parts = clean_link.split('#').collect::<Vec<_>>();
  239. match permalinks.get(parts[0]) {
  240. Some(p) => {
  241. let url = if parts.len() > 1 {
  242. format!("{}#{}", p, parts[1])
  243. } else {
  244. p.to_string()
  245. };
  246. return Event::Start(Tag::Link(Owned(url), title.clone()));
  247. },
  248. None => {
  249. error = Some(format!("Relative link {} not found.", link).into());
  250. return Event::Html(Owned("".to_string()));
  251. }
  252. };
  253. }
  254. Event::Start(Tag::Link(link.clone(), title.clone()))
  255. },
  256. Event::End(Tag::Link(_, _)) => {
  257. if in_header {
  258. return Event::Html(Owned("".to_owned()));
  259. }
  260. event
  261. }
  262. // need to know when we are in a code block to disable shortcodes in them
  263. Event::Start(Tag::Code) => {
  264. in_code_block = true;
  265. event
  266. },
  267. Event::End(Tag::Code) => {
  268. in_code_block = false;
  269. event
  270. },
  271. Event::Start(Tag::Header(num)) => {
  272. in_header = true;
  273. // ugly eh
  274. Event::Html(Owned(format!("<h{} ", num)))
  275. },
  276. Event::End(Tag::Header(_)) => {
  277. in_header = false;
  278. header_already_inserted = false;
  279. event
  280. },
  281. // If we added shortcodes, don't close a paragraph since there's none
  282. Event::End(Tag::Paragraph) => {
  283. if added_shortcode {
  284. added_shortcode = false;
  285. return Event::Html(Owned("".to_owned()));
  286. }
  287. event
  288. },
  289. // Ignore softbreaks inside shortcodes
  290. Event::SoftBreak => {
  291. if shortcode_block.is_some() {
  292. return Event::Html(Owned("".to_owned()));
  293. }
  294. event
  295. },
  296. _ => {
  297. // println!("event = {:?}", event);
  298. event
  299. },
  300. });
  301. cmark::html::push_html(&mut html, parser);
  302. }
  303. match error {
  304. Some(e) => Err(e),
  305. None => Ok(html.replace("<p></p>", "")),
  306. }
  307. }
  // Unit tests for shortcode parsing and for the markdown to HTML conversion.
  308. #[cfg(test)]
  309. mod tests {
  310. use std::collections::HashMap;
  311. use templates::GUTENBERG_TERA;
  312. use tera::Tera;
  313. use config::Config;
  314. use super::{markdown_to_html, parse_shortcode};
  // A `{{ }}` tag with a single quoted argument gives the name and the unquoted value.
  315. #[test]
  316. fn can_parse_simple_shortcode_one_arg() {
  317. let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc") }}"#);
  318. assert_eq!(name, "youtube");
  319. assert_eq!(args["id"], "w7Ft2ymGmfc");
  320. }
  // Comma separated arguments are all parsed, quoted or not.
  321. #[test]
  322. fn can_parse_simple_shortcode_several_arg() {
  323. let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc", autoplay=true) }}"#);
  324. assert_eq!(name, "youtube");
  325. assert_eq!(args["id"], "w7Ft2ymGmfc");
  326. assert_eq!(args["autoplay"], "true");
  327. }
  // The `{% %}` opening tag of a shortcode with a body is parsed the same way.
  328. #[test]
  329. fn can_parse_block_shortcode_several_arg() {
  330. let (name, args) = parse_shortcode(r#"{% youtube(id="w7Ft2ymGmfc", autoplay=true) %}"#);
  331. assert_eq!(name, "youtube");
  332. assert_eq!(args["id"], "w7Ft2ymGmfc");
  333. assert_eq!(args["autoplay"], "true");
  334. }
  // Plain text becomes a single paragraph.
  335. #[test]
  336. fn can_do_markdown_to_html_simple() {
  337. let res = markdown_to_html("hello", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
  338. assert_eq!(res, "<p>hello</p>\n");
  339. }
  // With `highlight_code` off, a fenced block is emitted as a plain `<pre><code>` block.
  340. #[test]
  341. fn doesnt_highlight_code_block_with_highlighting_off() {
  342. let mut config = Config::default();
  343. config.highlight_code = Some(false);
  344. let res = markdown_to_html("```\n$ gutenberg server\n```", &HashMap::new(), &Tera::default(), &config).unwrap();
  345. assert_eq!(
  346. res,
  347. "<pre><code>$ gutenberg server\n</code></pre>\n"
  348. );
  349. }
  // With the default config, a fenced block without a language is wrapped in coloured spans, one per line.
  350. #[test]
  351. fn can_highlight_code_block_no_lang() {
  352. let res = markdown_to_html("```\n$ gutenberg server\n$ ping\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
  353. assert_eq!(
  354. res,
  355. "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">$ gutenberg server\n</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">$ ping\n</span></pre>"
  356. );
  357. }
  // A known language on the fence gives per-token colouring.
  358. #[test]
  359. fn can_highlight_code_block_with_lang() {
  360. let res = markdown_to_html("```python\nlist.append(1)\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
  361. assert_eq!(
  362. res,
  363. "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">.</span><span style=\"background-color:#2b303b;color:#bf616a;\">append</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">(</span><span style=\"background-color:#2b303b;color:#d08770;\">1</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">)</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">\n</span></pre>"
  364. );
  365. }
  // An unknown language on the fence is rendered like plain text.
  366. #[test]
  367. fn can_higlight_code_block_with_unknown_lang() {
  368. let res = markdown_to_html("```yolo\nlist.append(1)\n```", &HashMap::new(), &Tera::default(), &Config::default()).unwrap();
  369. // defaults to plain text
  370. assert_eq!(
  371. res,
  372. "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list.append(1)\n</span></pre>"
  373. );
  374. }
  // A shortcode without a body is rendered through the templates of GUTENBERG_TERA.
  375. #[test]
  376. fn can_render_shortcode() {
  377. let res = markdown_to_html(r#"
  378. Hello
  379. {{ youtube(id="ub36ffWAqgQ") }}
  380. "#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
  381. assert!(res.contains("<p>Hello</p>\n<div >"));
  382. assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
  383. }
  // Several shortcodes one after the other are all rendered.
  384. #[test]
  385. fn can_render_several_shortcode_in_row() {
  386. let res = markdown_to_html(r#"
  387. Hello
  388. {{ youtube(id="ub36ffWAqgQ") }}
  389. {{ youtube(id="ub36ffWAqgQ", autoplay=true) }}
  390. {{ vimeo(id="210073083") }}
  391. {{ gist(url="https://gist.github.com/Keats/32d26f699dcc13ebd41b") }}
  392. "#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
  393. assert!(res.contains("<p>Hello</p>\n<div >"));
  394. assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
  395. assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ?autoplay=1""#));
  396. assert!(res.contains(r#"//player.vimeo.com/video/210073083""#));
  397. }
  // Text that looks like a shortcode inside inline code is left untouched (only HTML-escaped).
  398. #[test]
  399. fn doesnt_render_shortcode_in_code_block() {
  400. let res = markdown_to_html(r#"```{{ youtube(id="w7Ft2ymGmfc") }}```"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
  401. assert_eq!(res, "<p><code>{{ youtube(id=&quot;w7Ft2ymGmfc&quot;) }}</code></p>\n");
  402. }
  // A shortcode with a body passes both the body and its arguments to a template registered for the test.
  403. #[test]
  404. fn can_render_shortcode_with_body() {
  405. let mut tera = Tera::default();
  406. tera.extend(&GUTENBERG_TERA).unwrap();
  407. tera.add_raw_template("shortcodes/quote.html", "<blockquote>{{ body }} - {{ author}}</blockquote>").unwrap();
  408. let res = markdown_to_html(r#"
  409. Hello
  410. {% quote(author="Keats") %}
  411. A quote
  412. {% end %}
  413. "#, &HashMap::new(), &tera, &Config::default()).unwrap();
  414. assert_eq!(res, "<p>Hello\n</p><blockquote>A quote - Keats</blockquote>");
  415. }
  // A shortcode without a matching template in an empty Tera makes the conversion fail.
  416. #[test]
  417. fn errors_rendering_unknown_shortcode() {
  418. let res = markdown_to_html("{{ hello(flash=true) }}", &HashMap::new(), &Tera::default(), &Config::default());
  419. assert!(res.is_err());
  420. }
  // A `./` link is replaced by its permalink; an absolute link is kept as written.
  421. #[test]
  422. fn can_make_valid_relative_link() {
  423. let mut permalinks = HashMap::new();
  424. permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
  425. let res = markdown_to_html(
  426. r#"[rel link](./pages/about.md), [abs link](https://vincent.is/about)"#,
  427. &permalinks,
  428. &GUTENBERG_TERA,
  429. &Config::default()
  430. ).unwrap();
  431. assert!(
  432. res.contains(r#"<p><a href="https://vincent.is/about">rel link</a>, <a href="https://vincent.is/about">abs link</a></p>"#)
  433. );
  434. }
  // The `#anchor` of a `./` link is appended to the resolved permalink.
  435. #[test]
  436. fn can_make_relative_links_with_anchors() {
  437. let mut permalinks = HashMap::new();
  438. permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
  439. let res = markdown_to_html(
  440. r#"[rel link](./pages/about.md#cv)"#,
  441. &permalinks,
  442. &GUTENBERG_TERA,
  443. &Config::default()
  444. ).unwrap();
  445. assert!(
  446. res.contains(r#"<p><a href="https://vincent.is/about#cv">rel link</a></p>"#)
  447. );
  448. }
  // A `./` link with no entry in the permalinks makes the conversion fail.
  449. #[test]
  450. fn errors_relative_link_inexistant() {
  451. let res = markdown_to_html("[rel link](./pages/about.md)", &HashMap::new(), &Tera::default(), &Config::default());
  452. assert!(res.is_err());
  453. }
  // Headers get an id built from the slug of their text.
  454. #[test]
  455. fn can_add_id_to_headers() {
  456. let res = markdown_to_html(r#"# Hello"#, &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
  457. assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n");
  458. }
  // A repeated header text gets a numeric suffix so that ids stay unique.
  459. #[test]
  460. fn can_add_id_to_headers_same_slug() {
  461. let res = markdown_to_html("# Hello\n# Hello", &HashMap::new(), &GUTENBERG_TERA, &Config::default()).unwrap();
  462. assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n<h1 id=\"hello-1\">Hello</h1>\n");
  463. }
  // With `insert_anchor_links` on, the anchor link template output precedes the header text.
  464. #[test]
  465. fn can_insert_anchor() {
  466. let mut config = Config::default();
  467. config.insert_anchor_links = Some(true);
  468. let res = markdown_to_html("# Hello", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
  469. assert_eq!(
  470. res,
  471. "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello</h1>\n"
  472. );
  473. }
  474. // See https://github.com/Keats/gutenberg/issues/42
  475. #[test]
  476. fn can_insert_anchor_with_exclamation_mark() {
  477. let mut config = Config::default();
  478. config.insert_anchor_links = Some(true);
  479. let res = markdown_to_html("# Hello!", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
  480. assert_eq!(
  481. res,
  482. "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello!</h1>\n"
  483. );
  484. }
  485. // See https://github.com/Keats/gutenberg/issues/53
  486. #[test]
  487. fn can_insert_anchor_with_link() {
  488. let mut config = Config::default();
  489. config.insert_anchor_links = Some(true);
  490. let res = markdown_to_html("## [](#xresources)Xresources", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
  491. assert_eq!(
  492. res,
  493. "<h2 id=\"xresources\"><a class=\"anchor\" href=\"#xresources\" aria-label=\"Anchor link for: xresources\">🔗</a>\nXresources</h2>\n"
  494. );
  495. }
  // Other markdown-significant characters in a title still give a single anchor and id.
  496. #[test]
  497. fn can_insert_anchor_with_other_special_chars() {
  498. let mut config = Config::default();
  499. config.insert_anchor_links = Some(true);
  500. let res = markdown_to_html("# Hello*_()", &HashMap::new(), &GUTENBERG_TERA, &config).unwrap();
  501. assert_eq!(
  502. res,
  503. "<h1 id=\"hello\"><a class=\"anchor\" href=\"#hello\" aria-label=\"Anchor link for: hello\">🔗</a>\nHello*_()</h1>\n"
  504. );
  505. }
  506. }