You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

markdown.rs 19KB

7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
7 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. use std::borrow::Cow::Owned;
  2. use std::collections::HashMap;
  3. use pulldown_cmark as cmark;
  4. use self::cmark::{Parser, Event, Tag};
  5. use regex::Regex;
  6. use slug::slugify;
  7. use syntect::dumps::from_binary;
  8. use syntect::easy::HighlightLines;
  9. use syntect::parsing::SyntaxSet;
  10. use syntect::highlighting::ThemeSet;
  11. use syntect::html::{start_coloured_html_snippet, styles_to_coloured_html, IncludeBackground};
  12. use tera::{Tera, Context};
  13. use config::Config;
  14. use errors::{Result, ResultExt};
  15. // We need to put those in a struct to impl Send and sync
  16. pub struct Setup {
  17. syntax_set: SyntaxSet,
  18. pub theme_set: ThemeSet,
  19. }
  20. unsafe impl Send for Setup {}
  21. unsafe impl Sync for Setup {}
  22. lazy_static!{
  23. static ref SHORTCODE_RE: Regex = Regex::new(r#"\{(?:%|\{)\s+([[:alnum:]]+?)\(([[:alnum:]]+?="?.+?"?)\)\s+(?:%|\})\}"#).unwrap();
  24. pub static ref SETUP: Setup = Setup {
  25. syntax_set: SyntaxSet::load_defaults_newlines(),
  26. theme_set: from_binary(include_bytes!("../sublime_themes/all.themedump"))
  27. };
  28. }
  29. /// A ShortCode that has a body
  30. /// Called by having some content like {% ... %} body {% end %}
  31. /// We need the struct to hold the data while we're processing the markdown
  32. #[derive(Debug)]
  33. struct ShortCode {
  34. name: String,
  35. args: HashMap<String, String>,
  36. body: String,
  37. }
  38. impl ShortCode {
  39. pub fn new(name: &str, args: HashMap<String, String>) -> ShortCode {
  40. ShortCode {
  41. name: name.to_string(),
  42. args: args,
  43. body: String::new(),
  44. }
  45. }
  46. pub fn append(&mut self, text: &str) {
  47. self.body.push_str(text)
  48. }
  49. pub fn render(&self, tera: &Tera) -> Result<String> {
  50. let mut context = Context::new();
  51. for (key, value) in self.args.iter() {
  52. context.add(key, value);
  53. }
  54. context.add("body", &self.body);
  55. let tpl_name = format!("shortcodes/{}.html", self.name);
  56. tera.render(&tpl_name, &context)
  57. .chain_err(|| format!("Failed to render {} shortcode", self.name))
  58. }
  59. }
  60. /// Parse a shortcode without a body
  61. fn parse_shortcode(input: &str) -> (String, HashMap<String, String>) {
  62. let mut args = HashMap::new();
  63. let caps = SHORTCODE_RE.captures(input).unwrap();
  64. // caps[0] is the full match
  65. let name = &caps[1];
  66. let arg_list = &caps[2];
  67. for arg in arg_list.split(',') {
  68. let bits = arg.split('=').collect::<Vec<_>>();
  69. args.insert(bits[0].trim().to_string(), bits[1].replace("\"", ""));
  70. }
  71. (name.to_string(), args)
  72. }
  73. /// Renders a shortcode or return an error
  74. fn render_simple_shortcode(tera: &Tera, name: &str, args: &HashMap<String, String>) -> Result<String> {
  75. let mut context = Context::new();
  76. for (key, value) in args.iter() {
  77. context.add(key, value);
  78. }
  79. let tpl_name = format!("shortcodes/{}.html", name);
  80. tera.render(&tpl_name, &context).chain_err(|| format!("Failed to render {} shortcode", name))
  81. }
  82. pub fn markdown_to_html(content: &str, permalinks: &HashMap<String, String>, tera: &Tera, config: &Config) -> Result<String> {
  83. // We try to be smart about highlighting code as it can be time-consuming
  84. // If the global config disables it, then we do nothing. However,
  85. // if we see a code block in the content, we assume that this page needs
  86. // to be highlighted. It could potentially have false positive if the content
  87. // has ``` in it but that seems kind of unlikely
  88. let should_highlight = if config.highlight_code.unwrap() {
  89. content.contains("```")
  90. } else {
  91. false
  92. };
  93. let highlight_theme = config.highlight_theme.clone().unwrap();
  94. // Set while parsing
  95. let mut error = None;
  96. let mut highlighter: Option<HighlightLines> = None;
  97. let mut shortcode_block = None;
  98. // shortcodes live outside of paragraph so we need to ensure we don't close
  99. // a paragraph that has already been closed
  100. let mut added_shortcode = false;
  101. // Don't transform things that look like shortcodes in code blocks
  102. let mut in_code_block = false;
  103. // If we get text in header, we need to insert the id and a anchor
  104. let mut in_header = false;
  105. // the rendered html
  106. let mut html = String::new();
  107. let mut anchors: Vec<String> = vec![];
  108. // We might have cases where the slug is already present in our list of anchor
  109. // for example an article could have several titles named Example
  110. // We add a counter after the slug if the slug is already present, which
  111. // means we will have example, example-1, example-2 etc
  112. fn find_anchor(anchors: &Vec<String>, name: String, level: u8) -> String {
  113. if level == 0 && !anchors.contains(&name) {
  114. return name.to_string();
  115. }
  116. let new_anchor = format!("{}-{}", name, level + 1);
  117. if !anchors.contains(&new_anchor) {
  118. return new_anchor;
  119. }
  120. find_anchor(anchors, name, level + 1)
  121. }
  122. {
  123. let parser = Parser::new(content).map(|event| match event {
  124. Event::Text(text) => {
  125. // if we are in the middle of a code block
  126. if let Some(ref mut highlighter) = highlighter {
  127. let highlighted = &highlighter.highlight(&text);
  128. let html = styles_to_coloured_html(highlighted, IncludeBackground::Yes);
  129. return Event::Html(Owned(html));
  130. }
  131. if in_code_block {
  132. return Event::Text(text);
  133. }
  134. // Shortcode without body
  135. if shortcode_block.is_none() && text.starts_with("{{") && text.ends_with("}}") {
  136. if SHORTCODE_RE.is_match(&text) {
  137. let (name, args) = parse_shortcode(&text);
  138. added_shortcode = true;
  139. match render_simple_shortcode(tera, &name, &args) {
  140. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  141. Err(e) => {
  142. error = Some(e);
  143. return Event::Html(Owned("".to_string()));
  144. }
  145. }
  146. }
  147. // non-matching will be returned normally below
  148. }
  149. // Shortcode with a body
  150. if shortcode_block.is_none() && text.starts_with("{%") && text.ends_with("%}") {
  151. if SHORTCODE_RE.is_match(&text) {
  152. let (name, args) = parse_shortcode(&text);
  153. shortcode_block = Some(ShortCode::new(&name, args));
  154. }
  155. // Don't return anything
  156. return Event::Text(Owned("".to_string()));
  157. }
  158. // If we have some text while in a shortcode, it's either the body
  159. // or the end tag
  160. if shortcode_block.is_some() {
  161. if let Some(ref mut shortcode) = shortcode_block {
  162. if text.trim() == "{% end %}" {
  163. added_shortcode = true;
  164. match shortcode.render(tera) {
  165. Ok(s) => return Event::Html(Owned(format!("</p>{}", s))),
  166. Err(e) => {
  167. error = Some(e);
  168. return Event::Html(Owned("".to_string()));
  169. }
  170. }
  171. } else {
  172. shortcode.append(&text);
  173. return Event::Html(Owned("".to_string()));
  174. }
  175. }
  176. }
  177. if in_header {
  178. let id = find_anchor(&anchors, slugify(&text), 0);
  179. anchors.push(id.clone());
  180. let anchor_link = if config.insert_anchor_links.unwrap() {
  181. let mut context = Context::new();
  182. context.add("id", &id);
  183. tera.render("anchor-link.html", &context).unwrap()
  184. } else {
  185. String::new()
  186. };
  187. return Event::Html(Owned(format!(r#"id="{}">{}{}"#, id, anchor_link, text)));
  188. }
  189. // Business as usual
  190. Event::Text(text)
  191. },
  192. Event::Start(Tag::CodeBlock(ref info)) => {
  193. in_code_block = true;
  194. if !should_highlight {
  195. return Event::Html(Owned("<pre><code>".to_owned()));
  196. }
  197. let theme = &SETUP.theme_set.themes[&highlight_theme];
  198. let syntax = info
  199. .split(' ')
  200. .next()
  201. .and_then(|lang| SETUP.syntax_set.find_syntax_by_token(lang))
  202. .unwrap_or_else(|| SETUP.syntax_set.find_syntax_plain_text());
  203. highlighter = Some(HighlightLines::new(syntax, theme));
  204. let snippet = start_coloured_html_snippet(theme);
  205. Event::Html(Owned(snippet))
  206. },
  207. Event::End(Tag::CodeBlock(_)) => {
  208. in_code_block = false;
  209. if !should_highlight{
  210. return Event::Html(Owned("</code></pre>\n".to_owned()))
  211. }
  212. // reset highlight and close the code block
  213. highlighter = None;
  214. Event::Html(Owned("</pre>".to_owned()))
  215. },
  216. // Need to handle relative links
  217. Event::Start(Tag::Link(ref link, ref title)) => {
  218. if link.starts_with("./") {
  219. // First we remove the ./ since that's gutenberg specific
  220. let clean_link = link.replacen("./", "", 1);
  221. // Then we remove any potential anchor
  222. // parts[0] will be the file path and parts[1] the anchor if present
  223. let parts = clean_link.split('#').collect::<Vec<_>>();
  224. match permalinks.get(parts[0]) {
  225. Some(p) => {
  226. let url = if parts.len() > 1 {
  227. format!("{}#{}", p, parts[1])
  228. } else {
  229. p.to_string()
  230. };
  231. return Event::Start(Tag::Link(Owned(url), title.clone()));
  232. },
  233. None => {
  234. error = Some(format!("Relative link {} not found.", link).into());
  235. return Event::Html(Owned("".to_string()));
  236. }
  237. };
  238. }
  239. return Event::Start(Tag::Link(link.clone(), title.clone()));
  240. },
  241. // need to know when we are in a code block to disable shortcodes in them
  242. Event::Start(Tag::Code) => {
  243. in_code_block = true;
  244. event
  245. },
  246. Event::End(Tag::Code) => {
  247. in_code_block = false;
  248. event
  249. },
  250. Event::Start(Tag::Header(num)) => {
  251. in_header = true;
  252. // ugly eh
  253. return Event::Html(Owned(format!("<h{} ", num)));
  254. },
  255. Event::End(Tag::Header(_)) => {
  256. in_header = false;
  257. event
  258. },
  259. // If we added shortcodes, don't close a paragraph since there's none
  260. Event::End(Tag::Paragraph) => {
  261. if added_shortcode {
  262. added_shortcode = false;
  263. return Event::Html(Owned("".to_owned()));
  264. }
  265. event
  266. },
  267. // Ignore softbreaks inside shortcodes
  268. Event::SoftBreak => {
  269. if shortcode_block.is_some() {
  270. return Event::Html(Owned("".to_owned()));
  271. }
  272. event
  273. },
  274. _ => {
  275. // println!("event = {:?}", event);
  276. event
  277. },
  278. });
  279. cmark::html::push_html(&mut html, parser);
  280. }
  281. match error {
  282. Some(e) => Err(e),
  283. None => Ok(html.replace("<p></p>", "")),
  284. }
  285. }
  #[cfg(test)]
  mod tests {
      use std::collections::HashMap;

      use site::GUTENBERG_TERA;
      use tera::Tera;
      use config::Config;
      use errors::Result;

      use super::{markdown_to_html, parse_shortcode};

      /// Renders `content` with no permalinks and the default config.
      fn render(content: &str, tera: &Tera) -> Result<String> {
          markdown_to_html(content, &HashMap::new(), tera, &Config::default())
      }

      /// Renders `content` with the default config and the given permalinks,
      /// using the built-in templates.
      fn render_with_permalinks(content: &str, permalinks: &HashMap<String, String>) -> Result<String> {
          markdown_to_html(content, permalinks, &GUTENBERG_TERA, &Config::default())
      }

      /// Permalinks containing only the about page.
      fn about_permalinks() -> HashMap<String, String> {
          let mut permalinks = HashMap::new();
          permalinks.insert("pages/about.md".to_string(), "https://vincent.is/about".to_string());
          permalinks
      }

      #[test]
      fn test_parse_simple_shortcode_one_arg() {
          let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc") }}"#);
          assert_eq!(name, "youtube");
          assert_eq!(args["id"], "w7Ft2ymGmfc");
      }

      #[test]
      fn test_parse_simple_shortcode_several_arg() {
          let (name, args) = parse_shortcode(r#"{{ youtube(id="w7Ft2ymGmfc", autoplay=true) }}"#);
          assert_eq!(name, "youtube");
          assert_eq!(args["id"], "w7Ft2ymGmfc");
          assert_eq!(args["autoplay"], "true");
      }

      #[test]
      fn test_parse_block_shortcode_several_arg() {
          let (name, args) = parse_shortcode(r#"{% youtube(id="w7Ft2ymGmfc", autoplay=true) %}"#);
          assert_eq!(name, "youtube");
          assert_eq!(args["id"], "w7Ft2ymGmfc");
          assert_eq!(args["autoplay"], "true");
      }

      #[test]
      fn test_markdown_to_html_simple() {
          let res = render("hello", &Tera::default()).unwrap();
          assert_eq!(res, "<p>hello</p>\n");
      }

      #[test]
      fn test_markdown_to_html_code_block_highlighting_off() {
          let mut config = Config::default();
          config.highlight_code = Some(false);
          let content = "```\n$ gutenberg server\n```";
          let res = markdown_to_html(content, &HashMap::new(), &Tera::default(), &config).unwrap();
          assert_eq!(res, "<pre><code>$ gutenberg server\n</code></pre>\n");
      }

      #[test]
      fn test_markdown_to_html_code_block_no_lang() {
          let res = render("```\n$ gutenberg server\n$ ping\n```", &Tera::default()).unwrap();
          let expected = "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">$ gutenberg server\n</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">$ ping\n</span></pre>";
          assert_eq!(res, expected);
      }

      #[test]
      fn test_markdown_to_html_code_block_with_lang() {
          let res = render("```python\nlist.append(1)\n```", &Tera::default()).unwrap();
          let expected = "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">.</span><span style=\"background-color:#2b303b;color:#bf616a;\">append</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">(</span><span style=\"background-color:#2b303b;color:#d08770;\">1</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">)</span><span style=\"background-color:#2b303b;color:#c0c5ce;\">\n</span></pre>";
          assert_eq!(res, expected);
      }

      #[test]
      fn test_markdown_to_html_code_block_with_unknown_lang() {
          let res = render("```yolo\nlist.append(1)\n```", &Tera::default()).unwrap();
          // defaults to plain text
          let expected = "<pre style=\"background-color:#2b303b\">\n<span style=\"background-color:#2b303b;color:#c0c5ce;\">list.append(1)\n</span></pre>";
          assert_eq!(res, expected);
      }

      #[test]
      fn test_markdown_to_html_with_shortcode() {
          // One markdown line per entry; the empty entries give the leading
          // and trailing newlines
          let content = [
              "",
              "Hello",
              r#"{{ youtube(id="ub36ffWAqgQ") }}"#,
              "",
          ].join("\n");
          let res = render(&content, &GUTENBERG_TERA).unwrap();
          assert!(res.contains("<p>Hello</p>\n<div >"));
          assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
      }

      #[test]
      fn test_markdown_to_html_with_several_shortcode_in_row() {
          let content = [
              "",
              "Hello",
              r#"{{ youtube(id="ub36ffWAqgQ") }}"#,
              r#"{{ youtube(id="ub36ffWAqgQ", autoplay=true) }}"#,
              r#"{{ vimeo(id="210073083") }}"#,
              r#"{{ gist(url="https://gist.github.com/Keats/32d26f699dcc13ebd41b") }}"#,
              "",
          ].join("\n");
          let res = render(&content, &GUTENBERG_TERA).unwrap();
          assert!(res.contains("<p>Hello</p>\n<div >"));
          assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ""#));
          assert!(res.contains(r#"<iframe src="https://www.youtube.com/embed/ub36ffWAqgQ?autoplay=1""#));
          assert!(res.contains(r#"//player.vimeo.com/video/210073083""#));
      }

      #[test]
      fn test_markdown_to_html_shortcode_in_code_block() {
          let res = render(r#"```{{ youtube(id="w7Ft2ymGmfc") }}```"#, &GUTENBERG_TERA).unwrap();
          assert_eq!(res, "<p><code>{{ youtube(id=&quot;w7Ft2ymGmfc&quot;) }}</code></p>\n");
      }

      #[test]
      fn test_markdown_to_html_shortcode_with_body() {
          let mut tera = Tera::default();
          tera.extend(&GUTENBERG_TERA).unwrap();
          tera.add_raw_template("shortcodes/quote.html", "<blockquote>{{ body }} - {{ author}}</blockquote>").unwrap();

          let content = [
              "",
              "Hello",
              r#"{% quote(author="Keats") %}"#,
              "A quote",
              "{% end %}",
              "",
          ].join("\n");
          let res = render(&content, &tera).unwrap();
          assert_eq!(res, "<p>Hello\n</p><blockquote>A quote - Keats</blockquote>");
      }

      #[test]
      fn test_markdown_to_html_unknown_shortcode() {
          let res = render("{{ hello(flash=true) }}", &Tera::default());
          assert!(res.is_err());
      }

      #[test]
      fn test_markdown_to_html_relative_link_exists() {
          let permalinks = about_permalinks();
          let res = render_with_permalinks(
              r#"[rel link](./pages/about.md), [abs link](https://vincent.is/about)"#,
              &permalinks
          ).unwrap();

          assert!(
              res.contains(r#"<p><a href="https://vincent.is/about">rel link</a>, <a href="https://vincent.is/about">abs link</a></p>"#)
          );
      }

      #[test]
      fn test_markdown_to_html_relative_links_with_anchors() {
          let permalinks = about_permalinks();
          let res = render_with_permalinks(r#"[rel link](./pages/about.md#cv)"#, &permalinks).unwrap();

          assert!(
              res.contains(r#"<p><a href="https://vincent.is/about#cv">rel link</a></p>"#)
          );
      }

      #[test]
      fn test_markdown_to_html_relative_link_inexistant() {
          let res = render("[rel link](./pages/about.md)", &Tera::default());
          assert!(res.is_err());
      }

      #[test]
      fn test_markdown_to_html_add_id_to_headers() {
          let res = render(r#"# Hello"#, &GUTENBERG_TERA).unwrap();
          assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n");
      }

      #[test]
      fn test_markdown_to_html_add_id_to_headers_same_slug() {
          let res = render("# Hello\n# Hello", &GUTENBERG_TERA).unwrap();
          assert_eq!(res, "<h1 id=\"hello\">Hello</h1>\n<h1 id=\"hello-1\">Hello</h1>\n");
      }
  }