You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

392 lines
14KB

  1. use pulldown_cmark as cmark;
  2. use regex::Regex;
  3. use syntect::easy::HighlightLines;
  4. use syntect::html::{
  5. start_highlighted_html_snippet, styled_line_to_highlighted_html, IncludeBackground,
  6. };
  7. use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET};
  8. use context::RenderContext;
  9. use errors::{Error, Result};
  10. use front_matter::InsertAnchor;
  11. use table_of_contents::{make_table_of_contents, Heading};
  12. use utils::site::resolve_internal_link;
  13. use utils::vec::InsertMany;
  14. use utils::slugs::maybe_slugify_anchors;
  15. use self::cmark::{Event, LinkType, Options, Parser, Tag};
  /// HTML emitted in place of a `<!-- more -->` marker; its offset in the rendered body is what
  /// ends up in `Rendered::summary_len`.
  const CONTINUE_READING: &str =
      "<p id=\"zola-continue-reading\"><a name=\"continue-reading\"></a></p>\n";

  /// Name of the template rendered to produce the anchor link attached to a heading.
  const ANCHOR_LINK_TEMPLATE: &str = "anchor-link.html";
  19. #[derive(Debug)]
  20. pub struct Rendered {
  21. pub body: String,
  22. pub summary_len: Option<usize>,
  23. pub toc: Vec<Heading>,
  24. pub internal_links_with_anchors: Vec<(String, String)>,
  25. pub external_links: Vec<String>,
  26. }
  /// Position of a single heading inside a slice of pulldown-cmark events.
  #[derive(Debug)]
  struct HeadingRef {
      /// Index of the event that opens the heading.
      start_idx: usize,
      /// Index of the event that closes the heading; `0` until that event has been seen.
      end_idx: usize,
      /// Heading level as reported by the parser.
      level: u32,
      /// Manually specified id, when the heading text carries one.
      id: Option<String>,
  }

  impl HeadingRef {
      /// Starts tracking a heading opened at event index `start`; the end index and the id are
      /// filled in later.
      fn new(start: usize, level: u32) -> Self {
          Self { id: None, level, start_idx: start, end_idx: 0 }
      }
  }
  /// Returns an anchor based on `name` that is not yet present in `anchors`.
  ///
  /// A slug can already be taken, for example when an article has several headings named
  /// `Example`. In that case a counter is appended to the slug and incremented until a free
  /// anchor is found, which gives `example`, `example-1`, `example-2`, etc.
  ///
  /// `level` is the number of suffixes already ruled out. The bare `name` is only considered
  /// when `level` is `0`, and the first suffix tried is `level + 1`.
  fn find_anchor(anchors: &[String], name: String, level: u8) -> String {
      if level == 0 && !anchors.contains(&name) {
          return name;
      }

      // Count in `usize` and loop instead of recursing on a `u8`: with more than 255 taken
      // variants the `u8` counter overflowed (panic in debug, wrap-around and unbounded recursion
      // in release). The loop ends after at most `anchors.len() + 1` candidates, so `usize`
      // cannot overflow here.
      let mut suffix = usize::from(level) + 1;
      loop {
          let candidate = format!("{}-{}", name, suffix);
          if !anchors.contains(&candidate) {
              return candidate;
          }
          suffix += 1;
      }
  }
  /// Tells whether `s` begins with something that looks like a URI scheme.
  ///
  /// There is [a list of registered URI schemes][uri-schemes], but links are free to use
  /// arbitrary, private ones. So this only checks the shape: one or more ASCII letters, digits or
  /// dashes (in any case) immediately followed by a colon, i.e. the pattern `^[0-9A-Za-z\-]+:`.
  ///
  /// [uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
  fn starts_with_schema(s: &str) -> bool {
      // Length of the leading run of scheme characters. Bytes of multi-byte UTF-8 sequences are
      // never ASCII, so they end the run.
      let scheme_len =
          s.bytes().take_while(|byte| byte.is_ascii_alphanumeric() || *byte == b'-').count();

      scheme_len > 0 && s.as_bytes().get(scheme_len) == Some(&b':')
  }
  67. // Colocated asset links refers to the files in the same directory,
  68. // there it should be a filename only
  69. fn is_colocated_asset_link(link: &str) -> bool {
  70. !link.contains('/') // http://, ftp://, ../ etc
  71. && !starts_with_schema(link)
  72. }
  /// Tells whether `link` begins with the `http:` or `https:` scheme.
  fn is_external_link(link: &str) -> bool {
      ["http:", "https:"].iter().any(|scheme| link.starts_with(*scheme))
  }
  77. fn fix_link(
  78. link_type: LinkType,
  79. link: &str,
  80. context: &RenderContext,
  81. internal_links_with_anchors: &mut Vec<(String, String)>,
  82. external_links: &mut Vec<String>,
  83. ) -> Result<String> {
  84. if link_type == LinkType::Email {
  85. return Ok(link.to_string());
  86. }
  87. // TODO: remove me in a few versions when people have upgraded
  88. if link.starts_with("./") && link.contains(".md") {
  89. println!("It looks like the link `{}` is using the previous syntax for internal links: start with @/ instead", link);
  90. }
  91. // A few situations here:
  92. // - it could be a relative link (starting with `@/`)
  93. // - it could be a link to a co-located asset
  94. // - it could be a normal link
  95. let result = if link.starts_with("@/") {
  96. match resolve_internal_link(&link, context.permalinks) {
  97. Ok(resolved) => {
  98. if resolved.anchor.is_some() {
  99. internal_links_with_anchors
  100. .push((resolved.md_path.unwrap(), resolved.anchor.unwrap()));
  101. }
  102. resolved.permalink
  103. }
  104. Err(_) => {
  105. return Err(format!("Relative link {} not found.", link).into());
  106. }
  107. }
  108. } else if is_colocated_asset_link(&link) {
  109. format!("{}{}", context.current_page_permalink, link)
  110. } else {
  111. if is_external_link(link) {
  112. external_links.push(link.to_owned());
  113. }
  114. link.to_string()
  115. };
  116. Ok(result)
  117. }
  118. /// get only text in a slice of events
  119. fn get_text(parser_slice: &[Event]) -> String {
  120. let mut title = String::new();
  121. for event in parser_slice.iter() {
  122. match event {
  123. Event::Text(text) | Event::Code(text) => title += text,
  124. _ => continue,
  125. }
  126. }
  127. title
  128. }
  129. fn get_heading_refs(events: &[Event]) -> Vec<HeadingRef> {
  130. let mut heading_refs = vec![];
  131. for (i, event) in events.iter().enumerate() {
  132. match event {
  133. Event::Start(Tag::Heading(level)) => {
  134. heading_refs.push(HeadingRef::new(i, *level));
  135. }
  136. Event::End(Tag::Heading(_)) => {
  137. let msg = "Heading end before start?";
  138. heading_refs.last_mut().expect(msg).end_idx = i;
  139. }
  140. _ => (),
  141. }
  142. }
  143. heading_refs
  144. }
  145. pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Rendered> {
  146. // the rendered html
  147. let mut html = String::with_capacity(content.len());
  148. // Set while parsing
  149. let mut error = None;
  150. let mut background = IncludeBackground::Yes;
  151. let mut highlighter: Option<(HighlightLines, bool)> = None;
  152. let mut inserted_anchors: Vec<String> = vec![];
  153. let mut headings: Vec<Heading> = vec![];
  154. let mut internal_links_with_anchors = Vec::new();
  155. let mut external_links = Vec::new();
  156. let mut opts = Options::empty();
  157. let mut has_summary = false;
  158. opts.insert(Options::ENABLE_TABLES);
  159. opts.insert(Options::ENABLE_FOOTNOTES);
  160. {
  161. let mut events = Parser::new_ext(content, opts)
  162. .map(|event| {
  163. match event {
  164. Event::Text(text) => {
  165. // if we are in the middle of a code block
  166. if let Some((ref mut highlighter, in_extra)) = highlighter {
  167. let highlighted = if in_extra {
  168. if let Some(ref extra) = context.config.extra_syntax_set {
  169. highlighter.highlight(&text, &extra)
  170. } else {
  171. unreachable!(
  172. "Got a highlighter from extra syntaxes but no extra?"
  173. );
  174. }
  175. } else {
  176. highlighter.highlight(&text, &SYNTAX_SET)
  177. };
  178. //let highlighted = &highlighter.highlight(&text, ss);
  179. let html = styled_line_to_highlighted_html(&highlighted, background);
  180. return Event::Html(html.into());
  181. }
  182. // Business as usual
  183. Event::Text(text)
  184. }
  185. Event::Start(Tag::CodeBlock(ref info)) => {
  186. if !context.config.highlight_code {
  187. return Event::Html("<pre><code>".into());
  188. }
  189. let theme = &THEME_SET.themes[&context.config.highlight_theme];
  190. highlighter = Some(get_highlighter(info, &context.config));
  191. // This selects the background color the same way that start_coloured_html_snippet does
  192. let color = theme
  193. .settings
  194. .background
  195. .unwrap_or(::syntect::highlighting::Color::WHITE);
  196. background = IncludeBackground::IfDifferent(color);
  197. let snippet = start_highlighted_html_snippet(theme);
  198. Event::Html(snippet.0.into())
  199. }
  200. Event::End(Tag::CodeBlock(_)) => {
  201. if !context.config.highlight_code {
  202. return Event::Html("</code></pre>\n".into());
  203. }
  204. // reset highlight and close the code block
  205. highlighter = None;
  206. Event::Html("</pre>".into())
  207. }
  208. Event::Start(Tag::Image(link_type, src, title)) => {
  209. if is_colocated_asset_link(&src) {
  210. let link = format!("{}{}", context.current_page_permalink, &*src);
  211. return Event::Start(Tag::Image(link_type, link.into(), title));
  212. }
  213. Event::Start(Tag::Image(link_type, src, title))
  214. }
  215. Event::Start(Tag::Link(link_type, link, title)) => {
  216. let fixed_link = match fix_link(
  217. link_type,
  218. &link,
  219. context,
  220. &mut internal_links_with_anchors,
  221. &mut external_links,
  222. ) {
  223. Ok(fixed_link) => fixed_link,
  224. Err(err) => {
  225. error = Some(err);
  226. return Event::Html("".into());
  227. }
  228. };
  229. Event::Start(Tag::Link(link_type, fixed_link.into(), title))
  230. }
  231. Event::Html(ref markup) if markup.contains("<!-- more -->") => {
  232. has_summary = true;
  233. Event::Html(CONTINUE_READING.into())
  234. }
  235. _ => event,
  236. }
  237. })
  238. .collect::<Vec<_>>(); // We need to collect the events to make a second pass
  239. let mut heading_refs = get_heading_refs(&events);
  240. let mut anchors_to_insert = vec![];
  241. // First heading pass: look for a manually-specified IDs, e.g. `# Heading text {#hash}`
  242. // (This is a separate first pass so that auto IDs can avoid collisions with manual IDs.)
  243. for heading_ref in heading_refs.iter_mut() {
  244. let end_idx = heading_ref.end_idx;
  245. if let Event::Text(ref mut text) = events[end_idx - 1] {
  246. if text.as_bytes().last() == Some(&b'}') {
  247. if let Some(mut i) = text.find("{#") {
  248. let id = text[i + 2..text.len() - 1].to_owned();
  249. inserted_anchors.push(id.clone());
  250. while i > 0 && text.as_bytes()[i - 1] == b' ' {
  251. i -= 1;
  252. }
  253. heading_ref.id = Some(id);
  254. *text = text[..i].to_owned().into();
  255. }
  256. }
  257. }
  258. }
  259. // Second heading pass: auto-generate remaining IDs, and emit HTML
  260. for heading_ref in heading_refs {
  261. let start_idx = heading_ref.start_idx;
  262. let end_idx = heading_ref.end_idx;
  263. let title = get_text(&events[start_idx + 1..end_idx]);
  264. let id = heading_ref
  265. .id
  266. .unwrap_or_else(|| find_anchor(&inserted_anchors, maybe_slugify_anchors(&title, context.config.slugify_paths), 0));
  267. inserted_anchors.push(id.clone());
  268. // insert `id` to the tag
  269. let html = format!("<h{lvl} id=\"{id}\">", lvl = heading_ref.level, id = id);
  270. events[start_idx] = Event::Html(html.into());
  271. // generate anchors and places to insert them
  272. if context.insert_anchor != InsertAnchor::None {
  273. let anchor_idx = match context.insert_anchor {
  274. InsertAnchor::Left => start_idx + 1,
  275. InsertAnchor::Right => end_idx,
  276. InsertAnchor::None => 0, // Not important
  277. };
  278. let mut c = tera::Context::new();
  279. c.insert("id", &id);
  280. let anchor_link = utils::templates::render_template(
  281. &ANCHOR_LINK_TEMPLATE,
  282. context.tera,
  283. c,
  284. &None,
  285. )
  286. .map_err(|e| Error::chain("Failed to render anchor link template", e))?;
  287. anchors_to_insert.push((anchor_idx, Event::Html(anchor_link.into())));
  288. }
  289. // record heading to make table of contents
  290. let permalink = format!("{}#{}", context.current_page_permalink, id);
  291. let h =
  292. Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() };
  293. headings.push(h);
  294. }
  295. if context.insert_anchor != InsertAnchor::None {
  296. events.insert_many(anchors_to_insert);
  297. }
  298. cmark::html::push_html(&mut html, events.into_iter());
  299. }
  300. if let Some(e) = error {
  301. Err(e)
  302. } else {
  303. Ok(Rendered {
  304. summary_len: if has_summary { html.find(CONTINUE_READING) } else { None },
  305. body: html,
  306. toc: make_table_of_contents(headings),
  307. internal_links_with_anchors,
  308. external_links,
  309. })
  310. }
  311. }
  #[cfg(test)]
  mod tests {
      use super::*;

      /// Registered, arbitrary and mixed-case schemes must all be recognised.
      #[test]
      fn test_starts_with_schema() {
          let with_schema = [
              // registered
              "https://example.com/",
              "ftp://example.com/",
              "mailto:user@example.com",
              "xmpp:node@example.com",
              "tel:18008675309",
              "sms:18008675309",
              "h323:user@example.com",
              // arbitrary
              "zola:post?content=hi",
              // case-insensitive
              "MailTo:user@example.com",
              "MAILTO:user@example.com",
          ];

          for link in with_schema.iter() {
              assert!(starts_with_schema(link), "expected a schema in {:?}", link);
          }
      }

      /// Only `http:` and `https:` links count as external.
      #[test]
      fn test_is_external_link() {
          let external = [
              "http://example.com/",
              "https://example.com/",
              "https://example.com/index.html#introduction",
          ];
          for link in external.iter() {
              assert!(is_external_link(link), "expected {:?} to be external", link);
          }

          let not_external =
              ["mailto:user@example.com", "tel:18008675309", "#introduction", "http.jpg"];
          for link in not_external.iter() {
              assert!(!is_external_link(link), "expected {:?} not to be external", link);
          }
      }
  }