From b1ceb3e80e3dac3de58f38c7a2d968e08445031d Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 10 Oct 2019 13:23:16 -0500 Subject: [PATCH] rendering: Avoid prepending URL prefix to links that start with a scheme (#817) Links that start with a scheme (e.g., `tel:18008675309`) inadvertently had a URL prefix prepended. Previously, only `mailto:` was handled, but given the sheer number of [registered URI schemes][uri-schemes], a loose pattern matcher is used to detect schemes instead. External links, as identified by the renderer, are now limited to `http` and `https` schemes. Fixes #747 and fixes #816. [uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml --- components/rendering/src/markdown.rs | 63 +++++++++++++++++++++++++- components/rendering/tests/markdown.rs | 30 ++++++++++-- 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index 437f558..673eca6 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -1,4 +1,5 @@ use pulldown_cmark as cmark; +use regex::Regex; use slug::slugify; use syntect::easy::HighlightLines; use syntect::html::{ @@ -60,11 +61,31 @@ fn find_anchor(anchors: &[String], name: String, level: u8) -> String { find_anchor(anchors, name, level + 1) } +// Returns whether the given string starts with a scheme. +// +// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary, +// private schemes. This function checks if the given string starts with something that just looks +// like a scheme, i.e., a case-insensitive identifier followed by a colon. +// +// [uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml +fn starts_with_schema(s: &str) -> bool { + lazy_static! 
{ + static ref PATTERN: Regex = Regex::new(r"^[0-9A-Za-z\-]+:").unwrap(); + } + + PATTERN.is_match(s) +} + // Colocated asset links refers to the files in the same directory, // there it should be a filename only fn is_colocated_asset_link(link: &str) -> bool { !link.contains('/') // http://, ftp://, ../ etc - && !link.starts_with("mailto:") + && !starts_with_schema(link) +} + +// Returns whether a link starts with an HTTP(s) scheme. +fn is_external_link(link: &str) -> bool { + link.starts_with("http:") || link.starts_with("https:") } fn fix_link( @@ -103,7 +124,7 @@ fn fix_link( } else if is_colocated_asset_link(&link) { format!("{}{}", context.current_page_permalink, link) } else { - if !link.starts_with('#') && !link.starts_with("mailto:") { + if is_external_link(link) { external_links.push(link.to_owned()); } link.to_string() @@ -328,3 +349,41 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Resultfoo@bar.tld

\n"); + let permalinks_ctx = HashMap::new(); + + let context = RenderContext::new( + &tera_ctx, + &config, + "https://vincent.is/", + &permalinks_ctx, + InsertAnchor::None, + ); + + let res = render_content(content, &context).unwrap(); + + let expected = r#"

foo@bar.tld

+

(123) 456-7890

+

blank page

+"#; + + assert_eq!(res.body, expected); }