fn strip_chars(s: &str, chars: &str) -> String { let mut sanitized_string = s.to_string(); sanitized_string.retain( |c| !chars.contains(c)); sanitized_string } fn strip_invalid_paths_chars(s: &str) -> String { // NTFS forbidden characters : https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3 // Also we need to trim . from the end of filename let trimmed = s.trim_end_matches(|c| c == ' ' || c == '.'); let cleaned = trimmed.replace(" ", "_"); // And () [] since they are not allowed in markdown links strip_chars(&cleaned, "<>:/|?*#()[]\n\"\\\r\t") } fn strip_invalid_anchors_chars(s: &str) -> String { // spaces are not valid in markdown links let cleaned = s.replace(" ", "_"); // https://tools.ietf.org/html/rfc3986#section-3.5 strip_chars(&cleaned, "\"#%<>[\\]()^`{|}") } pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String { if slugify { // ASCII slugification slug::slugify(s) } else { // Only remove forbidden characters strip_invalid_paths_chars(s) } } pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String { if slugify { // ASCII slugification slug::slugify(s) } else { // Only remove forbidden characters strip_invalid_anchors_chars(s) } } #[cfg(test)] mod tests { use super::*; #[test] fn strip_invalid_paths_chars_works() { let tests = vec![ // no newlines ("test\ntest", "testtest"), // no whitespaces ("test ", "test"), ("t est ", "t_est"), // invalid NTFS ("test .", "test"), ("test. ", "test"), ("test#test/test?test", "testtesttesttest"), // Invalid CommonMark chars in links ("test (hey)", "test_hey"), ("test (hey", "test_hey"), ("test hey)", "test_hey"), ("test [hey]", "test_hey"), ("test [hey", "test_hey"), ("test hey]", "test_hey"), // UTF-8 ("日本", "日本"), ]; for (input, expected) in tests { assert_eq!(strip_invalid_paths_chars(&input), expected); } } #[test] fn strip_invalid_anchors_chars_works() { let tests = vec![ ("日本", "日本"), // Some invalid chars get removed ("test#", "test"), ("test<", "test"), ("test%", "test"), ("test^", "test"), ("test{", "test"), ("test|", "test"), ("test(", "test"), // Spaces are replaced by `_` ("test hey", "test_hey"), ]; for (input, expected) in tests { assert_eq!(strip_invalid_anchors_chars(&input), expected); } } #[test] fn maybe_slugify_paths_enabled() { assert_eq!(maybe_slugify_paths("héhé", true), "hehe"); } #[test] fn maybe_slugify_paths_disabled() { assert_eq!(maybe_slugify_paths("héhé", false), "héhé"); } }