jstrong
/
zola

fn strip_chars(s: &str, chars: &str) -> String {
    let mut sanitized_string = s.to_string();
    sanitized_string.retain( |c| !chars.contains(c));
    sanitized_string
}

fn strip_invalid_paths_chars(s: &str) -> String {
    // NTFS forbidden characters : https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3
    // Also we need to trim . from the end of filename
    let trimmed = s.trim_end_matches(|c| c == ' ' || c == '.');
    let cleaned = trimmed.replace(" ", "_");
    // And () [] since they are not allowed in markdown links
    strip_chars(&cleaned, "<>:/|?*#()[]\n\"\\\r\t")
}

fn strip_invalid_anchors_chars(s: &str) -> String {
    // spaces are not valid in markdown links
    let cleaned = s.replace(" ", "_");
    // https://tools.ietf.org/html/rfc3986#section-3.5
    strip_chars(&cleaned, "\"#%<>[\\]()^`{|}")
}

pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String {
    if slugify {
        // ASCII slugification
        slug::slugify(s)
    }
    else {
        // Only remove forbidden characters
        strip_invalid_paths_chars(s)
    }
}

pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String {
    if slugify {
        // ASCII slugification
        slug::slugify(s)
    }
    else {
        // Only remove forbidden characters
        strip_invalid_anchors_chars(s)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn strip_invalid_paths_chars_works() {
        let tests = vec![
            // no newlines
            ("test\ntest", "testtest"),
            // no whitespaces
            ("test ", "test"),
            ("t est ", "t_est"),
            // invalid NTFS
            ("test .", "test"),
            ("test. ", "test"),
            ("test#test/test?test", "testtesttesttest"),
            // Invalid CommonMark chars in links
            ("test (hey)", "test_hey"),
            ("test (hey", "test_hey"),
            ("test hey)", "test_hey"),
            ("test [hey]", "test_hey"),
            ("test [hey", "test_hey"),
            ("test hey]", "test_hey"),
            // UTF-8
            ("日本", "日本"),
        ];

        for (input, expected) in tests {
            assert_eq!(strip_invalid_paths_chars(&input), expected);
        }
    }

    #[test]
    fn strip_invalid_anchors_chars_works() {
        let tests = vec![
            ("日本", "日本"),
            // Some invalid chars get removed
            ("test#", "test"),
            ("test<", "test"),
            ("test%", "test"),
            ("test^", "test"),
            ("test{", "test"),
            ("test|", "test"),
            ("test(", "test"),
            // Spaces are replaced by `_`
            ("test hey", "test_hey"),
        ];

        for (input, expected) in tests {
            assert_eq!(strip_invalid_anchors_chars(&input), expected);
        }
    }

    #[test]
    fn maybe_slugify_paths_enabled() {
        assert_eq!(maybe_slugify_paths("héhé", true), "hehe");
    }

    #[test]
    fn maybe_slugify_paths_disabled() {
        assert_eq!(maybe_slugify_paths("héhé", false), "héhé");
    }
}