You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

106 lines
3.0KB

  /// Returns a copy of `s` without any character that appears in `chars`.
  ///
  /// `chars` is treated as a set of individual characters, not as a substring.
  fn strip_chars(s: &str, chars: &str) -> String {
      s.chars().filter(|c| !chars.contains(*c)).collect()
  }
  /// Removes the characters that cannot appear in a path segment and
  /// normalises the rest so the result is usable as a file name.
  ///
  /// The steps are, in order:
  /// 1. drop the NTFS-forbidden characters, `#`, newlines, carriage returns,
  ///    tabs, and `()[]` (which are not allowed in markdown links);
  /// 2. trim trailing spaces and `.` from the end of the name;
  /// 3. replace the remaining spaces with `_`.
  ///
  /// Trimming happens *after* the forbidden characters are removed so that a
  /// trailing `.` or space hidden behind one of them (e.g. `"test.)"`) is
  /// trimmed as well instead of ending up at the end of the result.
  fn strip_invalid_paths_chars(s: &str) -> String {
      // NTFS forbidden characters: https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3
      // plus `#`, `()`/`[]` and the `\n`, `\r`, `\t` whitespace characters.
      const FORBIDDEN: &str = "<>:/|?*#()[]\n\"\\\r\t";

      let stripped: String = s.chars().filter(|c| !FORBIDDEN.contains(*c)).collect();
      stripped
          .trim_end_matches(|c: char| c == ' ' || c == '.')
          .replace(' ', "_")
  }
  14. fn strip_invalid_anchors_chars(s: &str) -> String {
  15. // spaces are not valid in markdown links
  16. let cleaned = s.replace(" ", "_");
  17. // https://tools.ietf.org/html/rfc3986#section-3.5
  18. strip_chars(&cleaned, "\"#%<>[\\]()^`{|}")
  19. }
  20. pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String {
  21. if slugify {
  22. // ASCII slugification
  23. slug::slugify(s)
  24. } else {
  25. // Only remove forbidden characters
  26. strip_invalid_paths_chars(s)
  27. }
  28. }
  29. pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String {
  30. if slugify {
  31. // ASCII slugification
  32. slug::slugify(s)
  33. } else {
  34. // Only remove forbidden characters
  35. strip_invalid_anchors_chars(s)
  36. }
  37. }
  #[cfg(test)]
  mod tests {
      use super::*;

      /// Table-driven check of `strip_invalid_paths_chars`: each pair is
      /// `(input, expected output)`.
      #[test]
      fn strip_invalid_paths_chars_works() {
          let cases = [
              // no newlines
              ("test\ntest", "testtest"),
              // no whitespaces
              ("test ", "test"),
              ("t est ", "t_est"),
              // invalid NTFS
              ("test .", "test"),
              ("test. ", "test"),
              ("test#test/test?test", "testtesttesttest"),
              // Invalid CommonMark chars in links
              ("test (hey)", "test_hey"),
              ("test (hey", "test_hey"),
              ("test hey)", "test_hey"),
              ("test [hey]", "test_hey"),
              ("test [hey", "test_hey"),
              ("test hey]", "test_hey"),
              // UTF-8
              ("日本", "日本"),
          ];

          for &(input, expected) in &cases {
              // Include the input so a failure identifies the offending row.
              assert_eq!(strip_invalid_paths_chars(input), expected, "input: {:?}", input);
          }
      }

      /// Table-driven check of `strip_invalid_anchors_chars`: each pair is
      /// `(input, expected output)`.
      #[test]
      fn strip_invalid_anchors_chars_works() {
          let cases = [
              ("日本", "日本"),
              // Some invalid chars get removed
              ("test#", "test"),
              ("test<", "test"),
              ("test%", "test"),
              ("test^", "test"),
              ("test{", "test"),
              ("test|", "test"),
              ("test(", "test"),
              // Spaces are replaced by `_`
              ("test hey", "test_hey"),
          ];

          for &(input, expected) in &cases {
              // Include the input so a failure identifies the offending row.
              assert_eq!(strip_invalid_anchors_chars(input), expected, "input: {:?}", input);
          }
      }

      #[test]
      fn maybe_slugify_paths_enabled() {
          assert_eq!(maybe_slugify_paths("héhé", true), "hehe");
      }

      #[test]
      fn maybe_slugify_paths_disabled() {
          assert_eq!(maybe_slugify_paths("héhé", false), "héhé");
      }

      #[test]
      fn maybe_slugify_anchors_enabled() {
          assert_eq!(maybe_slugify_anchors("héhé", true), "hehe");
      }

      #[test]
      fn maybe_slugify_anchors_disabled() {
          assert_eq!(maybe_slugify_anchors("héhé", false), "héhé");
      }
  }