You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

slugs.rs 3.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  /// Returns a copy of `s` with every character that occurs in `chars` removed.
  ///
  /// `chars` is treated as a set of individual characters, not as a substring:
  /// each character of `s` is kept only if it does not appear anywhere in `chars`.
  fn strip_chars(s: &str, chars: &str) -> String {
      s.chars().filter(|c| !chars.contains(*c)).collect()
  }
  /// Sanitizes `s` for use as a path component without slugifying it.
  ///
  /// Steps, in order:
  /// 1. Remove characters that are forbidden on NTFS
  ///    (see https://gist.github.com/doctaphred/d01d05291546186941e1b7ddc02034d3),
  ///    plus `#`, `(`, `)`, `[`, `]` (the latter four are not allowed in markdown
  ///    links) and newline / carriage return / tab.
  /// 2. Trim trailing spaces and `.` from the result.
  /// 3. Replace every remaining space with `_`.
  ///
  /// The trim runs *after* the removal on purpose: trimming first would leave a
  /// trailing `.` or space behind whenever it was followed by a character that is
  /// subsequently removed (e.g. `"test.)"` would end up as `"test."`).
  fn strip_invalid_paths_chars(s: &str) -> String {
      const FORBIDDEN: &str = "<>:/|?*#()[]\n\"\\\r\t";

      let mut cleaned: String = s.chars().filter(|c| !FORBIDDEN.contains(*c)).collect();

      // Trailing dots and spaces are not valid at the end of an NTFS filename.
      let kept = cleaned
          .trim_end_matches(|c: char| c == ' ' || c == '.')
          .len();
      cleaned.truncate(kept);

      // Spaces are replaced rather than dropped so words stay separated.
      cleaned.replace(' ', "_")
  }
  14. fn strip_invalid_anchors_chars(s: &str) -> String {
  15. // spaces are not valid in markdown links
  16. let cleaned = s.replace(" ", "_");
  17. // https://tools.ietf.org/html/rfc3986#section-3.5
  18. strip_chars(&cleaned, "\"#%<>[\\]()^`{|}")
  19. }
  20. pub fn maybe_slugify_paths(s: &str, slugify: bool) -> String {
  21. if slugify {
  22. // ASCII slugification
  23. slug::slugify(s)
  24. }
  25. else {
  26. // Only remove forbidden characters
  27. strip_invalid_paths_chars(s)
  28. }
  29. }
  30. pub fn maybe_slugify_anchors(s: &str, slugify: bool) -> String {
  31. if slugify {
  32. // ASCII slugification
  33. slug::slugify(s)
  34. }
  35. else {
  36. // Only remove forbidden characters
  37. strip_invalid_anchors_chars(s)
  38. }
  39. }
  #[cfg(test)]
  mod tests {
      use super::*;

      /// Table-driven check of the non-slugifying path sanitizer.
      #[test]
      fn strip_invalid_paths_chars_works() {
          let cases = [
              // no newlines
              ("test\ntest", "testtest"),
              // no whitespaces
              ("test ", "test"),
              ("t est ", "t_est"),
              // invalid NTFS
              ("test .", "test"),
              ("test. ", "test"),
              ("test#test/test?test", "testtesttesttest"),
              // Invalid CommonMark chars in links
              ("test (hey)", "test_hey"),
              ("test (hey", "test_hey"),
              ("test hey)", "test_hey"),
              ("test [hey]", "test_hey"),
              ("test [hey", "test_hey"),
              ("test hey]", "test_hey"),
              // UTF-8
              ("日本", "日本"),
          ];

          for &(raw, want) in cases.iter() {
              assert_eq!(strip_invalid_paths_chars(raw), want);
          }
      }

      /// Table-driven check of the non-slugifying anchor sanitizer.
      #[test]
      fn strip_invalid_anchors_chars_works() {
          let cases = [
              ("日本", "日本"),
              // Some invalid chars get removed
              ("test#", "test"),
              ("test<", "test"),
              ("test%", "test"),
              ("test^", "test"),
              ("test{", "test"),
              ("test|", "test"),
              ("test(", "test"),
              // Spaces are replaced by `_`
              ("test hey", "test_hey"),
          ];

          for &(raw, want) in cases.iter() {
              assert_eq!(strip_invalid_anchors_chars(raw), want);
          }
      }

      /// With slugification on, accented characters are reduced to ASCII.
      #[test]
      fn maybe_slugify_paths_enabled() {
          let slugged = maybe_slugify_paths("héhé", true);
          assert_eq!(slugged, "hehe");
      }

      /// With slugification off, valid non-ASCII characters are left untouched.
      #[test]
      fn maybe_slugify_paths_disabled() {
          let untouched = maybe_slugify_paths("héhé", false);
          assert_eq!(untouched, "héhé");
      }
  }