You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

212 lines
6.6KB

  1. use lazy_static::lazy_static;
  2. use reqwest::header::{HeaderMap, ACCEPT};
  3. use reqwest::StatusCode;
  4. use config::LinkChecker;
  5. use errors::Result;
  6. use std::collections::HashMap;
  7. use std::error::Error;
  8. use std::sync::{Arc, RwLock};
  9. #[derive(Clone, Debug, PartialEq)]
  10. pub struct LinkResult {
  11. pub code: Option<StatusCode>,
  12. /// Whether the HTTP request didn't make it to getting a HTTP code
  13. pub error: Option<String>,
  14. }
  15. impl LinkResult {
  16. pub fn is_valid(&self) -> bool {
  17. if self.error.is_some() {
  18. return false;
  19. }
  20. if let Some(c) = self.code {
  21. return c.is_success();
  22. }
  23. true
  24. }
  25. pub fn message(&self) -> String {
  26. if let Some(ref e) = self.error {
  27. return e.clone();
  28. }
  29. if let Some(c) = self.code {
  30. return format!("{}", c);
  31. }
  32. "Unknown error".to_string()
  33. }
  34. }
  35. lazy_static! {
  36. // Keep history of link checks so a rebuild doesn't have to check again
  37. static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
  38. }
  39. pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
  40. {
  41. let guard = LINKS.read().unwrap();
  42. if let Some(res) = guard.get(url) {
  43. return res.clone();
  44. }
  45. }
  46. let mut headers = HeaderMap::new();
  47. headers.insert(ACCEPT, "text/html".parse().unwrap());
  48. headers.append(ACCEPT, "*/*".parse().unwrap());
  49. let client = reqwest::Client::new();
  50. let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));
  51. // Need to actually do the link checking
  52. let res = match client.get(url).headers(headers).send() {
  53. Ok(ref mut response) if check_anchor && has_anchor(url) => {
  54. match check_page_for_anchor(url, response.text()) {
  55. Ok(_) => LinkResult { code: Some(response.status()), error: None },
  56. Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
  57. }
  58. }
  59. Ok(response) => LinkResult { code: Some(response.status()), error: None },
  60. Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
  61. };
  62. LINKS.write().unwrap().insert(url.to_string(), res.clone());
  63. res
  64. }
  /// Returns whether `url` contains a fragment that should be looked up in the page.
  ///
  /// The fragment is everything after the first `#`. An empty fragment, or one that
  /// starts with `/` or `!` (`#/...` and `#!...` URLs), is not treated as an anchor.
  fn has_anchor(url: &str) -> bool {
      match url.find('#') {
          Some(index) => {
              // `#` is a single byte, so `index + 1` is always a char boundary. Inspecting
              // the rest of the string (instead of a fixed two-byte slice) keeps fragments
              // that start with a multi-byte character from being reported as "no anchor".
              let fragment = &url[index + 1..];
              !(fragment.is_empty() || fragment.starts_with('/') || fragment.starts_with('!'))
          }
          None => false,
      }
  }
  74. fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> {
  75. let body = body.unwrap();
  76. let index = url.find('#').unwrap();
  77. let anchor = url.get(index + 1..).unwrap();
  78. let checks: [String; 4] = [
  79. format!(" id='{}'", anchor),
  80. format!(r#" id="{}""#, anchor),
  81. format!(" name='{}'", anchor),
  82. format!(r#" name="{}""#, anchor),
  83. ];
  84. if checks.iter().any(|check| body[..].contains(&check[..])) {
  85. Ok(())
  86. } else {
  87. Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
  88. }
  89. }
  #[cfg(test)]
  mod tests {
      use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS};

      // NOTE: every test that calls `check_url` performs a real HTTP request.

      #[test]
      fn can_validate_ok_links() {
          let url = "https://google.com";
          let res = check_url(url, &LinkChecker::default());
          assert!(res.is_valid());
          // The first call must have stored its result in the cache...
          assert!(LINKS.read().unwrap().contains_key(url));
          // ...so checking the same URL again still reports it as valid
          let res = check_url(url, &LinkChecker::default());
          assert!(res.is_valid());
      }

      // Despite the name, the assertions below expect the request itself to fail:
      // no HTTP status code is recorded, only an error.
      #[test]
      fn can_fail_404_links() {
          let res = check_url("https://google.comys", &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      #[test]
      fn can_validate_anchors() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = "<body><h3 id='method.collect'>collect</h3></body>".to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      #[test]
      fn can_validate_anchors_with_other_quotes() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      #[test]
      fn can_validate_anchors_with_name_attr() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 name="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      #[test]
      fn can_fail_when_anchor_not_found() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me";
          let body = "<body><h3 id='method.collect'>collect</h3></body>".to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_err());
      }

      #[test]
      fn can_check_url_for_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html#the-rust-standard-library";
          assert!(has_anchor(url));
      }

      #[test]
      fn will_return_false_when_no_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url() {
          let url = "https://doc.rust-lang.org/#/std";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url_alt() {
          let url = "https://doc.rust-lang.org/#!/std";
          assert!(!has_anchor(url));
      }

      #[test]
      fn skip_anchor_prefixes() {
          let config = LinkChecker {
              skip_prefixes: vec![],
              skip_anchor_prefixes: vec!["https://github.com/rust-lang/rust/blob/".to_owned()],
          };

          // anchor check is ignored because the url matches the prefix
          let permalink = "https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214";
          assert!(check_url(permalink, &config).is_valid());

          // other anchors are checked
          let glossary = "https://help.github.com/en/articles/github-glossary#blame";
          assert!(check_url(glossary, &config).is_valid());

          let glossary_invalid =
              "https://help.github.com/en/articles/github-glossary#anchor-does-not-exist";
          assert!(!check_url(glossary_invalid, &config).is_valid());
      }
  }