You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

218 lines
6.7KB

  1. extern crate reqwest;
  2. #[macro_use]
  3. extern crate lazy_static;
  4. extern crate config;
  5. extern crate errors;
  6. use reqwest::header::{HeaderMap, ACCEPT};
  7. use reqwest::StatusCode;
  8. use config::LinkChecker;
  9. use errors::Result;
  10. use std::collections::HashMap;
  11. use std::error::Error;
  12. use std::sync::{Arc, RwLock};
  13. #[derive(Clone, Debug, PartialEq)]
  14. pub struct LinkResult {
  15. pub code: Option<StatusCode>,
  16. /// Whether the HTTP request didn't make it to getting a HTTP code
  17. pub error: Option<String>,
  18. }
  19. impl LinkResult {
  20. pub fn is_valid(&self) -> bool {
  21. if self.error.is_some() {
  22. return false;
  23. }
  24. if let Some(c) = self.code {
  25. return c.is_success();
  26. }
  27. true
  28. }
  29. pub fn message(&self) -> String {
  30. if let Some(ref e) = self.error {
  31. return e.clone();
  32. }
  33. if let Some(c) = self.code {
  34. return format!("{}", c);
  35. }
  36. "Unknown error".to_string()
  37. }
  38. }
  39. lazy_static! {
  40. // Keep history of link checks so a rebuild doesn't have to check again
  41. static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
  42. }
  43. pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
  44. {
  45. let guard = LINKS.read().unwrap();
  46. if let Some(res) = guard.get(url) {
  47. return res.clone();
  48. }
  49. }
  50. let mut headers = HeaderMap::new();
  51. headers.insert(ACCEPT, "text/html".parse().unwrap());
  52. headers.append(ACCEPT, "*/*".parse().unwrap());
  53. let client = reqwest::Client::new();
  54. let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));
  55. // Need to actually do the link checking
  56. let res = match client.get(url).headers(headers).send() {
  57. Ok(ref mut response) if check_anchor && has_anchor(url) => {
  58. match check_page_for_anchor(url, response.text()) {
  59. Ok(_) => LinkResult { code: Some(response.status()), error: None },
  60. Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
  61. }
  62. }
  63. Ok(response) => LinkResult { code: Some(response.status()), error: None },
  64. Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
  65. };
  66. LINKS.write().unwrap().insert(url.to_string(), res.clone());
  67. res
  68. }
  /// Returns `true` when `url` contains a `#` followed by a non-empty fragment that does not
  /// start with `/` or `!`.
  ///
  /// Fragments starting with `/` or `!` (as in `#/std` or `#!/std`) are not treated as
  /// anchors, and neither is a trailing `#` with nothing after it.
  fn has_anchor(url: &str) -> bool {
      let fragment = match url.find('#') {
          // `#` is a single byte, so `index + 1` is always a valid char boundary.
          Some(index) => &url[index + 1..],
          None => return false,
      };

      // Look at the first *character* rather than the first byte, so that a fragment starting
      // with a multi-byte character is still recognised as an anchor.
      match fragment.chars().next() {
          None | Some('/') | Some('!') => false,
          Some(_) => true,
      }
  }
  78. fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> {
  79. let body = body.unwrap();
  80. let index = url.find('#').unwrap();
  81. let anchor = url.get(index + 1..).unwrap();
  82. let checks: [String; 4] = [
  83. format!(" id='{}'", anchor),
  84. format!(r#" id="{}""#, anchor),
  85. format!(" name='{}'", anchor),
  86. format!(r#" name="{}""#, anchor),
  87. ];
  88. if checks.iter().any(|check| body[..].contains(&check[..])) {
  89. Ok(())
  90. } else {
  91. Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
  92. }
  93. }
  #[cfg(test)]
  mod tests {
      use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS};

      // URL with an anchor, shared by the tests that match against an in-memory page body.
      const COLLECT_URL: &str =
          "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";

      #[test]
      fn can_validate_ok_links() {
          let url = "https://google.com";
          let config = LinkChecker::default();

          assert!(check_url(url, &config).is_valid());
          // The first call must have stored its result in the cache.
          assert!(LINKS.read().unwrap().contains_key(url));
          // The second call is answered from the cache.
          assert!(check_url(url, &config).is_valid());
      }

      #[test]
      fn can_fail_404_links() {
          // The assertions expect a failure without any status code, presumably because
          // this host does not resolve.
          let res = check_url("https://google.comys", &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      #[test]
      fn can_validate_anchors() {
          let page = String::from("<body><h3 id='method.collect'>collect</h3></body>");
          assert!(check_page_for_anchor(COLLECT_URL, Ok(page)).is_ok());
      }

      #[test]
      fn can_validate_anchors_with_other_quotes() {
          let page = String::from(r#"<body><h3 id="method.collect">collect</h3></body>"#);
          assert!(check_page_for_anchor(COLLECT_URL, Ok(page)).is_ok());
      }

      #[test]
      fn can_validate_anchors_with_name_attr() {
          let page = String::from(r#"<body><h3 name="method.collect">collect</h3></body>"#);
          assert!(check_page_for_anchor(COLLECT_URL, Ok(page)).is_ok());
      }

      #[test]
      fn can_fail_when_anchor_not_found() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me";
          let page = String::from("<body><h3 id='method.collect'>collect</h3></body>");
          assert!(check_page_for_anchor(url, Ok(page)).is_err());
      }

      #[test]
      fn can_check_url_for_anchor() {
          assert!(has_anchor("https://doc.rust-lang.org/std/index.html#the-rust-standard-library"));
      }

      #[test]
      fn will_return_false_when_no_anchor() {
          assert!(!has_anchor("https://doc.rust-lang.org/std/index.html"));
      }

      #[test]
      fn will_return_false_when_has_router_url() {
          assert!(!has_anchor("https://doc.rust-lang.org/#/std"));
      }

      #[test]
      fn will_return_false_when_has_router_url_alt() {
          assert!(!has_anchor("https://doc.rust-lang.org/#!/std"));
      }

      #[test]
      fn skip_anchor_prefixes() {
          let config = LinkChecker {
              skip_prefixes: Vec::new(),
              skip_anchor_prefixes: vec![String::from("https://github.com/rust-lang/rust/blob/")],
          };

          // The URL matches the configured prefix, so its anchor is not checked.
          let permalink = "https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214";
          assert!(check_url(permalink, &config).is_valid());

          // Anchors on any other URL are still checked.
          let glossary = "https://help.github.com/en/articles/github-glossary#blame";
          assert!(check_url(glossary, &config).is_valid());

          let glossary_invalid =
              "https://help.github.com/en/articles/github-glossary#anchor-does-not-exist";
          assert!(!check_url(glossary_invalid, &config).is_valid());
      }
  }