You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

302 lines
9.0KB

  1. use lazy_static::lazy_static;
  2. use reqwest::header::{HeaderMap, ACCEPT};
  3. use reqwest::StatusCode;
  4. use config::LinkChecker;
  5. use errors::Result;
  6. use std::collections::HashMap;
  7. use std::error::Error;
  8. use std::sync::{Arc, RwLock};
  9. #[derive(Clone, Debug, PartialEq)]
  10. pub struct LinkResult {
  11. pub code: Option<StatusCode>,
  12. /// Whether the HTTP request didn't make it to getting a HTTP code
  13. pub error: Option<String>,
  14. }
  15. impl LinkResult {
  16. pub fn is_valid(&self) -> bool {
  17. if self.error.is_some() {
  18. return false;
  19. }
  20. if let Some(c) = self.code {
  21. return c.is_success();
  22. }
  23. true
  24. }
  25. pub fn message(&self) -> String {
  26. if let Some(ref e) = self.error {
  27. return e.clone();
  28. }
  29. if let Some(c) = self.code {
  30. return format!("{}", c);
  31. }
  32. "Unknown error".to_string()
  33. }
  34. }
  35. lazy_static! {
  36. // Keep history of link checks so a rebuild doesn't have to check again
  37. static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
  38. }
  39. pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
  40. {
  41. let guard = LINKS.read().unwrap();
  42. if let Some(res) = guard.get(url) {
  43. return res.clone();
  44. }
  45. }
  46. let mut headers = HeaderMap::new();
  47. headers.insert(ACCEPT, "text/html".parse().unwrap());
  48. headers.append(ACCEPT, "*/*".parse().unwrap());
  49. let client = reqwest::Client::new();
  50. let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));
  51. // Need to actually do the link checking
  52. let res = match client.get(url).headers(headers).send() {
  53. Ok(ref mut response) if check_anchor && has_anchor(url) => {
  54. match check_page_for_anchor(url, response.text()) {
  55. Ok(_) => LinkResult { code: Some(response.status()), error: None },
  56. Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
  57. }
  58. }
  59. Ok(response) => {
  60. if response.status().is_success() {
  61. LinkResult { code: Some(response.status()), error: None }
  62. } else {
  63. let error_string = if response.status().is_informational() {
  64. String::from(format!(
  65. "Informational status code ({}) received",
  66. response.status()
  67. ))
  68. } else if response.status().is_redirection() {
  69. String::from(format!(
  70. "Redirection status code ({}) received",
  71. response.status()
  72. ))
  73. } else if response.status().is_client_error() {
  74. String::from(format!(
  75. "Client error status code ({}) received",
  76. response.status()
  77. ))
  78. } else if response.status().is_server_error() {
  79. String::from(format!(
  80. "Server error status code ({}) received",
  81. response.status()
  82. ))
  83. } else {
  84. String::from("Non-success status code received")
  85. };
  86. LinkResult { code: None, error: Some(error_string) }
  87. }
  88. }
  89. Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
  90. };
  91. LINKS.write().unwrap().insert(url.to_string(), res.clone());
  92. res
  93. }
  /// Returns `true` when `url` contains a `#` fragment that names an anchor.
  ///
  /// A URL without `#`, or with nothing after it, has no anchor. Fragments that start
  /// with `/` or `!` (`#/...`, `#!...`) are router-style URLs and are not anchors either.
  /// Only the first `#` in `url` is considered.
  fn has_anchor(url: &str) -> bool {
      url.find('#')
          // `#` is a single byte, so `index + 1` is always a char boundary (possibly the
          // end of the string). Looking at the next `char` rather than the next byte keeps
          // anchors that start with a multi-byte character from being missed.
          .and_then(|index| url[index + 1..].chars().next())
          .map_or(false, |first| first != '/' && first != '!')
  }
  103. fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> {
  104. let body = body.unwrap();
  105. let index = url.find('#').unwrap();
  106. let anchor = url.get(index + 1..).unwrap();
  107. let checks: [String; 4] = [
  108. format!(" id='{}'", anchor),
  109. format!(r#" id="{}""#, anchor),
  110. format!(" name='{}'", anchor),
  111. format!(r#" name="{}""#, anchor),
  112. ];
  113. if checks.iter().any(|check| body[..].contains(&check[..])) {
  114. Ok(())
  115. } else {
  116. Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
  117. }
  118. }
  #[cfg(test)]
  mod tests {
      use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS};
      use mockito::mock;

      // A successful response is valid and its result is recorded in `LINKS`.
      #[test]
      fn can_validate_ok_links() {
          let url = format!("{}{}", mockito::server_url(), "/test");
          let _m = mock("GET", "/test")
              .with_header("content-type", "text/html")
              .with_body(format!(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<a href="{}">Mock URL</a>
</body>
</html>
"#,
                  url
              ))
              .create();

          let res = check_url(&url, &LinkChecker::default());
          assert!(res.is_valid());
          assert!(LINKS.read().unwrap().get(&url).is_some());
      }

      // NOTE(review): this goes through real name resolution and relies on
      // `google.comys` not resolving.
      #[test]
      fn can_fail_unresolved_links() {
          let res = check_url("https://google.comys", &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      #[test]
      fn can_fail_404_links() {
          let _m = mock("GET", "/404")
              .with_status(404)
              .with_header("content-type", "text/plain")
              .with_body("Not Found")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/404");
          let res = check_url(&url, &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      #[test]
      fn can_validate_anchors() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      // Uses single quotes around the attribute value, unlike `can_validate_anchors`.
      #[test]
      fn can_validate_anchors_with_other_quotes() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 id='method.collect'>collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      #[test]
      fn can_validate_anchors_with_name_attr() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 name="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      #[test]
      fn can_fail_when_anchor_not_found() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me";
          let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_err());
      }

      #[test]
      fn can_check_url_for_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html#the-rust-standard-library";
          assert!(has_anchor(url));
      }

      #[test]
      fn will_return_false_when_no_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_anchor_is_empty() {
          let url = "https://doc.rust-lang.org/std/index.html#";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url() {
          let url = "https://doc.rust-lang.org/#/std";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url_alt() {
          let url = "https://doc.rust-lang.org/#!/std";
          assert!(!has_anchor(url));
      }

      #[test]
      fn skip_anchor_prefixes() {
          let ignore_url = format!("{}{}", mockito::server_url(), "/ignore/");
          let config = LinkChecker { skip_prefixes: vec![], skip_anchor_prefixes: vec![ignore_url] };

          let _m1 = mock("GET", "/ignore/test")
              .with_header("content-type", "text/html")
              .with_body(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Ignore</title>
</head>
<body>
<p id="existent"></p>
</body>
</html>
"#,
              )
              .create();

          // anchor check is ignored because the url matches the prefix
          let ignore = format!("{}{}", mockito::server_url(), "/ignore/test#nonexistent");
          assert!(check_url(&ignore, &config).is_valid());

          let _m2 = mock("GET", "/test")
              .with_header("content-type", "text/html")
              .with_body(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<p id="existent"></p>
</body>
</html>
"#,
              )
              .create();

          // other anchors are checked
          let existent = format!("{}{}", mockito::server_url(), "/test#existent");
          assert!(check_url(&existent, &config).is_valid());

          let nonexistent = format!("{}{}", mockito::server_url(), "/test#nonexistent");
          assert!(!check_url(&nonexistent, &config).is_valid());
      }
  }