You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

353 lines
11KB

  1. use lazy_static::lazy_static;
  2. use reqwest::header::{HeaderMap, ACCEPT};
  3. use reqwest::StatusCode;
  4. use config::LinkChecker;
  5. use errors::Result;
  6. use std::collections::HashMap;
  7. use std::error::Error;
  8. use std::sync::{Arc, RwLock};
  9. #[derive(Clone, Debug, PartialEq)]
  10. pub struct LinkResult {
  11. pub code: Option<StatusCode>,
  12. /// Whether the HTTP request didn't make it to getting a HTTP code
  13. pub error: Option<String>,
  14. }
  15. impl LinkResult {
  16. pub fn is_valid(&self) -> bool {
  17. if self.error.is_some() {
  18. return false;
  19. }
  20. if let Some(c) = self.code {
  21. return c.is_success() || c == StatusCode::NOT_MODIFIED;
  22. }
  23. true
  24. }
  25. pub fn message(&self) -> String {
  26. if let Some(ref e) = self.error {
  27. return e.clone();
  28. }
  29. if let Some(c) = self.code {
  30. return format!("{}", c);
  31. }
  32. "Unknown error".to_string()
  33. }
  34. }
  35. lazy_static! {
  36. // Keep history of link checks so a rebuild doesn't have to check again
  37. static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
  38. }
  39. pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
  40. {
  41. let guard = LINKS.read().unwrap();
  42. if let Some(res) = guard.get(url) {
  43. return res.clone();
  44. }
  45. }
  46. let mut headers = HeaderMap::new();
  47. headers.insert(ACCEPT, "text/html".parse().unwrap());
  48. headers.append(ACCEPT, "*/*".parse().unwrap());
  49. let client = reqwest::Client::new();
  50. let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));
  51. // Need to actually do the link checking
  52. let res = match client.get(url).headers(headers).send() {
  53. Ok(ref mut response) if check_anchor && has_anchor(url) => {
  54. match check_page_for_anchor(url, response.text()) {
  55. Ok(_) => LinkResult { code: Some(response.status()), error: None },
  56. Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
  57. }
  58. }
  59. Ok(response) => {
  60. if response.status().is_success() || response.status() == StatusCode::NOT_MODIFIED {
  61. LinkResult { code: Some(response.status()), error: None }
  62. } else {
  63. let error_string = if response.status().is_informational() {
  64. format!("Informational status code ({}) received", response.status())
  65. } else if response.status().is_redirection() {
  66. format!("Redirection status code ({}) received", response.status())
  67. } else if response.status().is_client_error() {
  68. format!("Client error status code ({}) received", response.status())
  69. } else if response.status().is_server_error() {
  70. format!("Server error status code ({}) received", response.status())
  71. } else {
  72. format!("Non-success status code ({}) received", response.status())
  73. };
  74. LinkResult { code: None, error: Some(error_string) }
  75. }
  76. }
  77. Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
  78. };
  79. LINKS.write().unwrap().insert(url.to_string(), res.clone());
  80. res
  81. }
  /// Returns `true` when `url` contains a `#` that introduces a page anchor.
  ///
  /// Only the first `#` is considered. It does not count as an anchor when
  /// nothing follows it, or when it is followed by `/` or `!` (the `#/` and
  /// `#!` forms used by client-side routers).
  fn has_anchor(url: &str) -> bool {
      let fragment = match url.find('#') {
          // `#` is a single byte, so `index + 1` is always a char boundary.
          Some(index) => &url[index + 1..],
          None => return false,
      };

      // Look at the first character rather than the first byte so that anchors
      // starting with a multi-byte character are still recognised.
      match fragment.chars().next() {
          None | Some('/') | Some('!') => false,
          Some(_) => true,
      }
  }
  91. fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> {
  92. let body = body.unwrap();
  93. let index = url.find('#').unwrap();
  94. let anchor = url.get(index + 1..).unwrap();
  95. let checks: [String; 4] = [
  96. format!(" id='{}'", anchor),
  97. format!(r#" id="{}""#, anchor),
  98. format!(" name='{}'", anchor),
  99. format!(r#" name="{}""#, anchor),
  100. ];
  101. if checks.iter().any(|check| body[..].contains(&check[..])) {
  102. Ok(())
  103. } else {
  104. Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
  105. }
  106. }
  #[cfg(test)]
  mod tests {
      use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS};
      use mockito::mock;

      // NOTE: HTTP mock paths below are randomly generated to avoid name
      // collisions. Mocks with the same path can sometimes bleed between tests
      // and cause them to randomly pass/fail. Please make sure to use unique
      // paths when adding or modifying tests that use Mockito.

      /// A plain 200 response is valid and the result is stored in the cache.
      #[test]
      fn can_validate_ok_links() {
          let url = format!("{}{}", mockito::server_url(), "/ekbtwxfhjw");
          let _m = mock("GET", "/ekbtwxfhjw")
              .with_header("Content-Type", "text/html")
              .with_body(format!(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<a href="{}">Mock URL</a>
</body>
</html>
"#,
                  url
              ))
              .create();

          let res = check_url(&url, &LinkChecker::default());
          assert!(res.is_valid());
          assert!(LINKS.read().unwrap().get(&url).is_some());
      }

      /// A 301 pointing at a working page is followed and reported as valid.
      #[test]
      fn can_follow_301_links() {
          let _m1 = mock("GET", "/c7qrtrv3zz")
              .with_status(301)
              .with_header("Content-Type", "text/plain")
              .with_header("Location", format!("{}/rbs5avjs8e", mockito::server_url()).as_str())
              .with_body("Redirecting...")
              .create();

          let _m2 = mock("GET", "/rbs5avjs8e")
              .with_header("Content-Type", "text/plain")
              .with_body("Test")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/c7qrtrv3zz");
          let res = check_url(&url, &LinkChecker::default());
          assert!(res.is_valid());
          assert!(res.code.is_some());
          assert!(res.error.is_none());
      }

      /// A 301 pointing at a 404 page is reported as invalid.
      #[test]
      fn can_fail_301_to_404_links() {
          let _m1 = mock("GET", "/cav9vibhsc")
              .with_status(301)
              .with_header("Content-Type", "text/plain")
              .with_header("Location", format!("{}/72zmfg4smd", mockito::server_url()).as_str())
              .with_body("Redirecting...")
              .create();

          let _m2 = mock("GET", "/72zmfg4smd")
              .with_status(404)
              .with_header("Content-Type", "text/plain")
              .with_body("Not Found")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/cav9vibhsc");
          let res = check_url(&url, &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      /// A 404 response is reported as invalid with an error message.
      #[test]
      fn can_fail_404_links() {
          let _m = mock("GET", "/nlhab9c1vc")
              .with_status(404)
              .with_header("Content-Type", "text/plain")
              .with_body("Not Found")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/nlhab9c1vc");
          let res = check_url(&url, &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      /// A 500 response is reported as invalid with an error message.
      #[test]
      fn can_fail_500_links() {
          let _m = mock("GET", "/qdbrssazes")
              .with_status(500)
              .with_header("Content-Type", "text/plain")
              .with_body("Internal Server Error")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/qdbrssazes");
          let res = check_url(&url, &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      /// A host that cannot be resolved is reported as invalid.
      #[test]
      fn can_fail_unresolved_links() {
          let res = check_url("https://t6l5cn9lpm.lxizfnzckd", &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      /// An anchor given as a double-quoted `id` attribute is found.
      #[test]
      fn can_validate_anchors() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      /// An anchor given as a single-quoted `id` attribute is found.
      #[test]
      fn can_validate_anchors_with_other_quotes() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          // Single quotes here; the double-quoted form is covered by
          // `can_validate_anchors`.
          let body = r#"<body><h3 id='method.collect'>collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      /// An anchor given as a `name` attribute is found.
      #[test]
      fn can_validate_anchors_with_name_attr() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 name="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_ok());
      }

      /// An anchor that is only a prefix of an existing id is not accepted.
      #[test]
      fn can_fail_when_anchor_not_found() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me";
          let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, Ok(body));
          assert!(res.is_err());
      }

      #[test]
      fn can_check_url_for_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html#the-rust-standard-library";
          assert!(has_anchor(url));
      }

      #[test]
      fn will_return_false_when_no_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url() {
          let url = "https://doc.rust-lang.org/#/std";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url_alt() {
          let url = "https://doc.rust-lang.org/#!/std";
          assert!(!has_anchor(url));
      }

      /// Anchors are not verified for URLs matching `skip_anchor_prefixes`, but
      /// are still verified for every other URL.
      #[test]
      fn skip_anchor_prefixes() {
          let ignore_url = format!("{}{}", mockito::server_url(), "/ignore/");
          let config = LinkChecker { skip_prefixes: vec![], skip_anchor_prefixes: vec![ignore_url] };

          let _m1 = mock("GET", "/ignore/i30hobj1cy")
              .with_header("Content-Type", "text/html")
              .with_body(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Ignore</title>
</head>
<body>
<p id="existent"></p>
</body>
</html>
"#,
              )
              .create();

          // anchor check is ignored because the url matches the prefix
          let ignore = format!("{}{}", mockito::server_url(), "/ignore/i30hobj1cy#nonexistent");
          assert!(check_url(&ignore, &config).is_valid());

          let _m2 = mock("GET", "/guvqcqwmth")
              .with_header("Content-Type", "text/html")
              .with_body(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<p id="existent"></p>
</body>
</html>
"#,
              )
              .create();

          // other anchors are checked
          let existent = format!("{}{}", mockito::server_url(), "/guvqcqwmth#existent");
          assert!(check_url(&existent, &config).is_valid());
          let nonexistent = format!("{}{}", mockito::server_url(), "/guvqcqwmth#nonexistent");
          assert!(!check_url(&nonexistent, &config).is_valid());
      }
  }