You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

357 lines
11KB

  1. use lazy_static::lazy_static;
  2. use reqwest::header::{HeaderMap, ACCEPT};
  3. use reqwest::{blocking::Client, StatusCode};
  4. use config::LinkChecker;
  5. use errors::Result;
  6. use std::collections::HashMap;
  7. use std::sync::{Arc, RwLock};
  8. #[derive(Clone, Debug, PartialEq)]
  9. pub struct LinkResult {
  10. pub code: Option<StatusCode>,
  11. /// Whether the HTTP request didn't make it to getting a HTTP code
  12. pub error: Option<String>,
  13. }
  14. impl LinkResult {
  15. pub fn is_valid(&self) -> bool {
  16. if self.error.is_some() {
  17. return false;
  18. }
  19. if let Some(c) = self.code {
  20. return c.is_success() || c == StatusCode::NOT_MODIFIED;
  21. }
  22. true
  23. }
  24. pub fn message(&self) -> String {
  25. if let Some(ref e) = self.error {
  26. return e.clone();
  27. }
  28. if let Some(c) = self.code {
  29. return format!("{}", c);
  30. }
  31. "Unknown error".to_string()
  32. }
  33. }
  34. lazy_static! {
  35. // Keep history of link checks so a rebuild doesn't have to check again
  36. static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
  37. }
  38. pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
  39. {
  40. let guard = LINKS.read().unwrap();
  41. if let Some(res) = guard.get(url) {
  42. return res.clone();
  43. }
  44. }
  45. let mut headers = HeaderMap::new();
  46. headers.insert(ACCEPT, "text/html".parse().unwrap());
  47. headers.append(ACCEPT, "*/*".parse().unwrap());
  48. let client = Client::new();
  49. let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));
  50. // Need to actually do the link checking
  51. let res = match client.get(url).headers(headers).send() {
  52. Ok(ref mut response) if check_anchor && has_anchor(url) => {
  53. let body = {
  54. let mut buf: Vec<u8> = vec![];
  55. response.copy_to(&mut buf).unwrap();
  56. String::from_utf8(buf).unwrap()
  57. };
  58. match check_page_for_anchor(url, body) {
  59. Ok(_) => LinkResult { code: Some(response.status()), error: None },
  60. Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
  61. }
  62. }
  63. Ok(response) => {
  64. if response.status().is_success() || response.status() == StatusCode::NOT_MODIFIED {
  65. LinkResult { code: Some(response.status()), error: None }
  66. } else {
  67. let error_string = if response.status().is_informational() {
  68. format!("Informational status code ({}) received", response.status())
  69. } else if response.status().is_redirection() {
  70. format!("Redirection status code ({}) received", response.status())
  71. } else if response.status().is_client_error() {
  72. format!("Client error status code ({}) received", response.status())
  73. } else if response.status().is_server_error() {
  74. format!("Server error status code ({}) received", response.status())
  75. } else {
  76. format!("Non-success status code ({}) received", response.status())
  77. };
  78. LinkResult { code: None, error: Some(error_string) }
  79. }
  80. }
  81. Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
  82. };
  83. LINKS.write().unwrap().insert(url.to_string(), res.clone());
  84. res
  85. }
  /// Returns `true` when `url` contains a fragment that should be looked up as
  /// an anchor on the target page.
  ///
  /// Only the first `#` is considered. An empty fragment and fragments that
  /// begin with `/` or `!` (the `#/` and `#!` forms) yield `false`.
  fn has_anchor(url: &str) -> bool {
      let fragment = match url.find('#') {
          // `#` is a single byte, so `index + 1` is always a valid boundary.
          Some(index) => &url[index + 1..],
          None => return false,
      };

      // Inspect the first *character* rather than a fixed byte range, so that a
      // fragment starting with a multi-byte character is still recognised.
      match fragment.chars().next() {
          Some('/') | Some('!') | None => false,
          Some(_) => true,
      }
  }
  95. fn check_page_for_anchor(url: &str, body: String) -> Result<()> {
  96. let index = url.find('#').unwrap();
  97. let anchor = url.get(index + 1..).unwrap();
  98. let checks: [String; 4] = [
  99. format!(" id='{}'", anchor),
  100. format!(r#" id="{}""#, anchor),
  101. format!(" name='{}'", anchor),
  102. format!(r#" name="{}""#, anchor),
  103. ];
  104. if checks.iter().any(|check| body[..].contains(&check[..])) {
  105. Ok(())
  106. } else {
  107. Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
  108. }
  109. }
  #[cfg(test)]
  mod tests {
      use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS};
      use mockito::mock;

      // NOTE: HTTP mock paths below are randomly generated to avoid name
      // collisions. Mocks with the same path can sometimes bleed between tests
      // and cause them to randomly pass/fail. Please make sure to use unique
      // paths when adding or modifying tests that use Mockito.

      /// A 200 response is valid and its result ends up in the `LINKS` cache.
      #[test]
      fn can_validate_ok_links() {
          let url = format!("{}{}", mockito::server_url(), "/ekbtwxfhjw");
          let _m = mock("GET", "/ekbtwxfhjw")
              .with_header("Content-Type", "text/html")
              .with_body(format!(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<a href="{}">Mock URL</a>
</body>
</html>
"#,
                  url
              ))
              .create();

          let res = check_url(&url, &LinkChecker::default());
          assert!(res.is_valid());
          assert!(LINKS.read().unwrap().get(&url).is_some());
      }

      /// A 301 pointing at a 200 page is followed and reported as valid.
      #[test]
      fn can_follow_301_links() {
          let _m1 = mock("GET", "/c7qrtrv3zz")
              .with_status(301)
              .with_header("Content-Type", "text/plain")
              .with_header("Location", format!("{}/rbs5avjs8e", mockito::server_url()).as_str())
              .with_body("Redirecting...")
              .create();

          let _m2 = mock("GET", "/rbs5avjs8e")
              .with_header("Content-Type", "text/plain")
              .with_body("Test")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/c7qrtrv3zz");
          let res = check_url(&url, &LinkChecker::default());
          assert!(res.is_valid());
          assert!(res.code.is_some());
          assert!(res.error.is_none());
      }

      /// A 301 pointing at a 404 page is reported as an error.
      #[test]
      fn can_fail_301_to_404_links() {
          let _m1 = mock("GET", "/cav9vibhsc")
              .with_status(301)
              .with_header("Content-Type", "text/plain")
              .with_header("Location", format!("{}/72zmfg4smd", mockito::server_url()).as_str())
              .with_body("Redirecting...")
              .create();

          let _m2 = mock("GET", "/72zmfg4smd")
              .with_status(404)
              .with_header("Content-Type", "text/plain")
              .with_body("Not Found")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/cav9vibhsc");
          let res = check_url(&url, &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      #[test]
      fn can_fail_404_links() {
          let _m = mock("GET", "/nlhab9c1vc")
              .with_status(404)
              .with_header("Content-Type", "text/plain")
              .with_body("Not Found")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/nlhab9c1vc");
          let res = check_url(&url, &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      #[test]
      fn can_fail_500_links() {
          let _m = mock("GET", "/qdbrssazes")
              .with_status(500)
              .with_header("Content-Type", "text/plain")
              .with_body("Internal Server Error")
              .create();

          let url = format!("{}{}", mockito::server_url(), "/qdbrssazes");
          let res = check_url(&url, &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      /// A host that cannot be resolved yields an error without a status code.
      #[test]
      fn can_fail_unresolved_links() {
          let res = check_url("https://t6l5cn9lpm.lxizfnzckd", &LinkChecker::default());
          assert!(!res.is_valid());
          assert!(res.code.is_none());
          assert!(res.error.is_some());
      }

      #[test]
      fn can_validate_anchors() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, body);
          assert!(res.is_ok());
      }

      /// Same as `can_validate_anchors`, but the attribute value is wrapped in
      /// single quotes so the single-quote patterns are exercised.
      #[test]
      fn can_validate_anchors_with_other_quotes() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 id='method.collect'>collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, body);
          assert!(res.is_ok());
      }

      #[test]
      fn can_validate_anchors_with_name_attr() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
          let body = r#"<body><h3 name="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, body);
          assert!(res.is_ok());
      }

      /// A fragment that is only a prefix of an existing id must not match.
      #[test]
      fn can_fail_when_anchor_not_found() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me";
          let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
          let res = check_page_for_anchor(url, body);
          assert!(res.is_err());
      }

      #[test]
      fn can_check_url_for_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html#the-rust-standard-library";
          assert!(has_anchor(url));
      }

      #[test]
      fn will_return_false_when_no_anchor() {
          let url = "https://doc.rust-lang.org/std/index.html";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url() {
          let url = "https://doc.rust-lang.org/#/std";
          assert!(!has_anchor(url));
      }

      #[test]
      fn will_return_false_when_has_router_url_alt() {
          let url = "https://doc.rust-lang.org/#!/std";
          assert!(!has_anchor(url));
      }

      #[test]
      fn skip_anchor_prefixes() {
          let ignore_url = format!("{}{}", mockito::server_url(), "/ignore/");
          let config = LinkChecker { skip_prefixes: vec![], skip_anchor_prefixes: vec![ignore_url] };

          let _m1 = mock("GET", "/ignore/i30hobj1cy")
              .with_header("Content-Type", "text/html")
              .with_body(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Ignore</title>
</head>
<body>
<p id="existent"></p>
</body>
</html>
"#,
              )
              .create();

          // anchor check is ignored because the url matches the prefix
          let ignore = format!("{}{}", mockito::server_url(), "/ignore/i30hobj1cy#nonexistent");
          assert!(check_url(&ignore, &config).is_valid());

          let _m2 = mock("GET", "/guvqcqwmth")
              .with_header("Content-Type", "text/html")
              .with_body(
                  r#"<!DOCTYPE html>
<html>
<head>
<title>Test</title>
</head>
<body>
<p id="existent"></p>
</body>
</html>
"#,
              )
              .create();

          // other anchors are checked
          let existent = format!("{}{}", mockito::server_url(), "/guvqcqwmth#existent");
          assert!(check_url(&existent, &config).is_valid());
          let nonexistent = format!("{}{}", mockito::server_url(), "/guvqcqwmth#nonexistent");
          assert!(!check_url(&nonexistent, &config).is_valid());
      }
  }