You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

194 lines
5.5KB

  1. extern crate reqwest;
  2. #[macro_use]
  3. extern crate lazy_static;
  4. extern crate errors;
  5. use reqwest::header::{HeaderMap, ACCEPT};
  6. use reqwest::StatusCode;
  7. use errors::Result;
  8. use std::collections::HashMap;
  9. use std::error::Error;
  10. use std::sync::{Arc, RwLock};
  11. #[derive(Clone, Debug, PartialEq)]
  12. pub struct LinkResult {
  13. pub code: Option<StatusCode>,
  14. /// Whether the HTTP request didn't make it to getting a HTTP code
  15. pub error: Option<String>,
  16. }
  17. impl LinkResult {
  18. pub fn is_valid(&self) -> bool {
  19. if self.error.is_some() {
  20. return false;
  21. }
  22. if let Some(c) = self.code {
  23. return c.is_success();
  24. }
  25. true
  26. }
  27. pub fn message(&self) -> String {
  28. if let Some(ref e) = self.error {
  29. return e.clone();
  30. }
  31. if let Some(c) = self.code {
  32. return format!("{}", c);
  33. }
  34. "Unknown error".to_string()
  35. }
  36. }
  37. lazy_static! {
  38. // Keep history of link checks so a rebuild doesn't have to check again
  39. static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
  40. }
  41. pub fn check_url(url: &str) -> LinkResult {
  42. {
  43. let guard = LINKS.read().unwrap();
  44. if let Some(res) = guard.get(url) {
  45. return res.clone();
  46. }
  47. }
  48. let mut headers = HeaderMap::new();
  49. headers.insert(ACCEPT, "text/html".parse().unwrap());
  50. headers.append(ACCEPT, "*/*".parse().unwrap());
  51. let client = reqwest::Client::new();
  52. // Need to actually do the link checking
  53. let res = match client.get(url).headers(headers).send() {
  54. Ok(ref mut response) if has_anchor(url) => {
  55. match check_page_for_anchor(url, response.text()) {
  56. Ok(_) => LinkResult { code: Some(response.status()), error: None },
  57. Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
  58. }
  59. }
  60. Ok(response) => LinkResult { code: Some(response.status()), error: None },
  61. Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
  62. };
  63. LINKS.write().unwrap().insert(url.to_string(), res.clone());
  64. res
  65. }
  /// Tells whether `url` contains an anchor that can be looked up in a page.
  ///
  /// Returns `false` when there is no `#`, when nothing follows the first `#`,
  /// or when the fragment starts with `/` or `!` (the `#/` and `#!` forms are
  /// treated as client-side routes rather than anchors). Any other fragment,
  /// including one that starts with a non-ASCII character, counts as an anchor.
  fn has_anchor(url: &str) -> bool {
      let fragment = match url.find('#') {
          // `#` is a single byte, so `index + 1` is always a char boundary.
          Some(index) => &url[index + 1..],
          None => return false,
      };

      // Look at the first character rather than the first byte: slicing a
      // fixed number of bytes would land inside a multi-byte character.
      match fragment.chars().next() {
          None | Some('/') | Some('!') => false,
          Some(_) => true,
      }
  }
  75. fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> {
  76. let body = body.unwrap();
  77. let index = url.find('#').unwrap();
  78. let anchor = url.get(index + 1..).unwrap();
  79. let checks: [String; 4] = [
  80. format!(" id='{}'", anchor),
  81. format!(r#" id="{}""#, anchor),
  82. format!(" name='{}'", anchor),
  83. format!(r#" name="{}""#, anchor),
  84. ];
  85. if checks.iter().any(|check| body[..].contains(&check[..])) {
  86. Ok(())
  87. } else {
  88. Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
  89. }
  90. }
  #[cfg(test)]
  mod tests {
      use super::{check_page_for_anchor, check_url, has_anchor, LINKS};

      // URL with a `method.collect` anchor, shared by the anchor lookup tests.
      const COLLECT_URL: &str =
          "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";

      // Note: this test performs a real network request.
      #[test]
      fn can_validate_ok_links() {
          let url = "https://google.com";
          assert!(check_url(url).is_valid());
          // The first check must have populated the cache.
          assert!(LINKS.read().unwrap().contains_key(url));
          // The second check goes through the cached entry.
          assert!(check_url(url).is_valid());
      }

      // Note: this test performs a real network request.
      #[test]
      fn can_fail_404_links() {
          let outcome = check_url("https://google.comys");
          assert!(!outcome.is_valid());
          assert!(outcome.code.is_none());
          assert!(outcome.error.is_some());
      }

      #[test]
      fn can_validate_anchors() {
          let page = String::from("<body><h3 id='method.collect'>collect</h3></body>");
          assert!(check_page_for_anchor(COLLECT_URL, Ok(page)).is_ok());
      }

      #[test]
      fn can_validate_anchors_with_other_quotes() {
          let page = String::from(r#"<body><h3 id="method.collect">collect</h3></body>"#);
          assert!(check_page_for_anchor(COLLECT_URL, Ok(page)).is_ok());
      }

      #[test]
      fn can_validate_anchors_with_name_attr() {
          let page = String::from(r#"<body><h3 name="method.collect">collect</h3></body>"#);
          assert!(check_page_for_anchor(COLLECT_URL, Ok(page)).is_ok());
      }

      #[test]
      fn can_fail_when_anchor_not_found() {
          let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me";
          let page = String::from("<body><h3 id='method.collect'>collect</h3></body>");
          assert!(check_page_for_anchor(url, Ok(page)).is_err());
      }

      #[test]
      fn can_check_url_for_anchor() {
          assert!(has_anchor(
              "https://doc.rust-lang.org/std/index.html#the-rust-standard-library"
          ));
      }

      #[test]
      fn will_return_false_when_no_anchor() {
          assert!(!has_anchor("https://doc.rust-lang.org/std/index.html"));
      }

      #[test]
      fn will_return_false_when_has_router_url() {
          assert!(!has_anchor("https://doc.rust-lang.org/#/std"));
      }

      #[test]
      fn will_return_false_when_has_router_url_alt() {
          assert!(!has_anchor("https://doc.rust-lang.org/#!/std"));
      }
  }