diff --git a/Cargo.lock b/Cargo.lock
index ea3f288..3b177d3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1223,6 +1223,7 @@ dependencies = [
 name = "link_checker"
 version = "0.1.0"
 dependencies = [
+ "errors 0.1.0",
  "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "reqwest 0.9.20 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
diff --git a/components/link_checker/Cargo.toml b/components/link_checker/Cargo.toml
index 2c2715d..d40a234 100644
--- a/components/link_checker/Cargo.toml
+++ b/components/link_checker/Cargo.toml
@@ -6,3 +6,5 @@ authors = ["Vincent Prouillet "]
 [dependencies]
 reqwest = "0.9"
 lazy_static = "1"
+
+errors = { path = "../errors" }
diff --git a/components/link_checker/src/lib.rs b/components/link_checker/src/lib.rs
--- a/components/link_checker/src/lib.rs
+++ b/components/link_checker/src/lib.rs
@@ -2,8 +2,13 @@ extern crate reqwest;
 #[macro_use]
 extern crate lazy_static;
 
+extern crate errors;
+
 use reqwest::header::{HeaderMap, ACCEPT};
 use reqwest::StatusCode;
+
+use errors::Result;
+
 use std::collections::HashMap;
 use std::error::Error;
 use std::sync::{Arc, RwLock};
@@ -62,6 +67,12 @@ pub fn check_url(url: &str) -> LinkResult {
 
     // Need to actually do the link checking
     let res = match client.get(url).headers(headers).send() {
+        Ok(ref mut response) if has_anchor(url) => {
+            match check_page_for_anchor(url, response.text()) {
+                Ok(_) => LinkResult { code: Some(response.status()), error: None },
+                Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
+            }
+        }
         Ok(response) => LinkResult { code: Some(response.status()), error: None },
         Err(e) => LinkResult { code: None, error: Some(e.description().to_string()) },
     };
@@ -70,9 +81,54 @@ pub fn check_url(url: &str) -> LinkResult {
     res
 }
 
+/// Returns `true` when `url` carries a fragment that names an element on the page.
+///
+/// Fragments that start with `/` or `!` (`#/...`, `#!...`) are treated as
+/// client-side router paths rather than anchors, and a bare trailing `#` has
+/// nothing to look up, so all of those yield `false`.
+fn has_anchor(url: &str) -> bool {
+    match url.find('#') {
+        // `#` is one byte, so `index + 1` is always a char boundary. Inspect the
+        // next *char* (not the next byte) so that a fragment starting with a
+        // multi-byte character is still recognised as an anchor.
+        Some(index) => match url[index + 1..].chars().next() {
+            Some('/') | Some('!') | None => false,
+            Some(_) => true,
+        },
+        None => false,
+    }
+}
+
+/// Checks that the markup in `body` contains an element whose `id` or `name`
+/// attribute equals the fragment of `url`.
+///
+/// Returns an error when the body could not be read, when `url` has no
+/// fragment, or when no matching attribute is present.
+fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> {
+    // A body that cannot be read is reported as a failed check, not a panic.
+    let body = body.map_err(|e| errors::Error::from(e.to_string()))?;
+    let anchor = match url.find('#') {
+        Some(index) => &url[index + 1..],
+        None => return Err(errors::Error::from(format!("No anchor in `{}`", url))),
+    };
+    // Plain substring search covering both quoting styles of `id` and `name`.
+    let checks: [String; 4] = [
+        format!(" id='{}'", anchor),
+        format!(r#" id="{}""#, anchor),
+        format!(" name='{}'", anchor),
+        format!(r#" name="{}""#, anchor),
+    ];
+
+    if checks.iter().any(|check| body.contains(check.as_str())) {
+        Ok(())
+    } else {
+        Err(errors::Error::from(format!("Anchor `#{}` not found on page", anchor)))
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use super::{check_url, LINKS};
+    use super::{check_page_for_anchor, check_url, has_anchor, LINKS};
 
     #[test]
     fn can_validate_ok_links() {
@@ -91,4 +147,86 @@ mod tests {
         assert!(res.code.is_none());
         assert!(res.error.is_some());
     }
+
+    #[test]
+    fn can_validate_anchors() {
+        let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
+        let body = "<body><h3 id='method.collect'>collect</h3></body>".to_string();
+        let res = check_page_for_anchor(url, Ok(body));
+        assert!(res.is_ok());
+    }
+
+    #[test]
+    fn can_validate_anchors_with_other_quotes() {
+        let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
+        let body = r#"<body><h3 id="method.collect">collect</h3></body>"#.to_string();
+        let res = check_page_for_anchor(url, Ok(body));
+        assert!(res.is_ok());
+    }
+
+    #[test]
+    fn can_validate_anchors_with_name_attr() {
+        let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.collect";
+        let body = r#"<body><h3 name="method.collect">collect</h3></body>"#.to_string();
+        let res = check_page_for_anchor(url, Ok(body));
+        assert!(res.is_ok());
+    }
+
+    #[test]
+    fn can_fail_when_anchor_not_found() {
+        let url = "https://doc.rust-lang.org/std/iter/trait.Iterator.html#me";
+        let body = "<body><h3 id='method.collect'>collect</h3></body>".to_string();
+        let res = check_page_for_anchor(url, Ok(body));
+        assert!(res.is_err());
+    }
+
+    #[test]
+    fn can_check_url_for_anchor() {
+        let url = "https://doc.rust-lang.org/std/index.html#the-rust-standard-library";
+        let res = has_anchor(url);
+        assert_eq!(res, true);
+    }
+
+    #[test]
+    fn will_return_false_when_no_anchor() {
+        let url = "https://doc.rust-lang.org/std/index.html";
+        let res = has_anchor(url);
+        assert_eq!(res, false);
+    }
+
+    #[test]
+    fn will_return_false_when_has_router_url() {
+        let url = "https://doc.rust-lang.org/#/std";
+        let res = has_anchor(url);
+        assert_eq!(res, false);
+    }
+
+    #[test]
+    fn will_return_false_when_has_router_url_alt() {
+        let url = "https://doc.rust-lang.org/#!/std";
+        let res = has_anchor(url);
+        assert_eq!(res, false);
+    }
+
+    #[test]
+    fn will_return_true_when_anchor_starts_with_non_ascii() {
+        let url = "https://example.com/page.html#étude";
+        let res = has_anchor(url);
+        assert_eq!(res, true);
+    }
+
+    #[test]
+    fn will_return_false_when_fragment_is_empty() {
+        let url = "https://doc.rust-lang.org/std/index.html#";
+        let res = has_anchor(url);
+        assert_eq!(res, false);
+    }
+
+    #[test]
+    fn can_fail_when_url_has_no_anchor() {
+        let url = "https://doc.rust-lang.org/std/index.html";
+        let body = "<body><h3 id='method.collect'>collect</h3></body>".to_string();
+        let res = check_page_for_anchor(url, Ok(body));
+        assert!(res.is_err());
+    }
 }