From 6149fd17e11559be582cfbbb8ee7417e4977cde3 Mon Sep 17 00:00:00 2001 From: Tjeu Kayim <15987676+TjeuKayim@users.noreply.github.com> Date: Mon, 14 Oct 2019 18:31:03 +0200 Subject: [PATCH] Skip anchor checking for URL with prefix in config (#812) * cargo fmt & clippy * Skip anchor checking for URL with prefix in config --- Cargo.lock | 1 + components/config/src/config.rs | 54 +++++++++++++++++-- components/config/src/lib.rs | 2 +- components/imageproc/src/lib.rs | 2 +- components/library/src/library.rs | 2 +- components/library/src/sorting.rs | 12 +++-- components/link_checker/Cargo.toml | 1 + components/link_checker/src/lib.rs | 35 +++++++++--- components/rebuild/src/lib.rs | 2 +- components/rendering/src/markdown.rs | 8 +-- components/site/src/lib.rs | 2 +- components/site/tests/site.rs | 11 ++++ components/templates/src/global_fns/mod.rs | 8 +-- .../getting-started/configuration.md | 10 +++- test_site/config.toml | 5 ++ .../posts/tutorials/programming/rust.md | 6 +++ 16 files changed, 133 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f33231..8aad6d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1241,6 +1241,7 @@ dependencies = [ name = "link_checker" version = "0.1.0" dependencies = [ + "config 0.1.0", "errors 0.1.0", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.9.21 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/components/config/src/config.rs b/components/config/src/config.rs index 7a38760..b2e31a0 100644 --- a/components/config/src/config.rs +++ b/components/config/src/config.rs @@ -7,8 +7,8 @@ use syntect::parsing::{SyntaxSet, SyntaxSetBuilder}; use toml; use toml::Value as Toml; -use errors::Result; use errors::Error; +use errors::Result; use highlighting::THEME_SET; use theme::Theme; use utils::fs::read_file_with_error; @@ -86,7 +86,20 @@ impl Default for Taxonomy { } } -type TranslateTerm = HashMap; +type TranslateTerm = HashMap; + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(default)] +pub struct LinkChecker { + /// Skip anchor checking for these URL prefixes + pub skip_anchor_prefixes: Vec, +} + +impl Default for LinkChecker { + fn default() -> LinkChecker { + LinkChecker { skip_anchor_prefixes: Vec::new() } + } +} #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(default)] @@ -152,6 +165,8 @@ pub struct Config { #[serde(skip_serializing, skip_deserializing)] // not a typo, 2 are need pub extra_syntax_set: Option, + pub link_checker: LinkChecker, + /// All user params set in [extra] in the config pub extra: HashMap, @@ -317,9 +332,16 @@ impl Config { Error::msg(format!("Translation for language '{}' is missing", lang.as_ref())) })?; - terms.get(key.as_ref()).ok_or_else(|| { - Error::msg(format!("Translation key '{}' for language '{}' is missing", key.as_ref(), lang.as_ref())) - }).map(|term| term.to_string()) + terms + .get(key.as_ref()) + .ok_or_else(|| { + Error::msg(format!( + "Translation key '{}' for language '{}' is missing", + key.as_ref(), + lang.as_ref() + )) + }) + .map(|term| term.to_string()) } } @@ -346,6 +368,7 @@ impl Default for Config { translations: HashMap::new(), extra_syntaxes: Vec::new(), extra_syntax_set: None, + link_checker: LinkChecker::default(), extra: HashMap::new(), build_timestamp: Some(1), } @@ -551,4 +574,25 @@ ignored_content = ["*.{graphml,iso}", "*.py?"] assert!(g.is_match("foo.py3")); assert!(!g.is_match("foo.py")); } + + #[test] + fn link_checker_skip_anchor_prefixes() { + let config_str = r#" +title = "My site" +base_url = "example.com" + +[link_checker] +skip_anchor_prefixes = [ + "https://caniuse.com/#feat=", + "https://github.com/rust-lang/rust/blob/", +] + "#; + + let config = Config::parse(config_str).unwrap(); + let v = config.link_checker.skip_anchor_prefixes; + assert_eq!( + v, + vec!["https://caniuse.com/#feat=", "https://github.com/rust-lang/rust/blob/"] + ); + } } diff --git a/components/config/src/lib.rs b/components/config/src/lib.rs index 3c7443a..c3d65f8 100644 --- a/components/config/src/lib.rs +++ b/components/config/src/lib.rs @@ -14,7 +14,7 @@ extern crate utils; mod config; pub mod highlighting; mod theme; -pub use config::{Config, Language, Taxonomy}; +pub use config::{Config, Language, LinkChecker, Taxonomy}; use std::path::Path; diff --git a/components/imageproc/src/lib.rs b/components/imageproc/src/lib.rs index 7c754b5..a8fc10f 100644 --- a/components/imageproc/src/lib.rs +++ b/components/imageproc/src/lib.rs @@ -272,7 +272,7 @@ impl ImageOp { } else { img } - }, + } Fill(w, h) => { let factor_w = img_w as f32 / w as f32; let factor_h = img_h as f32 / h as f32; diff --git a/components/library/src/library.rs b/components/library/src/library.rs index e4d16e2..556390b 100644 --- a/components/library/src/library.rs +++ b/components/library/src/library.rs @@ -1,7 +1,7 @@ use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; -use slotmap::{DenseSlotMap, DefaultKey}; +use slotmap::{DefaultKey, DenseSlotMap}; use front_matter::SortBy; diff --git a/components/library/src/sorting.rs b/components/library/src/sorting.rs index a666a91..999e1ab 100644 --- a/components/library/src/sorting.rs +++ b/components/library/src/sorting.rs @@ -21,7 +21,9 @@ pub fn sort_actual_pages_by_date(a: &&Page, b: &&Page) -> Ordering { /// Takes a list of (page key, date, permalink) and sort them by dates if possible /// Pages without date will be put in the unsortable bucket /// The permalink is used to break ties -pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option, &str)>) -> (Vec, Vec) { +pub fn sort_pages_by_date( + pages: Vec<(&DefaultKey, Option, &str)>, +) -> (Vec, Vec) { let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = pages.into_par_iter().partition(|page| page.1.is_some()); @@ -40,7 +42,9 @@ pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option, &str)> /// Takes a list of (page key, weight, permalink) and sort them by weight if possible /// Pages without weight will be put in the unsortable bucket /// The permalink is used to break ties -pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option, &str)>) -> (Vec, Vec) { +pub fn sort_pages_by_weight( + pages: Vec<(&DefaultKey, Option, &str)>, +) -> (Vec, Vec) { let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = pages.into_par_iter().partition(|page| page.1.is_some()); @@ -57,7 +61,9 @@ pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option, &str)>) -> ( } /// Find the lighter/heavier and earlier/later pages for all pages having a date/weight -pub fn find_siblings(sorted: &[DefaultKey]) -> Vec<(DefaultKey, Option, Option)> { +pub fn find_siblings( + sorted: &[DefaultKey], +) -> Vec<(DefaultKey, Option, Option)> { let mut res = Vec::with_capacity(sorted.len()); let length = sorted.len(); diff --git a/components/link_checker/Cargo.toml b/components/link_checker/Cargo.toml index d40a234..98e8a39 100644 --- a/components/link_checker/Cargo.toml +++ b/components/link_checker/Cargo.toml @@ -7,4 +7,5 @@ authors = ["Vincent Prouillet "] reqwest = "0.9" lazy_static = "1" +config = { path = "../config" } errors = { path = "../errors" } diff --git a/components/link_checker/src/lib.rs b/components/link_checker/src/lib.rs index 10297f9..90fe0dc 100644 --- a/components/link_checker/src/lib.rs +++ b/components/link_checker/src/lib.rs @@ -2,11 +2,13 @@ extern crate reqwest; #[macro_use] extern crate lazy_static; +extern crate config; extern crate errors; use reqwest::header::{HeaderMap, ACCEPT}; use reqwest::StatusCode; +use config::LinkChecker; use errors::Result; use std::collections::HashMap; @@ -51,7 +53,7 @@ lazy_static! { static ref LINKS: Arc>> = Arc::new(RwLock::new(HashMap::new())); } -pub fn check_url(url: &str) -> LinkResult { +pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult { { let guard = LINKS.read().unwrap(); if let Some(res) = guard.get(url) { @@ -65,9 +67,11 @@ pub fn check_url(url: &str) -> LinkResult { let client = reqwest::Client::new(); + let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix)); + // Need to actually do the link checking let res = match client.get(url).headers(headers).send() { - Ok(ref mut response) if has_anchor(url) => { + Ok(ref mut response) if check_anchor && has_anchor(url) => { match check_page_for_anchor(url, response.text()) { Ok(_) => LinkResult { code: Some(response.status()), error: None }, Err(e) => LinkResult { code: None, error: Some(e.to_string()) }, @@ -111,21 +115,21 @@ fn check_page_for_anchor(url: &str, body: reqwest::Result) -> Result<()> #[cfg(test)] mod tests { - use super::{check_page_for_anchor, check_url, has_anchor, LINKS}; + use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS}; #[test] fn can_validate_ok_links() { let url = "https://google.com"; - let res = check_url(url); + let res = check_url(url, &LinkChecker::default()); assert!(res.is_valid()); assert!(LINKS.read().unwrap().get(url).is_some()); - let res = check_url(url); + let res = check_url(url, &LinkChecker::default()); assert!(res.is_valid()); } #[test] fn can_fail_404_links() { - let res = check_url("https://google.comys"); + let res = check_url("https://google.comys", &LinkChecker::default()); assert_eq!(res.is_valid(), false); assert!(res.code.is_none()); assert!(res.error.is_some()); @@ -190,4 +194,23 @@ mod tests { let res = has_anchor(url); assert_eq!(res, false); } + + #[test] + fn skip_anchor_prefixes() { + let config = LinkChecker { + skip_anchor_prefixes: vec!["https://github.com/rust-lang/rust/blob/".to_owned()], + }; + + // anchor check is ignored because the url matches the prefix + let permalink = "https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214"; + assert!(check_url(&permalink, &config).is_valid()); + + // other anchors are checked + let glossary = "https://help.github.com/en/articles/github-glossary#blame"; + assert!(check_url(&glossary, &config).is_valid()); + + let glossary_invalid = + "https://help.github.com/en/articles/github-glossary#anchor-does-not-exist"; + assert_eq!(check_url(&glossary_invalid, &config).is_valid(), false); + } } diff --git a/components/rebuild/src/lib.rs b/components/rebuild/src/lib.rs index 02e6367..e120b98 100644 --- a/components/rebuild/src/lib.rs +++ b/components/rebuild/src/lib.rs @@ -335,7 +335,7 @@ fn is_section(path: &str, languages_codes: &[&str]) -> bool { } } - return false; + false } /// What happens when a section or a page is created/edited diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index 673eca6..c5fcbe8 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -296,8 +296,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result Result + +Link to some rust-lang [source code][permalink]. + +[permalink]: https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214