* cargo fmt & clippy * Skip anchor checking for URL with prefix in configindex-subcmd
@@ -1241,6 +1241,7 @@ dependencies = [ | |||
name = "link_checker" | |||
version = "0.1.0" | |||
dependencies = [ | |||
"config 0.1.0", | |||
"errors 0.1.0", | |||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"reqwest 0.9.21 (registry+https://github.com/rust-lang/crates.io-index)", | |||
@@ -7,8 +7,8 @@ use syntect::parsing::{SyntaxSet, SyntaxSetBuilder}; | |||
use toml; | |||
use toml::Value as Toml; | |||
use errors::Result; | |||
use errors::Error; | |||
use errors::Result; | |||
use highlighting::THEME_SET; | |||
use theme::Theme; | |||
use utils::fs::read_file_with_error; | |||
@@ -86,7 +86,20 @@ impl Default for Taxonomy { | |||
} | |||
} | |||
type TranslateTerm = HashMap<String, String>; | |||
type TranslateTerm = HashMap<String, String>; | |||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] | |||
#[serde(default)] | |||
pub struct LinkChecker { | |||
/// Skip anchor checking for these URL prefixes | |||
pub skip_anchor_prefixes: Vec<String>, | |||
} | |||
impl Default for LinkChecker { | |||
fn default() -> LinkChecker { | |||
LinkChecker { skip_anchor_prefixes: Vec::new() } | |||
} | |||
} | |||
#[derive(Clone, Debug, Serialize, Deserialize)] | |||
#[serde(default)] | |||
@@ -152,6 +165,8 @@ pub struct Config { | |||
#[serde(skip_serializing, skip_deserializing)] // not a typo, 2 are needed | |||
pub extra_syntax_set: Option<SyntaxSet>, | |||
pub link_checker: LinkChecker, | |||
/// All user params set in [extra] in the config | |||
pub extra: HashMap<String, Toml>, | |||
@@ -317,9 +332,16 @@ impl Config { | |||
Error::msg(format!("Translation for language '{}' is missing", lang.as_ref())) | |||
})?; | |||
terms.get(key.as_ref()).ok_or_else(|| { | |||
Error::msg(format!("Translation key '{}' for language '{}' is missing", key.as_ref(), lang.as_ref())) | |||
}).map(|term| term.to_string()) | |||
terms | |||
.get(key.as_ref()) | |||
.ok_or_else(|| { | |||
Error::msg(format!( | |||
"Translation key '{}' for language '{}' is missing", | |||
key.as_ref(), | |||
lang.as_ref() | |||
)) | |||
}) | |||
.map(|term| term.to_string()) | |||
} | |||
} | |||
@@ -346,6 +368,7 @@ impl Default for Config { | |||
translations: HashMap::new(), | |||
extra_syntaxes: Vec::new(), | |||
extra_syntax_set: None, | |||
link_checker: LinkChecker::default(), | |||
extra: HashMap::new(), | |||
build_timestamp: Some(1), | |||
} | |||
@@ -551,4 +574,25 @@ ignored_content = ["*.{graphml,iso}", "*.py?"] | |||
assert!(g.is_match("foo.py3")); | |||
assert!(!g.is_match("foo.py")); | |||
} | |||
#[test]
fn link_checker_skip_anchor_prefixes() {
    // Minimal site config whose `[link_checker]` table lists two prefixes.
    let raw_toml = r#"
title = "My site"
base_url = "example.com"
[link_checker]
skip_anchor_prefixes = [
"https://caniuse.com/#feat=",
"https://github.com/rust-lang/rust/blob/",
]
"#;

    let parsed = Config::parse(raw_toml).unwrap();
    let expected =
        vec!["https://caniuse.com/#feat=", "https://github.com/rust-lang/rust/blob/"];

    // The prefixes must come back verbatim and in declaration order.
    assert_eq!(parsed.link_checker.skip_anchor_prefixes, expected);
}
} |
@@ -14,7 +14,7 @@ extern crate utils; | |||
mod config; | |||
pub mod highlighting; | |||
mod theme; | |||
pub use config::{Config, Language, Taxonomy}; | |||
pub use config::{Config, Language, LinkChecker, Taxonomy}; | |||
use std::path::Path; | |||
@@ -272,7 +272,7 @@ impl ImageOp { | |||
} else { | |||
img | |||
} | |||
}, | |||
} | |||
Fill(w, h) => { | |||
let factor_w = img_w as f32 / w as f32; | |||
let factor_h = img_h as f32 / h as f32; | |||
@@ -1,7 +1,7 @@ | |||
use std::collections::{HashMap, HashSet}; | |||
use std::path::{Path, PathBuf}; | |||
use slotmap::{DenseSlotMap, DefaultKey}; | |||
use slotmap::{DefaultKey, DenseSlotMap}; | |||
use front_matter::SortBy; | |||
@@ -21,7 +21,9 @@ pub fn sort_actual_pages_by_date(a: &&Page, b: &&Page) -> Ordering { | |||
/// Takes a list of (page key, date, permalink) and sort them by dates if possible | |||
/// Pages without date will be put in the unsortable bucket | |||
/// The permalink is used to break ties | |||
pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||
pub fn sort_pages_by_date( | |||
pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>, | |||
) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = | |||
pages.into_par_iter().partition(|page| page.1.is_some()); | |||
@@ -40,7 +42,9 @@ pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)> | |||
/// Takes a list of (page key, weight, permalink) and sort them by weight if possible | |||
/// Pages without weight will be put in the unsortable bucket | |||
/// The permalink is used to break ties | |||
pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||
pub fn sort_pages_by_weight( | |||
pages: Vec<(&DefaultKey, Option<usize>, &str)>, | |||
) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = | |||
pages.into_par_iter().partition(|page| page.1.is_some()); | |||
@@ -57,7 +61,9 @@ pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> ( | |||
} | |||
/// Find the lighter/heavier and earlier/later pages for all pages having a date/weight | |||
pub fn find_siblings(sorted: &[DefaultKey]) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> { | |||
pub fn find_siblings( | |||
sorted: &[DefaultKey], | |||
) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> { | |||
let mut res = Vec::with_capacity(sorted.len()); | |||
let length = sorted.len(); | |||
@@ -7,4 +7,5 @@ authors = ["Vincent Prouillet <prouillet.vincent@gmail.com>"] | |||
reqwest = "0.9" | |||
lazy_static = "1" | |||
config = { path = "../config" } | |||
errors = { path = "../errors" } |
@@ -2,11 +2,13 @@ extern crate reqwest; | |||
#[macro_use] | |||
extern crate lazy_static; | |||
extern crate config; | |||
extern crate errors; | |||
use reqwest::header::{HeaderMap, ACCEPT}; | |||
use reqwest::StatusCode; | |||
use config::LinkChecker; | |||
use errors::Result; | |||
use std::collections::HashMap; | |||
@@ -51,7 +53,7 @@ lazy_static! { | |||
static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new())); | |||
} | |||
pub fn check_url(url: &str) -> LinkResult { | |||
pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult { | |||
{ | |||
let guard = LINKS.read().unwrap(); | |||
if let Some(res) = guard.get(url) { | |||
@@ -65,9 +67,11 @@ pub fn check_url(url: &str) -> LinkResult { | |||
let client = reqwest::Client::new(); | |||
let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix)); | |||
// Need to actually do the link checking | |||
let res = match client.get(url).headers(headers).send() { | |||
Ok(ref mut response) if has_anchor(url) => { | |||
Ok(ref mut response) if check_anchor && has_anchor(url) => { | |||
match check_page_for_anchor(url, response.text()) { | |||
Ok(_) => LinkResult { code: Some(response.status()), error: None }, | |||
Err(e) => LinkResult { code: None, error: Some(e.to_string()) }, | |||
@@ -111,21 +115,21 @@ fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> | |||
#[cfg(test)] | |||
mod tests { | |||
use super::{check_page_for_anchor, check_url, has_anchor, LINKS}; | |||
use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS}; | |||
#[test] | |||
fn can_validate_ok_links() { | |||
let url = "https://google.com"; | |||
let res = check_url(url); | |||
let res = check_url(url, &LinkChecker::default()); | |||
assert!(res.is_valid()); | |||
assert!(LINKS.read().unwrap().get(url).is_some()); | |||
let res = check_url(url); | |||
let res = check_url(url, &LinkChecker::default()); | |||
assert!(res.is_valid()); | |||
} | |||
#[test] | |||
fn can_fail_404_links() { | |||
let res = check_url("https://google.comys"); | |||
let res = check_url("https://google.comys", &LinkChecker::default()); | |||
assert_eq!(res.is_valid(), false); | |||
assert!(res.code.is_none()); | |||
assert!(res.error.is_some()); | |||
@@ -190,4 +194,23 @@ mod tests { | |||
let res = has_anchor(url); | |||
assert_eq!(res, false); | |||
} | |||
#[test]
fn skip_anchor_prefixes() {
    let skipped_prefix = "https://github.com/rust-lang/rust/blob/".to_owned();
    let config = LinkChecker { skip_anchor_prefixes: vec![skipped_prefix] };

    // This URL starts with the configured prefix, so its anchor is not checked.
    let permalink = "https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214";
    let permalink_res = check_url(permalink, &config);
    assert!(permalink_res.is_valid());

    // URLs outside the prefix still get their anchors checked: an existing one passes...
    let glossary = "https://help.github.com/en/articles/github-glossary#blame";
    let glossary_res = check_url(glossary, &config);
    assert!(glossary_res.is_valid());

    // ...and a missing one is reported as invalid.
    let glossary_invalid =
        "https://help.github.com/en/articles/github-glossary#anchor-does-not-exist";
    let invalid_res = check_url(glossary_invalid, &config);
    assert_eq!(invalid_res.is_valid(), false);
}
} |
@@ -335,7 +335,7 @@ fn is_section(path: &str, languages_codes: &[&str]) -> bool { | |||
} | |||
} | |||
return false; | |||
false | |||
} | |||
/// What happens when a section or a page is created/edited | |||
@@ -296,8 +296,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render | |||
let start_idx = heading_ref.start_idx; | |||
let end_idx = heading_ref.end_idx; | |||
let title = get_text(&events[start_idx + 1..end_idx]); | |||
let id = | |||
heading_ref.id.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0)); | |||
let id = heading_ref | |||
.id | |||
.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0)); | |||
inserted_anchors.push(id.clone()); | |||
// insert `id` to the tag | |||
@@ -326,7 +327,8 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render | |||
// record heading to make table of contents | |||
let permalink = format!("{}#{}", context.current_page_permalink, id); | |||
let h = Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() }; | |||
let h = | |||
Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() }; | |||
headings.push(h); | |||
} | |||
@@ -399,7 +399,7 @@ impl Site { | |||
all_links | |||
.par_iter() | |||
.filter_map(|(page_path, link)| { | |||
let res = check_url(&link); | |||
let res = check_url(&link, &self.config.link_checker); | |||
if res.is_valid() { | |||
None | |||
} else { | |||
@@ -662,3 +662,14 @@ fn can_ignore_markdown_content() { | |||
let (_, _tmp_dir, public) = build_site("test_site"); | |||
assert!(!file_exists!(public, "posts/ignored/index.html")); | |||
} | |||
#[test]
fn check_site() {
    let (mut site, _tmp_dir, _public) = build_site("test_site");

    // The loaded config must expose exactly this one skip prefix.
    let expected_prefixes = vec!["https://github.com/rust-lang/rust/blob/"];
    assert_eq!(site.config.link_checker.skip_anchor_prefixes, expected_prefixes);

    // Switch to check mode, then load the site; a load error fails the test.
    site.config.enable_check_mode();
    site.load().expect("link check test_site");
}
@@ -34,9 +34,10 @@ impl TeraFn for Trans { | |||
let lang = optional_arg!(String, args.get("lang"), "`trans`: `lang` must be a string.") | |||
.unwrap_or_else(|| self.config.default_language.clone()); | |||
let term = self.config.get_translation(lang, key).map_err(|e| { | |||
Error::chain("Failed to retreive term translation", e) | |||
})?; | |||
let term = self | |||
.config | |||
.get_translation(lang, key) | |||
.map_err(|e| Error::chain("Failed to retreive term translation", e))?; | |||
Ok(to_value(term).unwrap()) | |||
} | |||
@@ -509,7 +510,6 @@ mod tests { | |||
assert!(static_fn.call(&args).is_err()); | |||
} | |||
const TRANS_CONFIG: &str = r#" | |||
base_url = "https://remplace-par-ton-url.fr" | |||
default_language = "fr" | |||
@@ -95,8 +95,14 @@ extra_syntaxes = [] | |||
# | |||
# [translations.en] | |||
# title = "A title" | |||
# | |||
[translations] | |||
# Configure the link checker | |||
[link_checker] | |||
# Skip anchor checking for external URLs that start with these prefixes | |||
skip_anchor_prefixes = [ | |||
"https://caniuse.com/", | |||
] | |||
# You can put any kind of data in there and it | |||
# will be accessible in all templates | |||
@@ -13,5 +13,10 @@ extra_syntaxes = ["syntaxes"] | |||
ignored_content = ["*/ignored.md"] | |||
[link_checker] | |||
skip_anchor_prefixes = [ | |||
"https://github.com/rust-lang/rust/blob/", | |||
] | |||
[extra.author] | |||
name = "Vincent Prouillet" |
@@ -5,3 +5,9 @@ date = 2017-01-01 | |||
+++ | |||
A simple page | |||
<!-- more --> | |||
Link to some rust-lang [source code][permalink]. | |||
[permalink]: https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214 |