* cargo fmt & clippy * Skip anchor checking for URL with prefix in config | index-subcmd
@@ -1241,6 +1241,7 @@ dependencies = [ | |||||
name = "link_checker" | name = "link_checker" | ||||
version = "0.1.0" | version = "0.1.0" | ||||
dependencies = [ | dependencies = [ | ||||
"config 0.1.0", | |||||
"errors 0.1.0", | "errors 0.1.0", | ||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"reqwest 0.9.21 (registry+https://github.com/rust-lang/crates.io-index)", | "reqwest 0.9.21 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
@@ -7,8 +7,8 @@ use syntect::parsing::{SyntaxSet, SyntaxSetBuilder}; | |||||
use toml; | use toml; | ||||
use toml::Value as Toml; | use toml::Value as Toml; | ||||
use errors::Result; | |||||
use errors::Error; | use errors::Error; | ||||
use errors::Result; | |||||
use highlighting::THEME_SET; | use highlighting::THEME_SET; | ||||
use theme::Theme; | use theme::Theme; | ||||
use utils::fs::read_file_with_error; | use utils::fs::read_file_with_error; | ||||
@@ -86,7 +86,20 @@ impl Default for Taxonomy { | |||||
} | } | ||||
} | } | ||||
type TranslateTerm = HashMap<String, String>; | |||||
type TranslateTerm = HashMap<String, String>; | |||||
/// Settings for the link checker, stored in the `link_checker` field of `Config`.
/// Because of `#[serde(default)]`, fields absent from the config fall back to `Default`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] | |||||
#[serde(default)] | |||||
pub struct LinkChecker { | |||||
/// Skip anchor checking for these URL prefixes | |||||
/// (a URL is exempt when it starts with any entry of this list).
pub skip_anchor_prefixes: Vec<String>, | |||||
} | |||||
// The default is an empty prefix list, so no URL is exempt from anchor checking.
impl Default for LinkChecker { | |||||
fn default() -> LinkChecker { | |||||
LinkChecker { skip_anchor_prefixes: Vec::new() } | |||||
} | |||||
} | |||||
#[derive(Clone, Debug, Serialize, Deserialize)] | #[derive(Clone, Debug, Serialize, Deserialize)] | ||||
#[serde(default)] | #[serde(default)] | ||||
@@ -152,6 +165,8 @@ pub struct Config { | |||||
#[serde(skip_serializing, skip_deserializing)] // not a typo, 2 are need | #[serde(skip_serializing, skip_deserializing)] // not a typo, 2 are need | ||||
pub extra_syntax_set: Option<SyntaxSet>, | pub extra_syntax_set: Option<SyntaxSet>, | ||||
pub link_checker: LinkChecker, | |||||
/// All user params set in [extra] in the config | /// All user params set in [extra] in the config | ||||
pub extra: HashMap<String, Toml>, | pub extra: HashMap<String, Toml>, | ||||
@@ -317,9 +332,16 @@ impl Config { | |||||
Error::msg(format!("Translation for language '{}' is missing", lang.as_ref())) | Error::msg(format!("Translation for language '{}' is missing", lang.as_ref())) | ||||
})?; | })?; | ||||
terms.get(key.as_ref()).ok_or_else(|| { | |||||
Error::msg(format!("Translation key '{}' for language '{}' is missing", key.as_ref(), lang.as_ref())) | |||||
}).map(|term| term.to_string()) | |||||
terms | |||||
.get(key.as_ref()) | |||||
.ok_or_else(|| { | |||||
Error::msg(format!( | |||||
"Translation key '{}' for language '{}' is missing", | |||||
key.as_ref(), | |||||
lang.as_ref() | |||||
)) | |||||
}) | |||||
.map(|term| term.to_string()) | |||||
} | } | ||||
} | } | ||||
@@ -346,6 +368,7 @@ impl Default for Config { | |||||
translations: HashMap::new(), | translations: HashMap::new(), | ||||
extra_syntaxes: Vec::new(), | extra_syntaxes: Vec::new(), | ||||
extra_syntax_set: None, | extra_syntax_set: None, | ||||
link_checker: LinkChecker::default(), | |||||
extra: HashMap::new(), | extra: HashMap::new(), | ||||
build_timestamp: Some(1), | build_timestamp: Some(1), | ||||
} | } | ||||
@@ -551,4 +574,25 @@ ignored_content = ["*.{graphml,iso}", "*.py?"] | |||||
assert!(g.is_match("foo.py3")); | assert!(g.is_match("foo.py3")); | ||||
assert!(!g.is_match("foo.py")); | assert!(!g.is_match("foo.py")); | ||||
} | } | ||||
// Checks that the `[link_checker]` table of a config string is parsed and that
// `skip_anchor_prefixes` holds the configured entries in the same order.
// The first entry contains `#`, so a prefix may extend into the fragment part of a URL.
#[test] | |||||
fn link_checker_skip_anchor_prefixes() { | |||||
let config_str = r#" | |||||
title = "My site" | |||||
base_url = "example.com" | |||||
[link_checker] | |||||
skip_anchor_prefixes = [ | |||||
"https://caniuse.com/#feat=", | |||||
"https://github.com/rust-lang/rust/blob/", | |||||
] | |||||
"#; | |||||
let config = Config::parse(config_str).unwrap(); | |||||
// Move the parsed list out of the config so it can be compared by value.
let v = config.link_checker.skip_anchor_prefixes; | |||||
assert_eq!( | |||||
v, | |||||
vec!["https://caniuse.com/#feat=", "https://github.com/rust-lang/rust/blob/"] | |||||
); | |||||
} | |||||
} | } |
@@ -14,7 +14,7 @@ extern crate utils; | |||||
mod config; | mod config; | ||||
pub mod highlighting; | pub mod highlighting; | ||||
mod theme; | mod theme; | ||||
pub use config::{Config, Language, Taxonomy}; | |||||
pub use config::{Config, Language, LinkChecker, Taxonomy}; | |||||
use std::path::Path; | use std::path::Path; | ||||
@@ -272,7 +272,7 @@ impl ImageOp { | |||||
} else { | } else { | ||||
img | img | ||||
} | } | ||||
}, | |||||
} | |||||
Fill(w, h) => { | Fill(w, h) => { | ||||
let factor_w = img_w as f32 / w as f32; | let factor_w = img_w as f32 / w as f32; | ||||
let factor_h = img_h as f32 / h as f32; | let factor_h = img_h as f32 / h as f32; | ||||
@@ -1,7 +1,7 @@ | |||||
use std::collections::{HashMap, HashSet}; | use std::collections::{HashMap, HashSet}; | ||||
use std::path::{Path, PathBuf}; | use std::path::{Path, PathBuf}; | ||||
use slotmap::{DenseSlotMap, DefaultKey}; | |||||
use slotmap::{DefaultKey, DenseSlotMap}; | |||||
use front_matter::SortBy; | use front_matter::SortBy; | ||||
@@ -21,7 +21,9 @@ pub fn sort_actual_pages_by_date(a: &&Page, b: &&Page) -> Ordering { | |||||
/// Takes a list of (page key, date, permalink) and sort them by dates if possible | /// Takes a list of (page key, date, permalink) and sort them by dates if possible | ||||
/// Pages without date will be put in the unsortable bucket | /// Pages without date will be put in the unsortable bucket | ||||
/// The permalink is used to break ties | /// The permalink is used to break ties | ||||
pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||||
pub fn sort_pages_by_date( | |||||
pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>, | |||||
) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||||
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = | let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = | ||||
pages.into_par_iter().partition(|page| page.1.is_some()); | pages.into_par_iter().partition(|page| page.1.is_some()); | ||||
@@ -40,7 +42,9 @@ pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)> | |||||
/// Takes a list of (page key, weight, permalink) and sort them by weight if possible | /// Takes a list of (page key, weight, permalink) and sort them by weight if possible | ||||
/// Pages without weight will be put in the unsortable bucket | /// Pages without weight will be put in the unsortable bucket | ||||
/// The permalink is used to break ties | /// The permalink is used to break ties | ||||
pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||||
pub fn sort_pages_by_weight( | |||||
pages: Vec<(&DefaultKey, Option<usize>, &str)>, | |||||
) -> (Vec<DefaultKey>, Vec<DefaultKey>) { | |||||
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = | let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = | ||||
pages.into_par_iter().partition(|page| page.1.is_some()); | pages.into_par_iter().partition(|page| page.1.is_some()); | ||||
@@ -57,7 +61,9 @@ pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> ( | |||||
} | } | ||||
/// Find the lighter/heavier and earlier/later pages for all pages having a date/weight | /// Find the lighter/heavier and earlier/later pages for all pages having a date/weight | ||||
pub fn find_siblings(sorted: &[DefaultKey]) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> { | |||||
pub fn find_siblings( | |||||
sorted: &[DefaultKey], | |||||
) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> { | |||||
let mut res = Vec::with_capacity(sorted.len()); | let mut res = Vec::with_capacity(sorted.len()); | ||||
let length = sorted.len(); | let length = sorted.len(); | ||||
@@ -7,4 +7,5 @@ authors = ["Vincent Prouillet <prouillet.vincent@gmail.com>"] | |||||
reqwest = "0.9" | reqwest = "0.9" | ||||
lazy_static = "1" | lazy_static = "1" | ||||
config = { path = "../config" } | |||||
errors = { path = "../errors" } | errors = { path = "../errors" } |
@@ -2,11 +2,13 @@ extern crate reqwest; | |||||
#[macro_use] | #[macro_use] | ||||
extern crate lazy_static; | extern crate lazy_static; | ||||
extern crate config; | |||||
extern crate errors; | extern crate errors; | ||||
use reqwest::header::{HeaderMap, ACCEPT}; | use reqwest::header::{HeaderMap, ACCEPT}; | ||||
use reqwest::StatusCode; | use reqwest::StatusCode; | ||||
use config::LinkChecker; | |||||
use errors::Result; | use errors::Result; | ||||
use std::collections::HashMap; | use std::collections::HashMap; | ||||
@@ -51,7 +53,7 @@ lazy_static! { | |||||
static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new())); | static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new())); | ||||
} | } | ||||
pub fn check_url(url: &str) -> LinkResult { | |||||
pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult { | |||||
{ | { | ||||
let guard = LINKS.read().unwrap(); | let guard = LINKS.read().unwrap(); | ||||
if let Some(res) = guard.get(url) { | if let Some(res) = guard.get(url) { | ||||
@@ -65,9 +67,11 @@ pub fn check_url(url: &str) -> LinkResult { | |||||
let client = reqwest::Client::new(); | let client = reqwest::Client::new(); | ||||
let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix)); | |||||
// Need to actually do the link checking | // Need to actually do the link checking | ||||
let res = match client.get(url).headers(headers).send() { | let res = match client.get(url).headers(headers).send() { | ||||
Ok(ref mut response) if has_anchor(url) => { | |||||
Ok(ref mut response) if check_anchor && has_anchor(url) => { | |||||
match check_page_for_anchor(url, response.text()) { | match check_page_for_anchor(url, response.text()) { | ||||
Ok(_) => LinkResult { code: Some(response.status()), error: None }, | Ok(_) => LinkResult { code: Some(response.status()), error: None }, | ||||
Err(e) => LinkResult { code: None, error: Some(e.to_string()) }, | Err(e) => LinkResult { code: None, error: Some(e.to_string()) }, | ||||
@@ -111,21 +115,21 @@ fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()> | |||||
#[cfg(test)] | #[cfg(test)] | ||||
mod tests { | mod tests { | ||||
use super::{check_page_for_anchor, check_url, has_anchor, LINKS}; | |||||
use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS}; | |||||
#[test] | #[test] | ||||
fn can_validate_ok_links() { | fn can_validate_ok_links() { | ||||
let url = "https://google.com"; | let url = "https://google.com"; | ||||
let res = check_url(url); | |||||
let res = check_url(url, &LinkChecker::default()); | |||||
assert!(res.is_valid()); | assert!(res.is_valid()); | ||||
assert!(LINKS.read().unwrap().get(url).is_some()); | assert!(LINKS.read().unwrap().get(url).is_some()); | ||||
let res = check_url(url); | |||||
let res = check_url(url, &LinkChecker::default()); | |||||
assert!(res.is_valid()); | assert!(res.is_valid()); | ||||
} | } | ||||
#[test] | #[test] | ||||
fn can_fail_404_links() { | fn can_fail_404_links() { | ||||
let res = check_url("https://google.comys"); | |||||
let res = check_url("https://google.comys", &LinkChecker::default()); | |||||
assert_eq!(res.is_valid(), false); | assert_eq!(res.is_valid(), false); | ||||
assert!(res.code.is_none()); | assert!(res.code.is_none()); | ||||
assert!(res.error.is_some()); | assert!(res.error.is_some()); | ||||
@@ -190,4 +194,23 @@ mod tests { | |||||
let res = has_anchor(url); | let res = has_anchor(url); | ||||
assert_eq!(res, false); | assert_eq!(res, false); | ||||
} | } | ||||
// Exercises `check_url` with a `LinkChecker` that exempts one GitHub prefix.
// NOTE: `check_url` sends real HTTP requests, so this test needs network access
// and depends on the remote pages staying as they are.
#[test] | |||||
fn skip_anchor_prefixes() { | |||||
let config = LinkChecker { | |||||
skip_anchor_prefixes: vec!["https://github.com/rust-lang/rust/blob/".to_owned()], | |||||
}; | |||||
// anchor check is ignored because the url matches the prefix | |||||
let permalink = "https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214"; | |||||
assert!(check_url(&permalink, &config).is_valid()); | |||||
// other anchors are checked | |||||
// (this URL does not start with the configured prefix and its anchor is expected to exist)
let glossary = "https://help.github.com/en/articles/github-glossary#blame"; | |||||
assert!(check_url(&glossary, &config).is_valid()); | |||||
// Same page, but with an anchor that is expected to be missing: the link must be invalid.
let glossary_invalid = | |||||
"https://help.github.com/en/articles/github-glossary#anchor-does-not-exist"; | |||||
assert_eq!(check_url(&glossary_invalid, &config).is_valid(), false); | |||||
} | |||||
} | } |
@@ -335,7 +335,7 @@ fn is_section(path: &str, languages_codes: &[&str]) -> bool { | |||||
} | } | ||||
} | } | ||||
return false; | |||||
false | |||||
} | } | ||||
/// What happens when a section or a page is created/edited | /// What happens when a section or a page is created/edited | ||||
@@ -296,8 +296,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render | |||||
let start_idx = heading_ref.start_idx; | let start_idx = heading_ref.start_idx; | ||||
let end_idx = heading_ref.end_idx; | let end_idx = heading_ref.end_idx; | ||||
let title = get_text(&events[start_idx + 1..end_idx]); | let title = get_text(&events[start_idx + 1..end_idx]); | ||||
let id = | |||||
heading_ref.id.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0)); | |||||
let id = heading_ref | |||||
.id | |||||
.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0)); | |||||
inserted_anchors.push(id.clone()); | inserted_anchors.push(id.clone()); | ||||
// insert `id` to the tag | // insert `id` to the tag | ||||
@@ -326,7 +327,8 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render | |||||
// record heading to make table of contents | // record heading to make table of contents | ||||
let permalink = format!("{}#{}", context.current_page_permalink, id); | let permalink = format!("{}#{}", context.current_page_permalink, id); | ||||
let h = Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() }; | |||||
let h = | |||||
Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() }; | |||||
headings.push(h); | headings.push(h); | ||||
} | } | ||||
@@ -399,7 +399,7 @@ impl Site { | |||||
all_links | all_links | ||||
.par_iter() | .par_iter() | ||||
.filter_map(|(page_path, link)| { | .filter_map(|(page_path, link)| { | ||||
let res = check_url(&link); | |||||
let res = check_url(&link, &self.config.link_checker); | |||||
if res.is_valid() { | if res.is_valid() { | ||||
None | None | ||||
} else { | } else { | ||||
@@ -662,3 +662,14 @@ fn can_ignore_markdown_content() { | |||||
let (_, _tmp_dir, public) = build_site("test_site"); | let (_, _tmp_dir, public) = build_site("test_site"); | ||||
assert!(!file_exists!(public, "posts/ignored/index.html")); | assert!(!file_exists!(public, "posts/ignored/index.html")); | ||||
} | } | ||||
// Builds `test_site`, verifies that its `[link_checker]` prefixes were loaded into the
// site config, then reloads the site in check mode and expects that to succeed.
#[test] | |||||
fn check_site() { | |||||
let (mut site, _tmp_dir, _public) = build_site("test_site"); | |||||
let prefixes = &site.config.link_checker.skip_anchor_prefixes; | |||||
assert_eq!(prefixes, &vec!["https://github.com/rust-lang/rust/blob/"]); | |||||
// NOTE(review): presumably check mode makes `load` run the external link check
// (the `expect` message suggests so) — confirm against `enable_check_mode`.
site.config.enable_check_mode(); | |||||
site.load().expect("link check test_site"); | |||||
} | |||||
@@ -34,9 +34,10 @@ impl TeraFn for Trans { | |||||
let lang = optional_arg!(String, args.get("lang"), "`trans`: `lang` must be a string.") | let lang = optional_arg!(String, args.get("lang"), "`trans`: `lang` must be a string.") | ||||
.unwrap_or_else(|| self.config.default_language.clone()); | .unwrap_or_else(|| self.config.default_language.clone()); | ||||
let term = self.config.get_translation(lang, key).map_err(|e| { | |||||
Error::chain("Failed to retreive term translation", e) | |||||
})?; | |||||
let term = self | |||||
.config | |||||
.get_translation(lang, key) | |||||
.map_err(|e| Error::chain("Failed to retreive term translation", e))?; | |||||
Ok(to_value(term).unwrap()) | Ok(to_value(term).unwrap()) | ||||
} | } | ||||
@@ -509,7 +510,6 @@ mod tests { | |||||
assert!(static_fn.call(&args).is_err()); | assert!(static_fn.call(&args).is_err()); | ||||
} | } | ||||
const TRANS_CONFIG: &str = r#" | const TRANS_CONFIG: &str = r#" | ||||
base_url = "https://remplace-par-ton-url.fr" | base_url = "https://remplace-par-ton-url.fr" | ||||
default_language = "fr" | default_language = "fr" | ||||
@@ -95,8 +95,14 @@ extra_syntaxes = [] | |||||
# | # | ||||
# [translations.en] | # [translations.en] | ||||
# title = "A title" | # title = "A title" | ||||
# | |||||
[translations] | |||||
# Configure the link checker | |||||
[link_checker] | |||||
# Skip anchor checking for external URLs that start with these prefixes | |||||
skip_anchor_prefixes = [ | |||||
"https://caniuse.com/", | |||||
] | |||||
# You can put any kind of data in there and it | # You can put any kind of data in there and it | ||||
# will be accessible in all templates | # will be accessible in all templates | ||||
@@ -13,5 +13,10 @@ extra_syntaxes = ["syntaxes"] | |||||
ignored_content = ["*/ignored.md"] | ignored_content = ["*/ignored.md"] | ||||
[link_checker] | |||||
skip_anchor_prefixes = [ | |||||
"https://github.com/rust-lang/rust/blob/", | |||||
] | |||||
[extra.author] | [extra.author] | ||||
name = "Vincent Prouillet" | name = "Vincent Prouillet" |
@@ -5,3 +5,9 @@ date = 2017-01-01 | |||||
+++ | +++ | ||||
A simple page | A simple page | ||||
<!-- more --> | |||||
Link to some rust-lang [source code][permalink]. | |||||
[permalink]: https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214 |