Browse Source

Skip anchor checking for URL with prefix in config (#812)

* cargo fmt & clippy

* Skip anchor checking for URL with prefix in config
index-subcmd
Tjeu Kayim Vincent Prouillet 4 years ago
parent
commit
6149fd17e1
16 changed files with 133 additions and 28 deletions
  1. +1
    -0
      Cargo.lock
  2. +49
    -5
      components/config/src/config.rs
  3. +1
    -1
      components/config/src/lib.rs
  4. +1
    -1
      components/imageproc/src/lib.rs
  5. +1
    -1
      components/library/src/library.rs
  6. +9
    -3
      components/library/src/sorting.rs
  7. +1
    -0
      components/link_checker/Cargo.toml
  8. +29
    -6
      components/link_checker/src/lib.rs
  9. +1
    -1
      components/rebuild/src/lib.rs
  10. +5
    -3
      components/rendering/src/markdown.rs
  11. +1
    -1
      components/site/src/lib.rs
  12. +11
    -0
      components/site/tests/site.rs
  13. +4
    -4
      components/templates/src/global_fns/mod.rs
  14. +8
    -2
      docs/content/documentation/getting-started/configuration.md
  15. +5
    -0
      test_site/config.toml
  16. +6
    -0
      test_site/content/posts/tutorials/programming/rust.md

+ 1
- 0
Cargo.lock View File

@@ -1241,6 +1241,7 @@ dependencies = [
name = "link_checker"
version = "0.1.0"
dependencies = [
"config 0.1.0",
"errors 0.1.0",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.21 (registry+https://github.com/rust-lang/crates.io-index)",


+ 49
- 5
components/config/src/config.rs View File

@@ -7,8 +7,8 @@ use syntect::parsing::{SyntaxSet, SyntaxSetBuilder};
use toml;
use toml::Value as Toml;

use errors::Result;
use errors::Error;
use errors::Result;
use highlighting::THEME_SET;
use theme::Theme;
use utils::fs::read_file_with_error;
@@ -86,7 +86,20 @@ impl Default for Taxonomy {
}
}

type TranslateTerm = HashMap<String, String>;
type TranslateTerm = HashMap<String, String>;

/// Settings for the link checker, read from the `[link_checker]` table of
/// the site configuration.
///
/// `Default` is derived: the only field is a `Vec`, whose default is empty,
/// which is also what `#[serde(default)]` falls back to when the table or
/// the key is missing.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct LinkChecker {
    /// Skip anchor checking for these URL prefixes
    pub skip_anchor_prefixes: Vec<String>,
}

#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]
@@ -152,6 +165,8 @@ pub struct Config {
#[serde(skip_serializing, skip_deserializing)] // not a typo, both are needed
pub extra_syntax_set: Option<SyntaxSet>,

pub link_checker: LinkChecker,

/// All user params set in [extra] in the config
pub extra: HashMap<String, Toml>,

@@ -317,9 +332,16 @@ impl Config {
Error::msg(format!("Translation for language '{}' is missing", lang.as_ref()))
})?;

terms.get(key.as_ref()).ok_or_else(|| {
Error::msg(format!("Translation key '{}' for language '{}' is missing", key.as_ref(), lang.as_ref()))
}).map(|term| term.to_string())
terms
.get(key.as_ref())
.ok_or_else(|| {
Error::msg(format!(
"Translation key '{}' for language '{}' is missing",
key.as_ref(),
lang.as_ref()
))
})
.map(|term| term.to_string())
}
}

@@ -346,6 +368,7 @@ impl Default for Config {
translations: HashMap::new(),
extra_syntaxes: Vec::new(),
extra_syntax_set: None,
link_checker: LinkChecker::default(),
extra: HashMap::new(),
build_timestamp: Some(1),
}
@@ -551,4 +574,25 @@ ignored_content = ["*.{graphml,iso}", "*.py?"]
assert!(g.is_match("foo.py3"));
assert!(!g.is_match("foo.py"));
}

/// The `[link_checker]` table is parsed and its `skip_anchor_prefixes`
/// entries are kept verbatim and in order.
#[test]
fn link_checker_skip_anchor_prefixes() {
    let raw_toml = r#"
title = "My site"
base_url = "example.com"

[link_checker]
skip_anchor_prefixes = [
"https://caniuse.com/#feat=",
"https://github.com/rust-lang/rust/blob/",
]
"#;

    let parsed = Config::parse(raw_toml).unwrap();
    let expected = vec!["https://caniuse.com/#feat=", "https://github.com/rust-lang/rust/blob/"];
    assert_eq!(parsed.link_checker.skip_anchor_prefixes, expected);
}
}

+ 1
- 1
components/config/src/lib.rs View File

@@ -14,7 +14,7 @@ extern crate utils;
mod config;
pub mod highlighting;
mod theme;
pub use config::{Config, Language, Taxonomy};
pub use config::{Config, Language, LinkChecker, Taxonomy};

use std::path::Path;



+ 1
- 1
components/imageproc/src/lib.rs View File

@@ -272,7 +272,7 @@ impl ImageOp {
} else {
img
}
},
}
Fill(w, h) => {
let factor_w = img_w as f32 / w as f32;
let factor_h = img_h as f32 / h as f32;


+ 1
- 1
components/library/src/library.rs View File

@@ -1,7 +1,7 @@
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};

use slotmap::{DenseSlotMap, DefaultKey};
use slotmap::{DefaultKey, DenseSlotMap};

use front_matter::SortBy;



+ 9
- 3
components/library/src/sorting.rs View File

@@ -21,7 +21,9 @@ pub fn sort_actual_pages_by_date(a: &&Page, b: &&Page) -> Ordering {
/// Takes a list of (page key, date, permalink) and sort them by dates if possible
/// Pages without date will be put in the unsortable bucket
/// The permalink is used to break ties
pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
pub fn sort_pages_by_date(
pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>,
) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =
pages.into_par_iter().partition(|page| page.1.is_some());

@@ -40,7 +42,9 @@ pub fn sort_pages_by_date(pages: Vec<(&DefaultKey, Option<NaiveDateTime>, &str)>
/// Takes a list of (page key, weight, permalink) and sort them by weight if possible
/// Pages without weight will be put in the unsortable bucket
/// The permalink is used to break ties
pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
pub fn sort_pages_by_weight(
pages: Vec<(&DefaultKey, Option<usize>, &str)>,
) -> (Vec<DefaultKey>, Vec<DefaultKey>) {
let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) =
pages.into_par_iter().partition(|page| page.1.is_some());

@@ -57,7 +61,9 @@ pub fn sort_pages_by_weight(pages: Vec<(&DefaultKey, Option<usize>, &str)>) -> (
}

/// Find the lighter/heavier and earlier/later pages for all pages having a date/weight
pub fn find_siblings(sorted: &[DefaultKey]) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> {
pub fn find_siblings(
sorted: &[DefaultKey],
) -> Vec<(DefaultKey, Option<DefaultKey>, Option<DefaultKey>)> {
let mut res = Vec::with_capacity(sorted.len());
let length = sorted.len();



+ 1
- 0
components/link_checker/Cargo.toml View File

@@ -7,4 +7,5 @@ authors = ["Vincent Prouillet <prouillet.vincent@gmail.com>"]
reqwest = "0.9"
lazy_static = "1"

config = { path = "../config" }
errors = { path = "../errors" }

+ 29
- 6
components/link_checker/src/lib.rs View File

@@ -2,11 +2,13 @@ extern crate reqwest;
#[macro_use]
extern crate lazy_static;

extern crate config;
extern crate errors;

use reqwest::header::{HeaderMap, ACCEPT};
use reqwest::StatusCode;

use config::LinkChecker;
use errors::Result;

use std::collections::HashMap;
@@ -51,7 +53,7 @@ lazy_static! {
static ref LINKS: Arc<RwLock<HashMap<String, LinkResult>>> = Arc::new(RwLock::new(HashMap::new()));
}

pub fn check_url(url: &str) -> LinkResult {
pub fn check_url(url: &str, config: &LinkChecker) -> LinkResult {
{
let guard = LINKS.read().unwrap();
if let Some(res) = guard.get(url) {
@@ -65,9 +67,11 @@ pub fn check_url(url: &str) -> LinkResult {

let client = reqwest::Client::new();

let check_anchor = !config.skip_anchor_prefixes.iter().any(|prefix| url.starts_with(prefix));

// Need to actually do the link checking
let res = match client.get(url).headers(headers).send() {
Ok(ref mut response) if has_anchor(url) => {
Ok(ref mut response) if check_anchor && has_anchor(url) => {
match check_page_for_anchor(url, response.text()) {
Ok(_) => LinkResult { code: Some(response.status()), error: None },
Err(e) => LinkResult { code: None, error: Some(e.to_string()) },
@@ -111,21 +115,21 @@ fn check_page_for_anchor(url: &str, body: reqwest::Result<String>) -> Result<()>

#[cfg(test)]
mod tests {
use super::{check_page_for_anchor, check_url, has_anchor, LINKS};
use super::{check_page_for_anchor, check_url, has_anchor, LinkChecker, LINKS};

#[test]
fn can_validate_ok_links() {
let url = "https://google.com";
let res = check_url(url);
let res = check_url(url, &LinkChecker::default());
assert!(res.is_valid());
assert!(LINKS.read().unwrap().get(url).is_some());
let res = check_url(url);
let res = check_url(url, &LinkChecker::default());
assert!(res.is_valid());
}

#[test]
fn can_fail_404_links() {
let res = check_url("https://google.comys");
let res = check_url("https://google.comys", &LinkChecker::default());
assert_eq!(res.is_valid(), false);
assert!(res.code.is_none());
assert!(res.error.is_some());
@@ -190,4 +194,23 @@ mod tests {
let res = has_anchor(url);
assert_eq!(res, false);
}

/// Anchors are only verified for URLs that do not start with one of the
/// configured `skip_anchor_prefixes`.
#[test]
fn skip_anchor_prefixes() {
    let skipped_prefix = "https://github.com/rust-lang/rust/blob/".to_owned();
    let config = LinkChecker { skip_anchor_prefixes: vec![skipped_prefix] };

    // URL starts with the configured prefix: its anchor is not checked
    let permalink = "https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214";
    let permalink_result = check_url(permalink, &config);
    assert!(permalink_result.is_valid());

    // URL outside the prefix list: an existing anchor passes the check
    let glossary = "https://help.github.com/en/articles/github-glossary#blame";
    let glossary_result = check_url(glossary, &config);
    assert!(glossary_result.is_valid());

    // URL outside the prefix list: a missing anchor is reported as invalid
    let glossary_invalid =
        "https://help.github.com/en/articles/github-glossary#anchor-does-not-exist";
    let invalid_result = check_url(glossary_invalid, &config);
    assert_eq!(invalid_result.is_valid(), false);
}
}

+ 1
- 1
components/rebuild/src/lib.rs View File

@@ -335,7 +335,7 @@ fn is_section(path: &str, languages_codes: &[&str]) -> bool {
}
}

return false;
false
}

/// What happens when a section or a page is created/edited


+ 5
- 3
components/rendering/src/markdown.rs View File

@@ -296,8 +296,9 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render
let start_idx = heading_ref.start_idx;
let end_idx = heading_ref.end_idx;
let title = get_text(&events[start_idx + 1..end_idx]);
let id =
heading_ref.id.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0));
let id = heading_ref
.id
.unwrap_or_else(|| find_anchor(&inserted_anchors, slugify(&title), 0));
inserted_anchors.push(id.clone());

// insert `id` to the tag
@@ -326,7 +327,8 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render

// record heading to make table of contents
let permalink = format!("{}#{}", context.current_page_permalink, id);
let h = Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() };
let h =
Heading { level: heading_ref.level, id, permalink, title, children: Vec::new() };
headings.push(h);
}



+ 1
- 1
components/site/src/lib.rs View File

@@ -399,7 +399,7 @@ impl Site {
all_links
.par_iter()
.filter_map(|(page_path, link)| {
let res = check_url(&link);
let res = check_url(&link, &self.config.link_checker);
if res.is_valid() {
None
} else {


+ 11
- 0
components/site/tests/site.rs View File

@@ -662,3 +662,14 @@ fn can_ignore_markdown_content() {
let (_, _tmp_dir, public) = build_site("test_site");
assert!(!file_exists!(public, "posts/ignored/index.html"));
}

/// Builds `test_site`, confirms the link-checker prefixes from its config
/// were picked up, then reloads the site in check mode.
#[test]
fn check_site() {
    let (mut site, _tmp_dir, _public) = build_site("test_site");

    // The prefix list must match what test_site/config.toml declares.
    let expected_prefixes = vec!["https://github.com/rust-lang/rust/blob/"];
    assert_eq!(&site.config.link_checker.skip_anchor_prefixes, &expected_prefixes);

    site.config.enable_check_mode();
    let reload = site.load();
    reload.expect("link check test_site");
}

+ 4
- 4
components/templates/src/global_fns/mod.rs View File

@@ -34,9 +34,10 @@ impl TeraFn for Trans {
let lang = optional_arg!(String, args.get("lang"), "`trans`: `lang` must be a string.")
.unwrap_or_else(|| self.config.default_language.clone());

let term = self.config.get_translation(lang, key).map_err(|e| {
Error::chain("Failed to retreive term translation", e)
})?;
let term = self
.config
.get_translation(lang, key)
.map_err(|e| Error::chain("Failed to retreive term translation", e))?;

Ok(to_value(term).unwrap())
}
@@ -509,7 +510,6 @@ mod tests {
assert!(static_fn.call(&args).is_err());
}


const TRANS_CONFIG: &str = r#"
base_url = "https://remplace-par-ton-url.fr"
default_language = "fr"


+ 8
- 2
docs/content/documentation/getting-started/configuration.md View File

@@ -95,8 +95,14 @@ extra_syntaxes = []
#
# [translations.en]
# title = "A title"
#
[translations]


# Configure the link checker
[link_checker]
# Skip anchor checking for external URLs that start with these prefixes
skip_anchor_prefixes = [
"https://caniuse.com/",
]

# You can put any kind of data in there and it
# will be accessible in all templates


+ 5
- 0
test_site/config.toml View File

@@ -13,5 +13,10 @@ extra_syntaxes = ["syntaxes"]

ignored_content = ["*/ignored.md"]

[link_checker]
skip_anchor_prefixes = [
"https://github.com/rust-lang/rust/blob/",
]

[extra.author]
name = "Vincent Prouillet"

+ 6
- 0
test_site/content/posts/tutorials/programming/rust.md View File

@@ -5,3 +5,9 @@ date = 2017-01-01
+++

A simple page

<!-- more -->

Link to some rust-lang [source code][permalink].

[permalink]: https://github.com/rust-lang/rust/blob/c772948b687488a087356cb91432425662e034b9/src/librustc_back/target/mod.rs#L194-L214

Loading…
Cancel
Save