* Add check subcommand * Add some brief documentation for the check subcommand * Start working on parallel link checks * Check all external links in Site * Return *all* dead links in siteindex-subcmd
@@ -2162,6 +2162,7 @@ dependencies = [ | |||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"imageproc 0.1.0", | |||
"library 0.1.0", | |||
"link_checker 0.1.0", | |||
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"sass-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"search 0.1.0", | |||
@@ -76,6 +76,8 @@ pub struct Page { | |||
pub lang: String, | |||
/// Contains all the translated version of that page | |||
pub translations: Vec<Key>, | |||
/// Contains the external links that need to be checked | |||
pub external_links: Vec<String>, | |||
} | |||
impl Page { | |||
@@ -104,6 +106,7 @@ impl Page { | |||
reading_time: None, | |||
lang: String::new(), | |||
translations: Vec::new(), | |||
external_links: Vec::new(), | |||
} | |||
} | |||
@@ -263,6 +266,7 @@ impl Page { | |||
self.summary = res.summary_len.map(|l| res.body[0..l].to_owned()); | |||
self.content = res.body; | |||
self.toc = res.toc; | |||
self.external_links = res.external_links; | |||
Ok(()) | |||
} | |||
@@ -339,6 +343,7 @@ impl Default for Page { | |||
reading_time: None, | |||
lang: String::new(), | |||
translations: Vec::new(), | |||
external_links: Vec::new(), | |||
} | |||
} | |||
} | |||
@@ -56,6 +56,8 @@ pub struct Section { | |||
pub lang: String, | |||
/// Contains all the translated version of that section | |||
pub translations: Vec<Key>, | |||
/// Contains the external links that need to be checked | |||
pub external_links: Vec<String>, | |||
} | |||
impl Section { | |||
@@ -85,6 +87,7 @@ impl Section { | |||
reading_time: None, | |||
lang: String::new(), | |||
translations: Vec::new(), | |||
external_links: Vec::new(), | |||
} | |||
} | |||
@@ -189,6 +192,8 @@ impl Section { | |||
})?; | |||
self.content = res.body; | |||
self.toc = res.toc; | |||
self.external_links = res.external_links; | |||
Ok(()) | |||
} | |||
@@ -255,6 +260,7 @@ impl Default for Section { | |||
word_count: None, | |||
lang: String::new(), | |||
translations: Vec::new(), | |||
external_links: Vec::new(), | |||
} | |||
} | |||
} | |||
@@ -9,7 +9,6 @@ use config::highlighting::{get_highlighter, SYNTAX_SET, THEME_SET}; | |||
use context::RenderContext; | |||
use errors::{Error, Result}; | |||
use front_matter::InsertAnchor; | |||
use link_checker::check_url; | |||
use table_of_contents::{make_table_of_contents, Header}; | |||
use utils::site::resolve_internal_link; | |||
use utils::vec::InsertMany; | |||
@@ -25,6 +24,7 @@ pub struct Rendered { | |||
pub body: String, | |||
pub summary_len: Option<usize>, | |||
pub toc: Vec<Header>, | |||
pub external_links: Vec<String>, | |||
} | |||
// tracks a header in a slice of pulldown-cmark events | |||
@@ -66,7 +66,7 @@ fn is_colocated_asset_link(link: &str) -> bool { | |||
&& !link.starts_with("mailto:") | |||
} | |||
fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result<String> { | |||
fn fix_link(link_type: LinkType, link: &str, context: &RenderContext, external_links: &mut Vec<String>) -> Result<String> { | |||
if link_type == LinkType::Email { | |||
return Ok(link.to_string()); | |||
} | |||
@@ -83,17 +83,10 @@ fn fix_link(link_type: LinkType, link: &str, context: &RenderContext) -> Result< | |||
} | |||
} else if is_colocated_asset_link(&link) { | |||
format!("{}{}", context.current_page_permalink, link) | |||
} else if context.config.check_external_links | |||
&& !link.starts_with('#') | |||
&& !link.starts_with("mailto:") | |||
{ | |||
let res = check_url(&link); | |||
if res.is_valid() { | |||
link.to_string() | |||
} else { | |||
return Err(format!("Link {} is not valid: {}", link, res.message()).into()); | |||
} | |||
} else { | |||
if !link.starts_with('#') && !link.starts_with("mailto:") { | |||
external_links.push(link.to_owned()); | |||
} | |||
link.to_string() | |||
}; | |||
Ok(result) | |||
@@ -142,6 +135,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render | |||
let mut inserted_anchors: Vec<String> = vec![]; | |||
let mut headers: Vec<Header> = vec![]; | |||
let mut external_links = Vec::new(); | |||
let mut opts = Options::empty(); | |||
let mut has_summary = false; | |||
@@ -207,7 +201,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render | |||
Event::Start(Tag::Image(link_type, src, title)) | |||
} | |||
Event::Start(Tag::Link(link_type, link, title)) => { | |||
let fixed_link = match fix_link(link_type, &link, context) { | |||
let fixed_link = match fix_link(link_type, &link, context, &mut external_links) { | |||
Ok(fixed_link) => fixed_link, | |||
Err(err) => { | |||
error = Some(err); | |||
@@ -302,6 +296,7 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result<Render | |||
summary_len: if has_summary { html.find(CONTINUE_READING) } else { None }, | |||
body: html, | |||
toc: make_table_of_contents(headers), | |||
external_links: external_links, | |||
}) | |||
} | |||
} |
@@ -697,60 +697,9 @@ Some text | |||
} | |||
#[test] | |||
fn can_validate_valid_external_links() { | |||
fn correctly_captures_external_links() { | |||
let permalinks_ctx = HashMap::new(); | |||
let mut config = Config::default(); | |||
config.check_external_links = true; | |||
let context = RenderContext::new( | |||
&ZOLA_TERA, | |||
&config, | |||
"https://vincent.is/about/", | |||
&permalinks_ctx, | |||
InsertAnchor::None, | |||
); | |||
let res = render_content("[a link](http://google.com)", &context).unwrap(); | |||
assert_eq!(res.body, "<p><a href=\"http://google.com\">a link</a></p>\n"); | |||
} | |||
#[test] | |||
fn can_show_error_message_for_invalid_external_links() { | |||
let permalinks_ctx = HashMap::new(); | |||
let mut config = Config::default(); | |||
config.check_external_links = true; | |||
let context = RenderContext::new( | |||
&ZOLA_TERA, | |||
&config, | |||
"https://vincent.is/about/", | |||
&permalinks_ctx, | |||
InsertAnchor::None, | |||
); | |||
let res = render_content("[a link](http://google.comy)", &context); | |||
assert!(res.is_err()); | |||
let err = res.unwrap_err(); | |||
assert!(format!("{}", err).contains("Link http://google.comy is not valid")); | |||
} | |||
#[test] | |||
fn doesnt_try_to_validate_email_links_mailto() { | |||
let permalinks_ctx = HashMap::new(); | |||
let mut config = Config::default(); | |||
config.check_external_links = true; | |||
let context = RenderContext::new( | |||
&ZOLA_TERA, | |||
&config, | |||
"https://vincent.is/about/", | |||
&permalinks_ctx, | |||
InsertAnchor::None, | |||
); | |||
let res = render_content("Email: [foo@bar.baz](mailto:foo@bar.baz)", &context).unwrap(); | |||
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n"); | |||
} | |||
#[test] | |||
fn doesnt_try_to_validate_email_links_angled_brackets() { | |||
let permalinks_ctx = HashMap::new(); | |||
let mut config = Config::default(); | |||
config.check_external_links = true; | |||
let config = Config::default(); | |||
let context = RenderContext::new( | |||
&ZOLA_TERA, | |||
&config, | |||
@@ -758,8 +707,14 @@ fn doesnt_try_to_validate_email_links_angled_brackets() { | |||
&permalinks_ctx, | |||
InsertAnchor::None, | |||
); | |||
let res = render_content("Email: <foo@bar.baz>", &context).unwrap(); | |||
assert_eq!(res.body, "<p>Email: <a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n"); | |||
let content = " | |||
[a link](http://google.com) | |||
[a link](http://google.comy) | |||
Email: [foo@bar.baz](mailto:foo@bar.baz) | |||
Email: <foo@bar.baz> | |||
"; | |||
let res = render_content(content, &context).unwrap(); | |||
assert_eq!(res.external_links, &["http://google.com".to_owned(), "http://google.comy".to_owned()]); | |||
} | |||
#[test] | |||
@@ -19,6 +19,7 @@ front_matter = { path = "../front_matter" } | |||
search = { path = "../search" } | |||
imageproc = { path = "../imageproc" } | |||
library = { path = "../library" } | |||
link_checker = { path = "../link_checker" } | |||
[dev-dependencies] | |||
tempfile = "3" |
@@ -15,6 +15,7 @@ extern crate library; | |||
extern crate search; | |||
extern crate templates; | |||
extern crate utils; | |||
extern crate link_checker; | |||
#[cfg(test)] | |||
extern crate tempfile; | |||
@@ -33,7 +34,7 @@ use sass_rs::{compile_file, Options as SassOptions, OutputStyle}; | |||
use tera::{Context, Tera}; | |||
use config::{get_config, Config}; | |||
use errors::{Error, Result}; | |||
use errors::{Error, ErrorKind, Result}; | |||
use front_matter::InsertAnchor; | |||
use library::{ | |||
find_taxonomies, sort_actual_pages_by_date, Library, Page, Paginator, Section, Taxonomy, | |||
@@ -42,6 +43,7 @@ use templates::{global_fns, render_redirect_template, ZOLA_TERA}; | |||
use utils::fs::{copy_directory, create_directory, create_file, ensure_directory_exists}; | |||
use utils::net::get_available_port; | |||
use utils::templates::{render_template, rewrite_theme_paths}; | |||
use link_checker::check_url; | |||
#[derive(Debug)] | |||
pub struct Site { | |||
@@ -243,9 +245,64 @@ impl Site { | |||
self.render_markdown()?; | |||
self.register_tera_global_fns(); | |||
if self.config.check_external_links { | |||
self.check_external_links()?; | |||
} | |||
Ok(()) | |||
} | |||
pub fn check_external_links(&self) -> Result<()> { | |||
let library = self.library.write().expect("Get lock for check_external_links"); | |||
let page_links = library.pages() | |||
.values() | |||
.map(|p| { | |||
let path = &p.file.path; | |||
p.external_links.iter().map(move |l| (path.clone(), l)) | |||
}) | |||
.flatten(); | |||
let section_links = library.sections() | |||
.values() | |||
.map(|p| { | |||
let path = &p.file.path; | |||
p.external_links.iter().map(move |l| (path.clone(), l)) | |||
}) | |||
.flatten(); | |||
let all_links = page_links.chain(section_links).collect::<Vec<_>>(); | |||
// create thread pool with lots of threads so we can fetch | |||
// (almost) all pages simultaneously | |||
let threads = std::cmp::min(all_links.len(), 32); | |||
let pool = rayon::ThreadPoolBuilder::new().num_threads(threads).build().map_err(|e| Error { | |||
kind: ErrorKind::Msg(e.to_string().into()), | |||
source: None, | |||
})?; | |||
let errors: Vec<_> = pool.install(|| { | |||
all_links.par_iter().filter_map(|(path, link)| { | |||
let res = check_url(link); | |||
if res.is_valid() { | |||
None | |||
} else { | |||
Some((path, res)) | |||
} | |||
}).collect() | |||
}); | |||
if errors.is_empty() { | |||
Ok(()) | |||
} else { | |||
let msg = errors.into_iter() | |||
.map(|(path, check_res)| format!("Dead link in {:?}: {:?}", path, check_res)) | |||
.collect::<Vec<_>>() | |||
.join("\n"); | |||
Err(Error { | |||
kind: ErrorKind::Msg(msg.into()), | |||
source: None, | |||
}) | |||
} | |||
} | |||
/// Insert a default index section for each language if necessary so we don't need to create | |||
/// a _index.md to render the index page at the root of the site | |||
pub fn create_default_index_sections(&mut self) -> Result<()> { | |||
@@ -83,6 +83,11 @@ You can also point to another config file than `config.toml` like so - the posit | |||
$ zola --config config.staging.toml serve | |||
``` | |||
### check | |||
The check subcommand will try to build all pages just like the build command would, but without writing any of the | |||
results to disk. Additionally, it always checks external links regardless of the site configuration. | |||
## Colored output | |||
Any of these commands will emit colored output if your terminal supports it. | |||
@@ -67,5 +67,7 @@ pub fn build_cli() -> App<'static, 'static> { | |||
.takes_value(false) | |||
.help("Do not start a server, just re-build project on changes") | |||
]), | |||
SubCommand::with_name("check") | |||
.about("Try building the project without rendering it. Checks links") | |||
]) | |||
} |
@@ -0,0 +1,28 @@ | |||
use std::env; | |||
use std::path::PathBuf; | |||
use errors::Result; | |||
use site::Site; | |||
use console; | |||
pub fn check( | |||
config_file: &str, | |||
base_path: Option<&str>, | |||
base_url: Option<&str>, | |||
) -> Result<()> { | |||
let bp = base_path.map(PathBuf::from).unwrap_or(env::current_dir().unwrap()); | |||
let mut site = Site::new(bp, config_file)?; | |||
// Force the checking of external links | |||
site.config.check_external_links = true; | |||
// Disable syntax highlighting since the results won't be used | |||
// and this operation can be expensive. | |||
site.config.highlight_code = false; | |||
if let Some(b) = base_url { | |||
site.set_base_url(b.to_string()); | |||
} | |||
site.load()?; | |||
console::notify_site_size(&site); | |||
console::warn_about_ignored_pages(&site); | |||
Ok(()) | |||
} |
@@ -1,7 +1,9 @@ | |||
mod build; | |||
mod init; | |||
mod serve; | |||
mod check; | |||
pub use self::build::build; | |||
pub use self::init::create_new_project; | |||
pub use self::serve::serve; | |||
pub use self::check::check; |
@@ -89,6 +89,21 @@ fn main() { | |||
} | |||
}; | |||
} | |||
("check", Some(matches)) => { | |||
console::info("Checking site..."); | |||
let start = Instant::now(); | |||
match cmd::check( | |||
config_file, | |||
matches.value_of("base_path"), | |||
matches.value_of("base_url"), | |||
) { | |||
Ok(()) => console::report_elapsed_time(start), | |||
Err(e) => { | |||
console::unravel_errors("Failed to check the site", &e); | |||
::std::process::exit(1); | |||
} | |||
}; | |||
} | |||
_ => unreachable!(), | |||
} | |||
} |