diff --git a/Cargo.lock b/Cargo.lock index 96299c2..aae3519 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -172,6 +172,7 @@ version = "0.1.0" dependencies = [ "chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "errors 0.1.0", + "globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "highlighting 0.1.0", "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", @@ -185,6 +186,7 @@ dependencies = [ "config 0.1.0", "errors 0.1.0", "front_matter 0.1.0", + "globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "rendering 0.1.0", "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", @@ -364,6 +366,18 @@ name = "glob" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "globset" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "gutenberg" version = "0.3.1" @@ -1546,6 +1560,7 @@ dependencies = [ "checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb" "checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" +"checksum globset 
0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1e96ab92362c06811385ae9a34d2698e8a1160745e0c78fbb434a44c8de3fabc" "checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37" "checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e" "checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2" diff --git a/components/config/Cargo.toml b/components/config/Cargo.toml index fb3b521..298f813 100644 --- a/components/config/Cargo.toml +++ b/components/config/Cargo.toml @@ -8,6 +8,7 @@ toml = "0.4" serde = "1" serde_derive = "1" chrono = "0.4" +globset = "0.3.0" errors = { path = "../errors" } highlighting = { path = "../highlighting"} diff --git a/components/config/src/lib.rs b/components/config/src/lib.rs index f1a0762..9f86c52 100644 --- a/components/config/src/lib.rs +++ b/components/config/src/lib.rs @@ -5,6 +5,7 @@ extern crate toml; extern crate errors; extern crate highlighting; extern crate chrono; +extern crate globset; use std::collections::HashMap; use std::fs::File; @@ -13,6 +14,7 @@ use std::path::{Path, PathBuf}; use toml::{Value as Toml}; use chrono::Utc; +use globset::{Glob, GlobSet, GlobSetBuilder}; use errors::{Result, ResultExt}; use highlighting::THEME_SET; @@ -22,7 +24,7 @@ mod theme; use theme::Theme; -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct Config { /// Base URL of the site, the only required config argument pub base_url: String, @@ -49,6 +51,12 @@ pub struct Config { pub generate_categories_pages: Option, /// Whether to compile the `sass` directory and output the css files into the static folder pub compile_sass: Option, + /// A list of file glob patterns to ignore when processing the 
content folder. Defaults to none. + /// Had to remove the PartialEq derive because GlobSet does not implement it. No impact + /// because it's unused anyway (who wants to compare Configs?). + pub ignored_content: Option>, + #[serde(skip_serializing, skip_deserializing)] + pub ignored_content_globber: Option, /// Languages list and translated strings pub translations: Option>, @@ -84,6 +92,7 @@ impl Config { set_default!(config.generate_tags_pages, false); set_default!(config.generate_categories_pages, false); set_default!(config.compile_sass, false); + set_default!(config.ignored_content, Vec::new()); set_default!(config.translations, HashMap::new()); set_default!(config.extra, HashMap::new()); @@ -97,6 +106,25 @@ impl Config { }; config.build_timestamp = Some(Utc::now().timestamp()); + + // Convert the file glob strings into a compiled glob set matcher. We want to do this once, + // at program initialization, rather than for every page, for example. We arrange for the + // globset matcher to always exist (even though it has to be inside an Option at the + // moment because of the TOML serializer); if the glob set is empty the `is_match` function + // of the globber always returns false. 
+ let mut glob_set_builder = GlobSetBuilder::new(); + + if let Some(ref v) = config.ignored_content { + for pat in v { + let glob = match Glob::new(pat) { + Ok(g) => g, + Err(e) => bail!("Invalid ignored_content glob pattern: {}, error = {}", pat, e) + }; + glob_set_builder.add(glob); + } + } + config.ignored_content_globber = Some(glob_set_builder.build().expect("Bad ignored_content in config file.")); + Ok(config) } @@ -176,6 +204,8 @@ impl Default for Config { generate_tags_pages: Some(true), generate_categories_pages: Some(true), compile_sass: Some(false), + ignored_content: Some(Vec::new()), + ignored_content_globber: Some(GlobSetBuilder::new().build().unwrap()), translations: None, extra: None, build_timestamp: Some(1), @@ -330,4 +360,51 @@ title = "A title" assert_eq!(translations["en"]["title"].as_str().unwrap(), "A title"); } + #[test] + fn missing_ignored_content_results_in_empty_vector_and_empty_globber() { + let config_str = r#" +title = "My site" +base_url = "example.com" + "#; + + let config = Config::parse(config_str).unwrap(); + let v = config.ignored_content.unwrap(); + assert_eq!(v.len(), 0); + assert!(config.ignored_content_globber.unwrap().is_empty()); + } + + #[test] + fn empty_ignored_content_results_in_empty_vector_and_empty_globber() { + let config_str = r#" +title = "My site" +base_url = "example.com" +ignored_content = [] + "#; + + let config = Config::parse(config_str).unwrap(); + assert_eq!(config.ignored_content.unwrap().len(), 0); + assert!(config.ignored_content_globber.unwrap().is_empty()); + } + + #[test] + fn non_empty_ignored_content_results_in_vector_of_patterns_and_configured_globber() { + let config_str = r#" +title = "My site" +base_url = "example.com" +ignored_content = ["*.{graphml,iso}", "*.py?"] + "#; + + let config = Config::parse(config_str).unwrap(); + let v = config.ignored_content.unwrap(); + assert_eq!(v, vec!["*.{graphml,iso}", "*.py?"]); + + let g = config.ignored_content_globber.unwrap(); + assert_eq!(g.len(), 2); 
+ assert!(g.is_match("foo.graphml")); + assert!(g.is_match("foo.iso")); + assert!(!g.is_match("foo.png")); + assert!(g.is_match("foo.py2")); + assert!(g.is_match("foo.py3")); + assert!(!g.is_match("foo.py")); + } } diff --git a/components/content/Cargo.toml b/components/content/Cargo.toml index 9c61787..07c2163 100644 --- a/components/content/Cargo.toml +++ b/components/content/Cargo.toml @@ -18,3 +18,4 @@ front_matter = { path = "../front_matter" } [dev-dependencies] tempdir = "0.3" toml = "0.4" +globset = "0.3.0" diff --git a/components/content/src/lib.rs b/components/content/src/lib.rs index 3552fbc..b3a4518 100644 --- a/components/content/src/lib.rs +++ b/components/content/src/lib.rs @@ -13,6 +13,8 @@ extern crate utils; extern crate tempdir; #[cfg(test)] extern crate toml; +#[cfg(test)] +extern crate globset; mod file_info; mod page; diff --git a/components/content/src/page.rs b/components/content/src/page.rs index 59e1299..b49235c 100644 --- a/components/content/src/page.rs +++ b/components/content/src/page.rs @@ -128,10 +128,27 @@ impl Page { let path = path.as_ref(); let content = read_file(path)?; let mut page = Page::parse(path, &content, config)?; - page.assets = vec![]; if page.file.name == "index" { - page.assets = find_related_assets(path.parent().unwrap()); + // `find_related_assets` only scans the immediate directory (it is not recursive) so our + // filtering only needs to work against the file_name component, not the full suffix. If + // `find_related_assets` was changed to also return files in subdirectories, we could + // use `PathBuf.strip_prefix` to remove the parent directory and then glob-filter + // against the remaining path. Note that the current behaviour effectively means that + // the `ignored_content` setting in the config file is limited to single-file glob + // patterns (no "**" patterns). 
+ let globber = config.ignored_content_globber.as_ref().unwrap(); + let parent_dir = path.parent().unwrap(); + page.assets = find_related_assets(parent_dir).into_iter() + .filter(|path| + match path.file_name() { + None => true, + Some(file) => !globber.is_match(file) + } + ).collect(); + + } else { + page.assets = vec![]; } Ok(page) @@ -240,6 +257,7 @@ mod tests { use tera::Tera; use tempdir::TempDir; + use globset::{Glob, GlobSetBuilder}; use config::Config; use super::Page; @@ -419,4 +437,34 @@ Hello world assert_eq!(page.assets.len(), 3); assert_eq!(page.permalink, "http://a-website.com/posts/hey/"); } + + #[test] + fn page_with_ignored_assets_filters_out_correct_files() { + let tmp_dir = TempDir::new("example").expect("create temp dir"); + let path = tmp_dir.path(); + create_dir(&path.join("content")).expect("create content temp dir"); + create_dir(&path.join("content").join("posts")).expect("create posts temp dir"); + let nested_path = path.join("content").join("posts").join("with-assets"); + create_dir(&nested_path).expect("create nested temp dir"); + let mut f = File::create(nested_path.join("index.md")).unwrap(); + f.write_all(b"+++\nslug=\"hey\"\n+++\n").unwrap(); + File::create(nested_path.join("example.js")).unwrap(); + File::create(nested_path.join("graph.jpg")).unwrap(); + File::create(nested_path.join("fail.png")).unwrap(); + + let mut gsb = GlobSetBuilder::new(); + gsb.add(Glob::new("*.{js,png}").unwrap()); + let mut config = Config::default(); + config.ignored_content_globber = Some(gsb.build().unwrap()); + + let res = Page::from_file( + nested_path.join("index.md").as_path(), + &config + ); + + assert!(res.is_ok()); + let page = res.unwrap(); + assert_eq!(page.assets.len(), 1); + assert_eq!(page.assets[0].file_name().unwrap().to_str(), Some("graph.jpg")); + } } diff --git a/docs/content/documentation/content/overview.md b/docs/content/documentation/content/overview.md index 8baca37..7aa6907 100644 --- 
a/docs/content/documentation/content/overview.md +++ b/docs/content/documentation/content/overview.md @@ -5,8 +5,8 @@ weight = 10 Gutenberg uses the folder structure to determine the site structure. -Each folder in the `content` directory represents a [section](./documentation/content/section.md) -that contains [pages](./documentation/content/page.md): your `.md` files. +Each folder in the `content` directory represents a [section](./documentation/content/section.md) +that contains [pages](./documentation/content/page.md): your `.md` files. ```bash . @@ -26,21 +26,21 @@ that contains [pages](./documentation/content/page.md): your `.md` files. Each page path (the part after the `base_url`, for example `blog/cli-usage/`) can be customised by changing the `path` or `slug` attribute of the [page front-matter](./documentation/content/page.md#front-matter). -You might have noticed a file named `_index.md` in the example above. +You might have noticed a file named `_index.md` in the example above. This file will be used for the metadata and content of the section itself and is not considered a page. To make sure the terminology used in the rest of the documentation is understood, let's go over the example above. The `content` directory in this case has three `sections`: `content`, `blog` and `landing`. The `content` section has only -one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md` +one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md` and `installation.md`. While not shown in the example, sections can be nested indefinitely. ## Assets colocation -The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related -assets. 
+The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related +assets. Gutenberg supports that pattern out of the box: create a folder, add a `index.md` file and as many non-markdown files as you want. Those assets will be copied in the same folder when building the site which allows you to use a relative path to access them. @@ -52,3 +52,14 @@ Those assets will be copied in the same folder when building the site which allo ``` By default, this page will get the folder name (`with-assets` in this case) as its slug. + +It is possible to ignore selected asset files using the +[ignored_content](./documentation/getting-started/configuration.md) setting in the config file. +For example, say you have an Excel spreadsheet from which you are taking several screenshots and +then linking to those image files on your website. For maintainability purposes, you want to keep +the spreadsheet in the same folder as the markdown, but you don't want to copy the spreadsheet to +the public web site. You can achieve this by simply setting `ignored_content` in the config file: + +``` +ignored_content = ["*.xlsx"] +``` diff --git a/docs/content/documentation/getting-started/configuration.md b/docs/content/documentation/getting-started/configuration.md index f7966d5..5eeb0a0 100644 --- a/docs/content/documentation/getting-started/configuration.md +++ b/docs/content/documentation/getting-started/configuration.md @@ -3,10 +3,10 @@ title = "Configuration" weight = 4 +++ -The default configuration will be enough to get Gutenberg running locally but not more than that. +The default configuration will be enough to get Gutenberg running locally but not more than that. It follows the philosophy of only paying for what you need: almost everything is turned off by default. -To change the config, edit the `config.toml` file. +To change the config, edit the `config.toml` file. 
If you are not familiar with TOML, have a look at [the TOML Spec](https://github.com/toml-lang/toml) to learn about it. @@ -30,7 +30,7 @@ theme = "" # Highlight all code blocks found highlight_code = false -# Which theme to use for the code highlighting. +# Which theme to use for the code highlighting. # See below for list of accepted values highlight_theme = "base16-ocean-dark" @@ -40,21 +40,27 @@ generate_rss = false # The number of articles to include in the RSS feed rss_limit = 20 -# Whether to generate a tags page and individual +# Whether to generate a tags page and individual # tag pages for pages with tags generate_tags_pages = false -# Whether to generate a categories page and individual +# Whether to generate a categories page and individual # category pages for pages with a category generate_categories_pages = false # Whether to compile the Sass files found in the `sass` directory compile_sass = false +# A list of glob patterns specifying asset files to ignore when +# copying content. Defaults to none, which means all asset files +# are copied over to the public folder. Example: +# ignored_content = ["*.{graphml,xlsx}", "temp.*"] +ignored_content = [] + # Optional translation object. The key if present should be a language code [translations] -# You can put any kind of data in there and it +# You can put any kind of data in there and it # will be accessible in all templates [extra] ``` @@ -76,5 +82,5 @@ Gutenberg currently has the following highlight themes available: - solarized-light - 1337 -Gutenberg uses the Sublime Text themes, making it very easy to add more. +Gutenberg uses the Sublime Text themes, making it very easy to add more. If you want a theme not on that list, please open an issue or a pull request on the [Gutenberg repo](https://github.com/Keats/gutenberg).