Filter ignored content in page.rs.index-subcmd
@@ -172,6 +172,7 @@ version = "0.1.0" | |||||
dependencies = [ | dependencies = [ | ||||
"chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | "chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"errors 0.1.0", | "errors 0.1.0", | ||||
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"highlighting 0.1.0", | "highlighting 0.1.0", | ||||
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | "serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
@@ -185,6 +186,7 @@ dependencies = [ | |||||
"config 0.1.0", | "config 0.1.0", | ||||
"errors 0.1.0", | "errors 0.1.0", | ||||
"front_matter 0.1.0", | "front_matter 0.1.0", | ||||
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | "rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
"rendering 0.1.0", | "rendering 0.1.0", | ||||
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | "serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
@@ -364,6 +366,18 @@ name = "glob" | |||||
version = "0.2.11" | version = "0.2.11" | ||||
source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
[[package]] | |||||
name = "globset" | |||||
version = "0.3.0" | |||||
source = "registry+https://github.com/rust-lang/crates.io-index" | |||||
dependencies = [ | |||||
"aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
"regex 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", | |||||
] | |||||
[[package]] | [[package]] | ||||
name = "gutenberg" | name = "gutenberg" | ||||
version = "0.3.1" | version = "0.3.1" | ||||
@@ -1546,6 +1560,7 @@ dependencies = [ | |||||
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb" | "checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb" | ||||
"checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05" | "checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05" | ||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" | "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" | ||||
"checksum globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1e96ab92362c06811385ae9a34d2698e8a1160745e0c78fbb434a44c8de3fabc" | |||||
"checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37" | "checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37" | ||||
"checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e" | "checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e" | ||||
"checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2" | "checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2" | ||||
@@ -8,6 +8,7 @@ toml = "0.4" | |||||
serde = "1" | serde = "1" | ||||
serde_derive = "1" | serde_derive = "1" | ||||
chrono = "0.4" | chrono = "0.4" | ||||
globset = "0.3.0" | |||||
errors = { path = "../errors" } | errors = { path = "../errors" } | ||||
highlighting = { path = "../highlighting"} | highlighting = { path = "../highlighting"} |
@@ -5,6 +5,7 @@ extern crate toml; | |||||
extern crate errors; | extern crate errors; | ||||
extern crate highlighting; | extern crate highlighting; | ||||
extern crate chrono; | extern crate chrono; | ||||
extern crate globset; | |||||
use std::collections::HashMap; | use std::collections::HashMap; | ||||
use std::fs::File; | use std::fs::File; | ||||
@@ -13,6 +14,7 @@ use std::path::{Path, PathBuf}; | |||||
use toml::{Value as Toml}; | use toml::{Value as Toml}; | ||||
use chrono::Utc; | use chrono::Utc; | ||||
use globset::{Glob, GlobSet, GlobSetBuilder}; | |||||
use errors::{Result, ResultExt}; | use errors::{Result, ResultExt}; | ||||
use highlighting::THEME_SET; | use highlighting::THEME_SET; | ||||
@@ -22,7 +24,7 @@ mod theme; | |||||
use theme::Theme; | use theme::Theme; | ||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] | |||||
#[derive(Clone, Debug, Serialize, Deserialize)] | |||||
pub struct Config { | pub struct Config { | ||||
/// Base URL of the site, the only required config argument | /// Base URL of the site, the only required config argument | ||||
pub base_url: String, | pub base_url: String, | ||||
@@ -49,6 +51,12 @@ pub struct Config { | |||||
pub generate_categories_pages: Option<bool>, | pub generate_categories_pages: Option<bool>, | ||||
/// Whether to compile the `sass` directory and output the css files into the static folder | /// Whether to compile the `sass` directory and output the css files into the static folder | ||||
pub compile_sass: Option<bool>, | pub compile_sass: Option<bool>, | ||||
/// A list of file glob patterns to ignore when processing the content folder. Defaults to none. | |||||
/// Had to remove the PartialEq derive because GlobSet does not implement it. No impact | |||||
/// because it's unused anyway (who wants to compare Configs for equality?). | |||||
pub ignored_content: Option<Vec<String>>, | |||||
#[serde(skip_serializing, skip_deserializing)] | |||||
pub ignored_content_globber: Option<GlobSet>, | |||||
/// Languages list and translated strings | /// Languages list and translated strings | ||||
pub translations: Option<HashMap<String, Toml>>, | pub translations: Option<HashMap<String, Toml>>, | ||||
@@ -84,6 +92,7 @@ impl Config { | |||||
set_default!(config.generate_tags_pages, false); | set_default!(config.generate_tags_pages, false); | ||||
set_default!(config.generate_categories_pages, false); | set_default!(config.generate_categories_pages, false); | ||||
set_default!(config.compile_sass, false); | set_default!(config.compile_sass, false); | ||||
set_default!(config.ignored_content, Vec::new()); | |||||
set_default!(config.translations, HashMap::new()); | set_default!(config.translations, HashMap::new()); | ||||
set_default!(config.extra, HashMap::new()); | set_default!(config.extra, HashMap::new()); | ||||
@@ -97,6 +106,25 @@ impl Config { | |||||
}; | }; | ||||
config.build_timestamp = Some(Utc::now().timestamp()); | config.build_timestamp = Some(Utc::now().timestamp()); | ||||
// Convert the file glob strings into a compiled glob set matcher. We want to do this once, | |||||
// at program initialization, rather than for every page, for example. We arrange for the | |||||
// globset matcher to always exist (even though it has to be inside an Option at the | |||||
// moment because of the TOML serializer); if the glob set is empty the `is_match` function | |||||
// of the globber always returns false. | |||||
let mut glob_set_builder = GlobSetBuilder::new(); | |||||
if let Some(ref v) = config.ignored_content { | |||||
for pat in v { | |||||
let glob = match Glob::new(pat) { | |||||
Ok(g) => g, | |||||
Err(e) => bail!("Invalid ignored_content glob pattern: {}, error = {}", pat, e) | |||||
}; | |||||
glob_set_builder.add(glob); | |||||
} | |||||
} | |||||
config.ignored_content_globber = Some(glob_set_builder.build().expect("Bad ignored_content in config file.")); | |||||
Ok(config) | Ok(config) | ||||
} | } | ||||
@@ -176,6 +204,8 @@ impl Default for Config { | |||||
generate_tags_pages: Some(true), | generate_tags_pages: Some(true), | ||||
generate_categories_pages: Some(true), | generate_categories_pages: Some(true), | ||||
compile_sass: Some(false), | compile_sass: Some(false), | ||||
ignored_content: Some(Vec::new()), | |||||
ignored_content_globber: Some(GlobSetBuilder::new().build().unwrap()), | |||||
translations: None, | translations: None, | ||||
extra: None, | extra: None, | ||||
build_timestamp: Some(1), | build_timestamp: Some(1), | ||||
@@ -330,4 +360,51 @@ title = "A title" | |||||
assert_eq!(translations["en"]["title"].as_str().unwrap(), "A title"); | assert_eq!(translations["en"]["title"].as_str().unwrap(), "A title"); | ||||
} | } | ||||
#[test] | |||||
fn missing_ignored_content_results_in_empty_vector_and_empty_globber() { | |||||
let config_str = r#" | |||||
title = "My site" | |||||
base_url = "example.com" | |||||
"#; | |||||
let config = Config::parse(config_str).unwrap(); | |||||
let v = config.ignored_content.unwrap(); | |||||
assert_eq!(v.len(), 0); | |||||
assert!(config.ignored_content_globber.unwrap().is_empty()); | |||||
} | |||||
#[test] | |||||
fn empty_ignored_content_results_in_empty_vector_and_empty_globber() { | |||||
let config_str = r#" | |||||
title = "My site" | |||||
base_url = "example.com" | |||||
ignored_content = [] | |||||
"#; | |||||
let config = Config::parse(config_str).unwrap(); | |||||
assert_eq!(config.ignored_content.unwrap().len(), 0); | |||||
assert!(config.ignored_content_globber.unwrap().is_empty()); | |||||
} | |||||
#[test] | |||||
fn non_empty_ignored_content_results_in_vector_of_patterns_and_configured_globber() { | |||||
let config_str = r#" | |||||
title = "My site" | |||||
base_url = "example.com" | |||||
ignored_content = ["*.{graphml,iso}", "*.py?"] | |||||
"#; | |||||
let config = Config::parse(config_str).unwrap(); | |||||
let v = config.ignored_content.unwrap(); | |||||
assert_eq!(v, vec!["*.{graphml,iso}", "*.py?"]); | |||||
let g = config.ignored_content_globber.unwrap(); | |||||
assert_eq!(g.len(), 2); | |||||
assert!(g.is_match("foo.graphml")); | |||||
assert!(g.is_match("foo.iso")); | |||||
assert!(!g.is_match("foo.png")); | |||||
assert!(g.is_match("foo.py2")); | |||||
assert!(g.is_match("foo.py3")); | |||||
assert!(!g.is_match("foo.py")); | |||||
} | |||||
} | } |
@@ -18,3 +18,4 @@ front_matter = { path = "../front_matter" } | |||||
[dev-dependencies] | [dev-dependencies] | ||||
tempdir = "0.3" | tempdir = "0.3" | ||||
toml = "0.4" | toml = "0.4" | ||||
globset = "0.3.0" |
@@ -13,6 +13,8 @@ extern crate utils; | |||||
extern crate tempdir; | extern crate tempdir; | ||||
#[cfg(test)] | #[cfg(test)] | ||||
extern crate toml; | extern crate toml; | ||||
#[cfg(test)] | |||||
extern crate globset; | |||||
mod file_info; | mod file_info; | ||||
mod page; | mod page; | ||||
@@ -128,10 +128,27 @@ impl Page { | |||||
let path = path.as_ref(); | let path = path.as_ref(); | ||||
let content = read_file(path)?; | let content = read_file(path)?; | ||||
let mut page = Page::parse(path, &content, config)?; | let mut page = Page::parse(path, &content, config)?; | ||||
page.assets = vec![]; | |||||
if page.file.name == "index" { | if page.file.name == "index" { | ||||
page.assets = find_related_assets(path.parent().unwrap()); | |||||
// `find_related_assets` only scans the immediate directory (it is not recursive) so our | |||||
// filtering only needs to work against the file_name component, not the full suffix. If | |||||
// `find_related_assets` was changed to also return files in subdirectories, we could | |||||
// use `PathBuf.strip_prefix` to remove the parent directory and then glob-filter | |||||
// against the remaining path. Note that the current behaviour effectively means that | |||||
// the `ignored_content` setting in the config file is limited to single-file glob | |||||
// patterns (no "**" patterns). | |||||
let globber = config.ignored_content_globber.as_ref().unwrap(); | |||||
let parent_dir = path.parent().unwrap(); | |||||
page.assets = find_related_assets(parent_dir).into_iter() | |||||
.filter(|path| | |||||
match path.file_name() { | |||||
None => true, | |||||
Some(file) => !globber.is_match(file) | |||||
} | |||||
).collect(); | |||||
} else { | |||||
page.assets = vec![]; | |||||
} | } | ||||
Ok(page) | Ok(page) | ||||
@@ -240,6 +257,7 @@ mod tests { | |||||
use tera::Tera; | use tera::Tera; | ||||
use tempdir::TempDir; | use tempdir::TempDir; | ||||
use globset::{Glob, GlobSetBuilder}; | |||||
use config::Config; | use config::Config; | ||||
use super::Page; | use super::Page; | ||||
@@ -419,4 +437,34 @@ Hello world | |||||
assert_eq!(page.assets.len(), 3); | assert_eq!(page.assets.len(), 3); | ||||
assert_eq!(page.permalink, "http://a-website.com/posts/hey/"); | assert_eq!(page.permalink, "http://a-website.com/posts/hey/"); | ||||
} | } | ||||
#[test] | |||||
fn page_with_ignored_assets_filters_out_correct_files() { | |||||
let tmp_dir = TempDir::new("example").expect("create temp dir"); | |||||
let path = tmp_dir.path(); | |||||
create_dir(&path.join("content")).expect("create content temp dir"); | |||||
create_dir(&path.join("content").join("posts")).expect("create posts temp dir"); | |||||
let nested_path = path.join("content").join("posts").join("with-assets"); | |||||
create_dir(&nested_path).expect("create nested temp dir"); | |||||
let mut f = File::create(nested_path.join("index.md")).unwrap(); | |||||
f.write_all(b"+++\nslug=\"hey\"\n+++\n").unwrap(); | |||||
File::create(nested_path.join("example.js")).unwrap(); | |||||
File::create(nested_path.join("graph.jpg")).unwrap(); | |||||
File::create(nested_path.join("fail.png")).unwrap(); | |||||
let mut gsb = GlobSetBuilder::new(); | |||||
gsb.add(Glob::new("*.{js,png}").unwrap()); | |||||
let mut config = Config::default(); | |||||
config.ignored_content_globber = Some(gsb.build().unwrap()); | |||||
let res = Page::from_file( | |||||
nested_path.join("index.md").as_path(), | |||||
&config | |||||
); | |||||
assert!(res.is_ok()); | |||||
let page = res.unwrap(); | |||||
assert_eq!(page.assets.len(), 1); | |||||
assert_eq!(page.assets[0].file_name().unwrap().to_str(), Some("graph.jpg")); | |||||
} | |||||
} | } |
@@ -5,8 +5,8 @@ weight = 10 | |||||
Gutenberg uses the folder structure to determine the site structure. | Gutenberg uses the folder structure to determine the site structure. | ||||
Each folder in the `content` directory represents a [section](./documentation/content/section.md) | |||||
that contains [pages](./documentation/content/page.md): your `.md` files. | |||||
Each folder in the `content` directory represents a [section](./documentation/content/section.md) | |||||
that contains [pages](./documentation/content/page.md): your `.md` files. | |||||
```bash | ```bash | ||||
. | . | ||||
@@ -26,21 +26,21 @@ that contains [pages](./documentation/content/page.md): your `.md` files. | |||||
Each page path (the part after the `base_url`, for example `blog/cli-usage/`) can be customised by changing the `path` or `slug` | Each page path (the part after the `base_url`, for example `blog/cli-usage/`) can be customised by changing the `path` or `slug` | ||||
attribute of the [page front-matter](./documentation/content/page.md#front-matter). | attribute of the [page front-matter](./documentation/content/page.md#front-matter). | ||||
You might have noticed a file named `_index.md` in the example above. | |||||
You might have noticed a file named `_index.md` in the example above. | |||||
This file will be used for the metadata and content of the section itself and is not considered a page. | This file will be used for the metadata and content of the section itself and is not considered a page. | ||||
To make sure the terminology used in the rest of the documentation is understood, let's go over the example above. | To make sure the terminology used in the rest of the documentation is understood, let's go over the example above. | ||||
The `content` directory in this case has three `sections`: `content`, `blog` and `landing`. The `content` section has only | The `content` directory in this case has three `sections`: `content`, `blog` and `landing`. The `content` section has only | ||||
one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md` | |||||
one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md` | |||||
and `installation.md`. | and `installation.md`. | ||||
While not shown in the example, sections can be nested indefinitely. | While not shown in the example, sections can be nested indefinitely. | ||||
## Assets colocation | ## Assets colocation | ||||
The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related | |||||
assets. | |||||
The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related | |||||
assets. | |||||
Gutenberg supports that pattern out of the box: create a folder, add an `index.md` file and as many non-markdown files as you want. | Gutenberg supports that pattern out of the box: create a folder, add an `index.md` file and as many non-markdown files as you want. | ||||
Those assets will be copied in the same folder when building the site which allows you to use a relative path to access them. | Those assets will be copied in the same folder when building the site which allows you to use a relative path to access them. | ||||
@@ -52,3 +52,14 @@ Those assets will be copied in the same folder when building the site which allo | |||||
``` | ``` | ||||
By default, this page will get the folder name (`with-assets` in this case) as its slug. | By default, this page will get the folder name (`with-assets` in this case) as its slug. | ||||
It is possible to ignore selected asset files using the | |||||
[ignored_content](./documentation/getting-started/configuration.md) setting in the config file. | |||||
For example, say you have an Excel spreadsheet from which you are taking several screenshots and | |||||
then linking to those image files on your website. For maintainability purposes, you want to keep | |||||
the spreadsheet in the same folder as the markdown, but you don't want to copy the spreadsheet to | |||||
the public web site. You can achieve this by simply setting `ignored_content` in the config file: | |||||
``` | |||||
ignored_content = ["*.xlsx"] | |||||
``` |
@@ -3,10 +3,10 @@ title = "Configuration" | |||||
weight = 4 | weight = 4 | ||||
+++ | +++ | ||||
The default configuration will be enough to get Gutenberg running locally but not more than that. | |||||
The default configuration will be enough to get Gutenberg running locally but not more than that. | |||||
It follows the philosophy of only paying for what you need: almost everything is turned off by default. | It follows the philosophy of only paying for what you need: almost everything is turned off by default. | ||||
To change the config, edit the `config.toml` file. | |||||
To change the config, edit the `config.toml` file. | |||||
If you are not familiar with TOML, have a look at [the TOML Spec](https://github.com/toml-lang/toml) | If you are not familiar with TOML, have a look at [the TOML Spec](https://github.com/toml-lang/toml) | ||||
to learn about it. | to learn about it. | ||||
@@ -30,7 +30,7 @@ theme = "" | |||||
# Highlight all code blocks found | # Highlight all code blocks found | ||||
highlight_code = false | highlight_code = false | ||||
# Which theme to use for the code highlighting. | |||||
# Which theme to use for the code highlighting. | |||||
# See below for list of accepted values | # See below for list of accepted values | ||||
highlight_theme = "base16-ocean-dark" | highlight_theme = "base16-ocean-dark" | ||||
@@ -40,21 +40,27 @@ generate_rss = false | |||||
# The number of articles to include in the RSS feed | # The number of articles to include in the RSS feed | ||||
rss_limit = 20 | rss_limit = 20 | ||||
# Whether to generate a tags page and individual | |||||
# Whether to generate a tags page and individual | |||||
# tag pages for pages with tags | # tag pages for pages with tags | ||||
generate_tags_pages = false | generate_tags_pages = false | ||||
# Whether to generate a categories page and individual | |||||
# Whether to generate a categories page and individual | |||||
# category pages for pages with a category | # category pages for pages with a category | ||||
generate_categories_pages = false | generate_categories_pages = false | ||||
# Whether to compile the Sass files found in the `sass` directory | # Whether to compile the Sass files found in the `sass` directory | ||||
compile_sass = false | compile_sass = false | ||||
# A list of glob patterns specifying asset files to ignore when | |||||
# copying content. Defaults to none, which means all asset files | |||||
# are copied over to the public folder. Example: | |||||
# ignored_content = ["*.{graphml,xlsx}", "temp.*"] | |||||
ignored_content = [] | |||||
# Optional translation object. The key if present should be a language code | # Optional translation object. The key if present should be a language code | ||||
[translations] | [translations] | ||||
# You can put any kind of data in there and it | |||||
# You can put any kind of data in there and it | |||||
# will be accessible in all templates | # will be accessible in all templates | ||||
[extra] | [extra] | ||||
``` | ``` | ||||
@@ -76,5 +82,5 @@ Gutenberg currently has the following highlight themes available: | |||||
- solarized-light | - solarized-light | ||||
- 1337 | - 1337 | ||||
Gutenberg uses the Sublime Text themes, making it very easy to add more. | |||||
Gutenberg uses the Sublime Text themes, making it very easy to add more. | |||||
If you want a theme not on that list, please open an issue or a pull request on the [Gutenberg repo](https://github.com/Keats/gutenberg). | If you want a theme not on that list, please open an issue or a pull request on the [Gutenberg repo](https://github.com/Keats/gutenberg). |