Filter ignored content in page.rs.index-subcmd
@@ -172,6 +172,7 @@ version = "0.1.0" | |||
dependencies = [ | |||
"chrono 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"errors 0.1.0", | |||
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"highlighting 0.1.0", | |||
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | |||
@@ -185,6 +186,7 @@ dependencies = [ | |||
"config 0.1.0", | |||
"errors 0.1.0", | |||
"front_matter 0.1.0", | |||
"globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"rendering 0.1.0", | |||
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", | |||
@@ -364,6 +366,18 @@ name = "glob" | |||
version = "0.2.11" | |||
source = "registry+https://github.com/rust-lang/crates.io-index" | |||
[[package]] | |||
name = "globset" | |||
version = "0.3.0" | |||
source = "registry+https://github.com/rust-lang/crates.io-index" | |||
dependencies = [ | |||
"aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", | |||
"regex 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", | |||
] | |||
[[package]] | |||
name = "gutenberg" | |||
version = "0.3.1" | |||
@@ -1546,6 +1560,7 @@ dependencies = [ | |||
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb" | |||
"checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05" | |||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" | |||
"checksum globset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1e96ab92362c06811385ae9a34d2698e8a1160745e0c78fbb434a44c8de3fabc" | |||
"checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37" | |||
"checksum humansize 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e" | |||
"checksum hyper 0.10.13 (registry+https://github.com/rust-lang/crates.io-index)" = "368cb56b2740ebf4230520e2b90ebb0461e69034d85d1945febd9b3971426db2" | |||
@@ -8,6 +8,7 @@ toml = "0.4" | |||
serde = "1" | |||
serde_derive = "1" | |||
chrono = "0.4" | |||
globset = "0.3.0" | |||
errors = { path = "../errors" } | |||
highlighting = { path = "../highlighting"} |
@@ -5,6 +5,7 @@ extern crate toml; | |||
extern crate errors; | |||
extern crate highlighting; | |||
extern crate chrono; | |||
extern crate globset; | |||
use std::collections::HashMap; | |||
use std::fs::File; | |||
@@ -13,6 +14,7 @@ use std::path::{Path, PathBuf}; | |||
use toml::{Value as Toml}; | |||
use chrono::Utc; | |||
use globset::{Glob, GlobSet, GlobSetBuilder}; | |||
use errors::{Result, ResultExt}; | |||
use highlighting::THEME_SET; | |||
@@ -22,7 +24,7 @@ mod theme; | |||
use theme::Theme; | |||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] | |||
#[derive(Clone, Debug, Serialize, Deserialize)] | |||
pub struct Config { | |||
/// Base URL of the site, the only required config argument | |||
pub base_url: String, | |||
@@ -49,6 +51,12 @@ pub struct Config { | |||
pub generate_categories_pages: Option<bool>, | |||
/// Whether to compile the `sass` directory and output the css files into the static folder | |||
pub compile_sass: Option<bool>, | |||
/// A list of file glob patterns to ignore when processing the content folder. Defaults to none. | |||
/// Had to remove the PartialEq derive because GlobSet does not implement it. No impact | |||
/// because it's unused anyway (who wants to compare Configs for equality?). | |||
pub ignored_content: Option<Vec<String>>, | |||
#[serde(skip_serializing, skip_deserializing)] | |||
pub ignored_content_globber: Option<GlobSet>, | |||
/// Languages list and translated strings | |||
pub translations: Option<HashMap<String, Toml>>, | |||
@@ -84,6 +92,7 @@ impl Config { | |||
set_default!(config.generate_tags_pages, false); | |||
set_default!(config.generate_categories_pages, false); | |||
set_default!(config.compile_sass, false); | |||
set_default!(config.ignored_content, Vec::new()); | |||
set_default!(config.translations, HashMap::new()); | |||
set_default!(config.extra, HashMap::new()); | |||
@@ -97,6 +106,25 @@ impl Config { | |||
}; | |||
config.build_timestamp = Some(Utc::now().timestamp()); | |||
// Convert the file glob strings into a compiled glob set matcher. We want to do this once, | |||
// at program initialization, rather than for every page, for example. We arrange for the | |||
// globset matcher to always exist (even though it has to be inside an Option at the | |||
// moment because of the TOML serializer); if the glob set is empty the `is_match` function | |||
// of the globber always returns false. | |||
let mut glob_set_builder = GlobSetBuilder::new(); | |||
if let Some(ref v) = config.ignored_content { | |||
for pat in v { | |||
let glob = match Glob::new(pat) { | |||
Ok(g) => g, | |||
Err(e) => bail!("Invalid ignored_content glob pattern: {}, error = {}", pat, e) | |||
}; | |||
glob_set_builder.add(glob); | |||
} | |||
} | |||
config.ignored_content_globber = Some(glob_set_builder.build().expect("Bad ignored_content in config file.")); | |||
Ok(config) | |||
} | |||
@@ -176,6 +204,8 @@ impl Default for Config { | |||
generate_tags_pages: Some(true), | |||
generate_categories_pages: Some(true), | |||
compile_sass: Some(false), | |||
ignored_content: Some(Vec::new()), | |||
ignored_content_globber: Some(GlobSetBuilder::new().build().unwrap()), | |||
translations: None, | |||
extra: None, | |||
build_timestamp: Some(1), | |||
@@ -330,4 +360,51 @@ title = "A title" | |||
assert_eq!(translations["en"]["title"].as_str().unwrap(), "A title"); | |||
} | |||
// Checks that a config with no `ignored_content` key still parses, and that the
// resulting pattern list and compiled glob set are both empty.
#[test] | |||
fn missing_ignored_content_results_in_empty_vector_and_empty_globber() { | |||
let config_str = r#" | |||
title = "My site" | |||
base_url = "example.com" | |||
"#; | |||
let config = Config::parse(config_str).unwrap(); | |||
// Both fields are unwrapped, so the test relies on them being `Some` after parsing
// even though the key was never written in the TOML.
let v = config.ignored_content.unwrap(); | |||
assert_eq!(v.len(), 0); | |||
assert!(config.ignored_content_globber.unwrap().is_empty()); | |||
} | |||
// Checks that an explicitly empty `ignored_content = []` parses to an empty
// pattern list and an empty compiled glob set.
#[test] | |||
fn empty_ignored_content_results_in_empty_vector_and_empty_globber() { | |||
let config_str = r#" | |||
title = "My site" | |||
base_url = "example.com" | |||
ignored_content = [] | |||
"#; | |||
let config = Config::parse(config_str).unwrap(); | |||
// Same expectations as when the key is absent: zero patterns, empty globber.
assert_eq!(config.ignored_content.unwrap().len(), 0); | |||
assert!(config.ignored_content_globber.unwrap().is_empty()); | |||
} | |||
// Checks that glob patterns listed under `ignored_content` are kept as written and
// are compiled into a glob set that matches the expected file names.
#[test] | |||
fn non_empty_ignored_content_results_in_vector_of_patterns_and_configured_globber() { | |||
let config_str = r#" | |||
title = "My site" | |||
base_url = "example.com" | |||
ignored_content = ["*.{graphml,iso}", "*.py?"] | |||
"#; | |||
let config = Config::parse(config_str).unwrap(); | |||
// The raw pattern strings must survive parsing unchanged and in order.
let v = config.ignored_content.unwrap(); | |||
assert_eq!(v, vec!["*.{graphml,iso}", "*.py?"]); | |||
let g = config.ignored_content_globber.unwrap(); | |||
// One compiled glob per configured pattern.
assert_eq!(g.len(), 2); | |||
// `*.{graphml,iso}` is expected to match either extension and nothing else.
assert!(g.is_match("foo.graphml")); | |||
assert!(g.is_match("foo.iso")); | |||
assert!(!g.is_match("foo.png")); | |||
// `*.py?` is expected to need one more character after `.py`, so a bare `.py`
// file must not be matched.
assert!(g.is_match("foo.py2")); | |||
assert!(g.is_match("foo.py3")); | |||
assert!(!g.is_match("foo.py")); | |||
} | |||
} |
@@ -18,3 +18,4 @@ front_matter = { path = "../front_matter" } | |||
[dev-dependencies] | |||
tempdir = "0.3" | |||
toml = "0.4" | |||
globset = "0.3.0" |
@@ -13,6 +13,8 @@ extern crate utils; | |||
extern crate tempdir; | |||
#[cfg(test)] | |||
extern crate toml; | |||
#[cfg(test)] | |||
extern crate globset; | |||
mod file_info; | |||
mod page; | |||
@@ -128,10 +128,27 @@ impl Page { | |||
let path = path.as_ref(); | |||
let content = read_file(path)?; | |||
let mut page = Page::parse(path, &content, config)?; | |||
page.assets = vec![]; | |||
if page.file.name == "index" { | |||
page.assets = find_related_assets(path.parent().unwrap()); | |||
// `find_related_assets` only scans the immediate directory (it is not recursive) so our | |||
// filtering only needs to work against the file_name component, not the full suffix. If | |||
// `find_related_assets` was changed to also return files in subdirectories, we could | |||
// use `PathBuf.strip_prefix` to remove the parent directory and then glob-filter | |||
// against the remaining path. Note that the current behaviour effectively means that | |||
// the `ignored_content` setting in the config file is limited to single-file glob | |||
// patterns (no "**" patterns). | |||
let globber = config.ignored_content_globber.as_ref().unwrap(); | |||
let parent_dir = path.parent().unwrap(); | |||
page.assets = find_related_assets(parent_dir).into_iter() | |||
.filter(|path| | |||
match path.file_name() { | |||
None => true, | |||
Some(file) => !globber.is_match(file) | |||
} | |||
).collect(); | |||
} else { | |||
page.assets = vec![]; | |||
} | |||
Ok(page) | |||
@@ -240,6 +257,7 @@ mod tests { | |||
use tera::Tera; | |||
use tempdir::TempDir; | |||
use globset::{Glob, GlobSetBuilder}; | |||
use config::Config; | |||
use super::Page; | |||
@@ -419,4 +437,34 @@ Hello world | |||
assert_eq!(page.assets.len(), 3); | |||
assert_eq!(page.permalink, "http://a-website.com/posts/hey/"); | |||
} | |||
// Checks that `Page::from_file` leaves out co-located asset files that match the
// config's ignore glob set and keeps the ones that do not.
#[test] | |||
fn page_with_ignored_assets_filters_out_correct_files() { | |||
// Build content/posts/with-assets/ inside a temporary directory.
let tmp_dir = TempDir::new("example").expect("create temp dir"); | |||
let path = tmp_dir.path(); | |||
create_dir(&path.join("content")).expect("create content temp dir"); | |||
create_dir(&path.join("content").join("posts")).expect("create posts temp dir"); | |||
let nested_path = path.join("content").join("posts").join("with-assets"); | |||
create_dir(&nested_path).expect("create nested temp dir"); | |||
// The page itself: front matter only, with a custom slug.
let mut f = File::create(nested_path.join("index.md")).unwrap(); | |||
f.write_all(b"+++\nslug=\"hey\"\n+++\n").unwrap(); | |||
// Three sibling files next to index.md; two of them match the glob added below.
File::create(nested_path.join("example.js")).unwrap(); | |||
File::create(nested_path.join("graph.jpg")).unwrap(); | |||
File::create(nested_path.join("fail.png")).unwrap(); | |||
// The glob set is assigned directly on a default config rather than being
// produced by `Config::parse`.
let mut gsb = GlobSetBuilder::new(); | |||
gsb.add(Glob::new("*.{js,png}").unwrap()); | |||
let mut config = Config::default(); | |||
config.ignored_content_globber = Some(gsb.build().unwrap()); | |||
let res = Page::from_file( | |||
nested_path.join("index.md").as_path(), | |||
&config | |||
); | |||
assert!(res.is_ok()); | |||
let page = res.unwrap(); | |||
// Only graph.jpg should remain; example.js and fail.png match `*.{js,png}`.
assert_eq!(page.assets.len(), 1); | |||
assert_eq!(page.assets[0].file_name().unwrap().to_str(), Some("graph.jpg")); | |||
} | |||
} |
@@ -5,8 +5,8 @@ weight = 10 | |||
Gutenberg uses the folder structure to determine the site structure. | |||
Each folder in the `content` directory represents a [section](./documentation/content/section.md) | |||
that contains [pages](./documentation/content/page.md): your `.md` files. | |||
Each folder in the `content` directory represents a [section](./documentation/content/section.md) | |||
that contains [pages](./documentation/content/page.md): your `.md` files. | |||
```bash | |||
. | |||
@@ -26,21 +26,21 @@ that contains [pages](./documentation/content/page.md): your `.md` files. | |||
Each page path (the part after the `base_url`, for example `blog/cli-usage/`) can be customised by changing the `path` or `slug` | |||
attribute of the [page front-matter](./documentation/content/page.md#front-matter). | |||
You might have noticed a file named `_index.md` in the example above. | |||
You might have noticed a file named `_index.md` in the example above. | |||
This file will be used for the metadata and content of the section itself and is not considered a page. | |||
To make sure the terminology used in the rest of the documentation is understood, let's go over the example above. | |||
The `content` directory in this case has three `sections`: `content`, `blog` and `landing`. The `content` section has only | |||
one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md` | |||
one page, `something.md`, the `landing` section has no page and the `blog` section has 4 pages: `cli-usage.md`, `configuration.md`, `directory-structure.md` | |||
and `installation.md`. | |||
While not shown in the example, sections can be nested indefinitely. | |||
## Assets colocation | |||
The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related | |||
assets. | |||
The `content` directory is not limited to markup files though: it's natural to want to co-locate a page and some related | |||
assets. | |||
Gutenberg supports that pattern out of the box: create a folder, add an `index.md` file and as many non-markdown files as you want. | |||
Those assets will be copied to the same folder when building the site, which allows you to use a relative path to access them. | |||
@@ -52,3 +52,14 @@ Those assets will be copied in the same folder when building the site which allo | |||
``` | |||
By default, this page will get the folder name (`with-assets` in this case) as its slug. | |||
It is possible to ignore selected asset files using the | |||
[ignored_content](./documentation/getting-started/configuration.md) setting in the config file. | |||
For example, say you have an Excel spreadsheet from which you are taking several screenshots and | |||
then linking to those image files on your website. For maintainability purposes, you want to keep | |||
the spreadsheet in the same folder as the markdown, but you don't want to copy the spreadsheet to | |||
the public website. You can achieve this by simply setting `ignored_content` in the config file: | |||
``` | |||
ignored_content = ["*.xlsx"] | |||
``` |
@@ -3,10 +3,10 @@ title = "Configuration" | |||
weight = 4 | |||
+++ | |||
The default configuration will be enough to get Gutenberg running locally but not more than that. | |||
The default configuration will be enough to get Gutenberg running locally but not more than that. | |||
It follows the philosophy of only paying for what you need: almost everything is turned off by default. | |||
To change the config, edit the `config.toml` file. | |||
To change the config, edit the `config.toml` file. | |||
If you are not familiar with TOML, have a look at [the TOML Spec](https://github.com/toml-lang/toml) | |||
to learn about it. | |||
@@ -30,7 +30,7 @@ theme = "" | |||
# Highlight all code blocks found | |||
highlight_code = false | |||
# Which theme to use for the code highlighting. | |||
# Which theme to use for the code highlighting. | |||
# See below for list of accepted values | |||
highlight_theme = "base16-ocean-dark" | |||
@@ -40,21 +40,27 @@ generate_rss = false | |||
# The number of articles to include in the RSS feed | |||
rss_limit = 20 | |||
# Whether to generate a tags page and individual | |||
# Whether to generate a tags page and individual | |||
# tag pages for pages with tags | |||
generate_tags_pages = false | |||
# Whether to generate a categories page and individual | |||
# Whether to generate a categories page and individual | |||
# category pages for pages with a category | |||
generate_categories_pages = false | |||
# Whether to compile the Sass files found in the `sass` directory | |||
compile_sass = false | |||
# A list of glob patterns specifying asset files to ignore when | |||
# copying content. Defaults to none, which means all asset files | |||
# are copied over to the public folder. Example: | |||
# ignored_content = ["*.{graphml,xlsx}", "temp.*"] | |||
ignored_content = [] | |||
# Optional translation object. The keys, if present, should be language codes | |||
[translations] | |||
# You can put any kind of data in there and it | |||
# You can put any kind of data in there and it | |||
# will be accessible in all templates | |||
[extra] | |||
``` | |||
@@ -76,5 +82,5 @@ Gutenberg currently has the following highlight themes available: | |||
- solarized-light | |||
- 1337 | |||
Gutenberg uses the Sublime Text themes, making it very easy to add more. | |||
Gutenberg uses the Sublime Text themes, making it very easy to add more. | |||
If you want a theme not on that list, please open an issue or a pull request on the [Gutenberg repo](https://github.com/Keats/gutenberg). |