From 1a9ab968fe84db8a669f024f6c3835dbf4cd8dca Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Tue, 21 May 2019 06:08:49 +1000 Subject: [PATCH] Allow manual specification of header IDs (#685) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Justification for this feature is added in the docs. Precedent for the precise syntax: Hugo. Hugo puts this syntax behind a preference named headerIds, and automatic header ID generation behind a preference named autoHeaderIds, with both enabled by default. I have not implemented a switch to disable this. My suggestion for a workaround for the improbable case of desiring a literal “{#…}” at the end of a header is to replace `}` with `&#125;`. The algorithm I have used is not identical to [that which Hugo uses][0], because Hugo’s looks to work at the source level, whereas here we work at the pulldown-cmark event level, which is generally more sane, but potentially limiting for extremely esoteric IDs. Practical differences in implementation from Hugo (based purely on reading [blackfriday’s implementation][0], not actually trying it): - I believe Hugo would treat `# Foo {#*bar*}` as a heading with text “Foo” and ID `*bar*`, since it is working at the source level; whereas this code turns it into a heading with HTML `Foo {#<em>bar</em>}`, as it works at the pulldown-cmark event level and doesn’t go out of its way to make that work (I’m not familiar with pulldown-cmark, but I get the impression that you could make it work Hugo’s way on this point). The difference should be negligible: only *very* esoteric hashes would include magic Markdown characters. - Hugo will automatically generate an ID for `{#}`, whereas what I’ve coded here will yield a blank ID instead (which feels more correct to me—`None` versus `Some("")`, and all that). In practice the results should be identical. Fixes #433. 
[0]: https://github.com/russross/blackfriday/blob/a477dd1646916742841ed20379f941cfa6c5bb6f/block.go#L218-L234 --- components/rendering/src/markdown.rs | 28 +++++++++++-- components/rendering/tests/markdown.rs | 40 +++++++++++++++++++ docs/content/documentation/content/linking.md | 8 ++++ 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs index 6c34a58..49d224e 100644 --- a/components/rendering/src/markdown.rs +++ b/components/rendering/src/markdown.rs @@ -33,11 +33,12 @@ struct HeaderRef { start_idx: usize, end_idx: usize, level: i32, + id: Option, } impl HeaderRef { fn new(start: usize, level: i32) -> HeaderRef { - HeaderRef { start_idx: start, end_idx: 0, level } + HeaderRef { start_idx: start, end_idx: 0, level, id: None } } } @@ -225,15 +226,36 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Result>(); // We need to collect the events to make a second pass - let header_refs = get_header_refs(&events); + let mut header_refs = get_header_refs(&events); let mut anchors_to_insert = vec![]; + // First header pass: look for manually-specified IDs, e.g. `# Heading text {#hash}` + // (This is a separate first pass so that auto IDs can avoid collisions with manual IDs.) 
+ for header_ref in header_refs.iter_mut() { + let end_idx = header_ref.end_idx; + if let Event::Text(ref mut text) = events[end_idx - 1] { + if text.as_bytes().last() == Some(&b'}') { + if let Some(mut i) = text.find("{#") { + let id = text[i + 2..text.len() - 1].to_owned(); + inserted_anchors.push(id.clone()); + while i > 0 && text.as_bytes()[i - 1] == b' ' { + i -= 1; + } + header_ref.id = Some(id); + *text = text[..i].to_owned().into(); + } + } + } + } + + // Second header pass: auto-generate remaining IDs, and emit HTML for header_ref in header_refs { let start_idx = header_ref.start_idx; let end_idx = header_ref.end_idx; let title = get_text(&events[start_idx + 1..end_idx]); - let id = find_anchor(&inserted_anchors, slugify(&title), 0); + let id = header_ref.id.unwrap_or_else( + || find_anchor(&inserted_anchors, slugify(&title), 0)); inserted_anchors.push(id.clone()); // insert `id` to the tag diff --git a/components/rendering/tests/markdown.rs b/components/rendering/tests/markdown.rs index e2f36ce..ec1e20f 100644 --- a/components/rendering/tests/markdown.rs +++ b/components/rendering/tests/markdown.rs @@ -351,6 +351,46 @@ fn can_add_id_to_headers_same_slug() { assert_eq!(res.body, "

Hello

\n

Hello

\n"); } +#[test] +fn can_handle_manual_ids_on_headers() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let config = Config::default(); + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + // Tested things: manual IDs; whitespace flexibility; that automatic IDs avoid collision with + // manual IDs; that duplicates are in fact permitted among manual IDs; that any non-plain-text + // in the middle of `{#…}` will disrupt it from being acknowledged as a manual ID (that last + // one could reasonably be considered a bug rather than a feature, but test it either way); one + // workaround for the improbable case where you actually want `{#…}` at the end of a header. + let res = render_content("\ + # Hello\n\ + # Hello{#hello}\n\ + # Hello {#hello}\n\ + # Hello {#Something_else} \n\ + # Workaround for literal {#…}\n\ + # Hello\n\ + # Auto {#*matic*}", &context).unwrap(); + assert_eq!(res.body, "\ +

Hello

\n\ +

Hello

\n\ +

Hello

\n\ +

Hello

\n\ +

Workaround for literal {#…}

\n\ +

Hello

\n\ +

Auto {#matic}

\n\ + "); +} + +#[test] +fn blank_headers() { + let tera_ctx = Tera::default(); + let permalinks_ctx = HashMap::new(); + let config = Config::default(); + let context = RenderContext::new(&tera_ctx, &config, "", &permalinks_ctx, InsertAnchor::None); + let res = render_content("# \n#\n# {#hmm} \n# {#}", &context).unwrap(); + assert_eq!(res.body, "

\n

\n

\n

\n"); +} + #[test] fn can_insert_anchor_left() { let permalinks_ctx = HashMap::new(); diff --git a/docs/content/documentation/content/linking.md b/docs/content/documentation/content/linking.md index 1ce72b0..25732f2 100644 --- a/docs/content/documentation/content/linking.md +++ b/docs/content/documentation/content/linking.md @@ -16,6 +16,14 @@ if the slug already exists for that article. For example: ## Example code <- example-code-1 ``` +You can also manually specify an id with a `{#…}` suffix on the header line: + +```md +# Something manual! {#manual} +``` + +This is useful for making deep links robust, either proactively (so that you can later change the text of a header without breaking links to it) or retroactively (keeping the slug of the old header text, when changing the text). It can also be useful for migration of existing sites with different header id schemes, so that you can keep deep links working. + ## Anchor insertion It is possible to have Zola automatically insert anchor links next to the header, as you can see on the site you are currently reading if you hover a title.