Browse Source

Improve sorting speed

index-subcmd
Vincent Prouillet 6 years ago
parent
commit
37af36186a
8 changed files with 212 additions and 76 deletions
  1. +1
    -0
      CHANGELOG.md
  2. +1
    -1
      appveyor.yml
  3. +2
    -2
      ci/script.sh
  4. +135
    -0
      components/content/benches/sorting.rs
  5. +59
    -69
      components/content/src/sorting.rs
  6. +7
    -0
      components/front_matter/src/page.rs
  7. +5
    -3
      components/site/src/lib.rs
  8. +2
    -1
      components/taxonomies/src/lib.rs

+ 1
- 0
CHANGELOG.md View File

@@ -6,6 +6,7 @@
- Add weight sorting
- Remove `section` from the `page` rendering context: this is too expensive. Use
the global function `get_section` if you need to get it
- Fix next/previous in pagination being incorrect

## 0.0.7 (2017-06-19)



+ 1
- 1
appveyor.yml View File

@@ -35,7 +35,7 @@ install:
test_script:
# we don't run the "test phase" when doing deploys
- if [%APPVEYOR_REPO_TAG%]==[false] (
cargo test --target %TARGET%
cargo test -all --target %TARGET%
)

before_deploy:


+ 2
- 2
ci/script.sh View File

@@ -11,8 +11,8 @@ main() {
return
fi

cross test --target $TARGET
cross test --target $TARGET --release
cross test --all --target $TARGET
cross test --all --target $TARGET --release
}

# we don't run the "test phase" when doing deploys


+ 135
- 0
components/content/benches/sorting.rs View File

@@ -0,0 +1,135 @@
#![feature(test)]
extern crate test;
extern crate tera;

extern crate content;
extern crate front_matter;
extern crate config;

use std::collections::HashMap;

use config::Config;
use tera::Tera;
use front_matter::{SortBy, InsertAnchor};
use content::{Page, sort_pages, populate_previous_and_next_pages};


fn create_pages(number: usize, sort_by: SortBy) -> Vec<Page> {
let mut pages = vec![];
let config = Config::default();
let tera = Tera::default();
let permalinks = HashMap::new();

for i in 0..number {
let mut page = Page::default();
match sort_by {
SortBy::Weight => { page.meta.weight = Some(i); },
SortBy::Order => { page.meta.order = Some(i); },
_ => (),
};
page.raw_content = r#"
# Modus cognitius profanam ne duae virtutis mundi

## Ut vita

Lorem markdownum litora, care ponto nomina, et ut aspicit gelidas sui et
purpureo genuit. Tamen colla venientis [delphina](http://nil-sol.com/ecquis)
Tusci et temptata citaeque curam isto ubi vult vulnere reppulit.

- Seque vidit flendoque de quodam
- Dabit minimos deiecto caputque noctis pluma
- Leti coniunx est Helicen
- Illius pulvereumque Icare inpositos
- Vivunt pereo pluvio tot ramos Olenios gelidis
- Quater teretes natura inde

### A subsection

Protinus dicunt, breve per, et vivacis genus Orphei munere. Me terram [dimittere
casside](http://corpus.org/) pervenit saxo primoque frequentat genuum sorori
praeferre causas Libys. Illud in serpit adsuetam utrimque nunc haberent,
**terrae si** veni! Hectoreis potes sumite [Mavortis retusa](http://tua.org/)
granum captantur potuisse Minervae, frugum.

> Clivo sub inprovisoque nostrum minus fama est, discordia patrem petebat precatur
absumitur, poena per sit. Foramina *tamen cupidine* memor supplex tollentes
dictum unam orbem, Anubis caecae. Viderat formosior tegebat satis, Aethiopasque
sit submisso coniuge tristis ubi!

## Praeceps Corinthus totidem quem crus vultum cape

```rs
#[derive(Debug)]
pub struct Site {
/// The base path of the gutenberg site
pub base_path: PathBuf,
/// The parsed config for the site
pub config: Config,
pub pages: HashMap<PathBuf, Page>,
pub sections: HashMap<PathBuf, Section>,
pub tera: Tera,
live_reload: bool,
output_path: PathBuf,
static_path: PathBuf,
pub tags: Option<Taxonomy>,
pub categories: Option<Taxonomy>,
/// A map of all .md files (section and pages) and their permalink
/// We need that if there are relative links in the content that need to be resolved
pub permalinks: HashMap<String, String>,
}
```

## More stuff
And a shortcode:

{{ youtube(id="my_youtube_id") }}

### Another subsection
Gotta make the toc do a little bit of work

# A big title

- hello
- world
- !

```py
if __name__ == "__main__":
gen_site("basic-blog", [""], 250, paginate=True)
```
"#.to_string();
page.render_markdown(&permalinks, &tera, &config, InsertAnchor::None).unwrap();
pages.push(page);
}

pages
}

// Most of the time spent in those benches are due to the .clone()...
// but i don't know how to remove them so there are some baseline bench with
// just the cloning and with a bit of math we can figure it out

#[bench]
fn bench_baseline_cloning(b: &mut test::Bencher) {
let pages = create_pages(250, SortBy::Order);
b.iter(|| pages.clone());
}

#[bench]
fn bench_sorting_none(b: &mut test::Bencher) {
let pages = create_pages(250, SortBy::Order);
b.iter(|| sort_pages(pages.clone(), SortBy::None));
}

#[bench]
fn bench_sorting_order(b: &mut test::Bencher) {
let pages = create_pages(250, SortBy::Order);
b.iter(|| sort_pages(pages.clone(), SortBy::Order));
}

#[bench]
fn bench_populate_previous_and_next_pages(b: &mut test::Bencher) {
let pages = create_pages(250, SortBy::Order);
let (sorted_pages, _) = sort_pages(pages, SortBy::Order);
b.iter(|| populate_previous_and_next_pages(sorted_pages.clone()));
}

+ 59
- 69
components/content/src/sorting.rs View File

@@ -3,75 +3,59 @@ use rayon::prelude::*;
use page::Page;
use front_matter::SortBy;

/// Sort pages using the method for the given section
/// Sort pages by the given criteria
///
/// Any pages that doesn't have a date when the sorting method is date or order
/// when the sorting method is order will be ignored.
/// Any pages that doesn't have a the required field when the sorting method is other than none
/// will be ignored.
pub fn sort_pages(pages: Vec<Page>, sort_by: SortBy) -> (Vec<Page>, Vec<Page>) {
match sort_by {
SortBy::Date => {
let mut can_be_sorted = vec![];
let mut cannot_be_sorted = vec![];
for page in pages {
if page.meta.date.is_some() {
can_be_sorted.push(page);
} else {
cannot_be_sorted.push(page);
}
}
can_be_sorted.par_sort_unstable_by(|a, b| b.meta.date().unwrap().cmp(&a.meta.date().unwrap()));

(can_be_sorted, cannot_be_sorted)
},
SortBy::Order | SortBy::Weight => {
let mut can_be_sorted = vec![];
let mut cannot_be_sorted = vec![];
for page in pages {
if page.meta.order.is_some() {
can_be_sorted.push(page);
} else {
cannot_be_sorted.push(page);
}
}
if sort_by == SortBy::Order {
can_be_sorted.par_sort_unstable_by(|a, b| b.meta.order().cmp(&a.meta.order()));
} else {
can_be_sorted.par_sort_unstable_by(|a, b| a.meta.order().cmp(&b.meta.order()));
if sort_by == SortBy::None {
return (pages, vec![]);
}

let (mut can_be_sorted, cannot_be_sorted): (Vec<_>, Vec<_>) = pages
.into_par_iter()
.partition(|ref page| {
match sort_by {
SortBy::Date => page.meta.date.is_some(),
SortBy::Order => page.meta.order.is_some(),
SortBy::Weight => page.meta.weight.is_some(),
_ => unreachable!()
}
});

(can_be_sorted, cannot_be_sorted)
},
SortBy::None => (pages, vec![])
}
match sort_by {
SortBy::Date => can_be_sorted.par_sort_unstable_by(|a, b| b.meta.date().unwrap().cmp(&a.meta.date().unwrap())),
SortBy::Order => can_be_sorted.par_sort_unstable_by(|a, b| b.meta.order().cmp(&a.meta.order())),
SortBy::Weight => can_be_sorted.par_sort_unstable_by(|a, b| a.meta.weight().cmp(&b.meta.weight())),
_ => unreachable!()
};

(can_be_sorted, cannot_be_sorted)
}

/// Horribly inefficient way to set previous and next on each pages
/// So many clones
pub fn populate_previous_and_next_pages(input: &[Page]) -> Vec<Page> {
let pages = input.to_vec();
let mut res = Vec::new();
pub fn populate_previous_and_next_pages(input: Vec<Page>) -> Vec<Page> {
let mut res = Vec::with_capacity(input.len());

// the input is already sorted
// We might put prev/next randomly if a page is missing date/order, probably fine
for (i, page) in input.iter().enumerate() {
let mut new_page = page.clone();
// The input is already sorted
for (i, _) in input.iter().enumerate() {
let mut new_page = input[i].clone();

if i > 0 {
let next = &pages[i - 1];
let mut next_page = next.clone();
let mut previous_page = input[i - 1].clone();
// Remove prev/next otherwise we serialise the whole thing...
next_page.previous = None;
next_page.next = None;
new_page.next = Some(Box::new(next_page));
previous_page.previous = None;
previous_page.next = None;
new_page.previous = Some(Box::new(previous_page));
}

if i < input.len() - 1 {
let previous = &pages[i + 1];
let mut next_page = input[i + 1].clone();
// Remove prev/next otherwise we serialise the whole thing...
let mut previous_page = previous.clone();
previous_page.previous = None;
previous_page.next = None;
new_page.previous = Some(Box::new(previous_page));
next_page.previous = None;
next_page.next = None;
new_page.next = Some(Box::new(next_page));
}
res.push(new_page);
}
@@ -97,6 +81,12 @@ mod tests {
Page::new("content/hello.md", front_matter)
}

fn create_page_with_weight(weight: usize) -> Page {
let mut front_matter = PageFrontMatter::default();
front_matter.weight = Some(weight);
Page::new("content/hello.md", front_matter)
}

#[test]
fn can_sort_by_dates() {
let input = vec![
@@ -128,15 +118,15 @@ mod tests {
#[test]
fn can_sort_by_weight() {
let input = vec![
create_page_with_order(2),
create_page_with_order(3),
create_page_with_order(1),
create_page_with_weight(2),
create_page_with_weight(3),
create_page_with_weight(1),
];
let (pages, _) = sort_pages(input, SortBy::Weight);
// Should be sorted by date
assert_eq!(pages[0].clone().meta.order.unwrap(), 1);
assert_eq!(pages[1].clone().meta.order.unwrap(), 2);
assert_eq!(pages[2].clone().meta.order.unwrap(), 3);
assert_eq!(pages[0].clone().meta.weight.unwrap(), 1);
assert_eq!(pages[1].clone().meta.weight.unwrap(), 2);
assert_eq!(pages[2].clone().meta.weight.unwrap(), 3);
}

#[test]
@@ -168,23 +158,23 @@ mod tests {
#[test]
fn can_populate_previous_and_next_pages() {
let input = vec![
create_page_with_order(3),
create_page_with_order(2),
create_page_with_order(1),
create_page_with_order(2),
create_page_with_order(3),
];
let pages = populate_previous_and_next_pages(input.as_slice());
let pages = populate_previous_and_next_pages(input);

assert!(pages[0].clone().next.is_none());
assert!(pages[0].clone().previous.is_some());
assert_eq!(pages[0].clone().previous.unwrap().meta.order.unwrap(), 2);
assert!(pages[0].clone().previous.is_none());
assert!(pages[0].clone().next.is_some());
assert_eq!(pages[0].clone().next.unwrap().meta.order.unwrap(), 2);

assert!(pages[1].clone().next.is_some());
assert!(pages[1].clone().previous.is_some());
assert_eq!(pages[1].clone().next.unwrap().meta.order.unwrap(), 3);
assert_eq!(pages[1].clone().previous.unwrap().meta.order.unwrap(), 1);
assert_eq!(pages[1].clone().next.unwrap().meta.order.unwrap(), 3);

assert!(pages[2].clone().next.is_some());
assert!(pages[2].clone().previous.is_none());
assert_eq!(pages[2].clone().next.unwrap().meta.order.unwrap(), 2);
assert!(pages[2].clone().next.is_none());
assert!(pages[2].clone().previous.is_some());
assert_eq!(pages[2].clone().previous.unwrap().meta.order.unwrap(), 2);
}
}

+ 7
- 0
components/front_matter/src/page.rs View File

@@ -28,6 +28,8 @@ pub struct PageFrontMatter {
pub category: Option<String>,
/// Integer to use to order content. Lowest is at the bottom, highest first
pub order: Option<usize>,
/// Integer to use to order content. Highest is at the bottom, lowest first
pub weight: Option<usize>,
/// All aliases for that page. Gutenberg will create HTML templates that will
#[serde(skip_serializing)]
pub aliases: Option<Vec<String>>,
@@ -84,6 +86,10 @@ impl PageFrontMatter {
self.order.unwrap()
}

pub fn weight(&self) -> usize {
self.weight.unwrap()
}

pub fn has_tags(&self) -> bool {
match self.tags {
Some(ref t) => !t.is_empty(),
@@ -103,6 +109,7 @@ impl Default for PageFrontMatter {
tags: None,
category: None,
order: None,
weight: None,
aliases: None,
template: None,
extra: None,


+ 5
- 3
components/site/src/lib.rs View File

@@ -17,6 +17,7 @@ extern crate tempdir;

use std::collections::HashMap;
use std::fs::{remove_dir_all, copy, create_dir_all};
use std::mem;
use std::path::{Path, PathBuf};

use glob::glob;
@@ -289,8 +290,9 @@ impl Site {
continue;
}
}
let (sorted_pages, cannot_be_sorted_pages) = sort_pages(section.pages.clone(), section.meta.sort_by());
section.pages = populate_previous_and_next_pages(&sorted_pages);
let pages = mem::replace(&mut section.pages, vec![]);
let (sorted_pages, cannot_be_sorted_pages) = sort_pages(pages, section.meta.sort_by());
section.pages = populate_previous_and_next_pages(sorted_pages);
section.ignored_pages = cannot_be_sorted_pages;
}
}
@@ -305,7 +307,7 @@ impl Site {

// TODO: can we pass a reference?
let (tags, categories) = Taxonomy::find_tags_and_categories(
self.pages.values().cloned().collect::<Vec<_>>()
self.pages.values().cloned().collect::<Vec<_>>().as_slice()
);
if generate_tags_pages {
self.tags = Some(tags);


+ 2
- 1
components/taxonomies/src/lib.rs View File

@@ -37,6 +37,7 @@ impl TaxonomyItem {
pub fn new(name: &str, pages: Vec<Page>) -> TaxonomyItem {
// We shouldn't have any pages without dates there
let (sorted_pages, _) = sort_pages(pages, SortBy::Date);

TaxonomyItem {
name: name.to_string(),
slug: slugify(name),
@@ -55,7 +56,7 @@ pub struct Taxonomy {

impl Taxonomy {
// TODO: take a Vec<&'a Page> if it makes a difference in terms of perf for actual sites
pub fn find_tags_and_categories(all_pages: Vec<Page>) -> (Taxonomy, Taxonomy) {
pub fn find_tags_and_categories(all_pages: &[Page]) -> (Taxonomy, Taxonomy) {
let mut tags = HashMap::new();
let mut categories = HashMap::new();



Loading…
Cancel
Save