Browse Source

minor tweaks to tantivy indexing

The 'corruption' problem was a version compatibility issue (v0.10 vs v0.12); this commit only makes changes around the edges.
index-subcmd
Jonathan Strong 1 year ago
parent
commit
0322eb2764
1 changed file with 9 additions and 8 deletions
  1. +9
    -8
      components/search/src/lib.rs

+ 9
- 8
components/search/src/lib.rs View File

@@ -138,7 +138,7 @@ pub fn build_tantivy_index(
let mut schema = SchemaBuilder::new();

let title = schema.add_text_field("title", text_options.clone());
//let body = schema.add_text_field("body", text_options.clone());
let body = schema.add_text_field("body", text_options.clone());
let permalink = schema.add_text_field("permalink", STORED);

let schema = schema.build();
@@ -167,6 +167,7 @@ pub fn build_tantivy_index(

//let mut sections_it = library.sections_values().iter().filter(|s| s.lang == lang && s.meta.in_search_index);

let mut seen: HashSet<String> = Default::default();
let mut n_indexed = 0;
//let group_size = 100_000;

@@ -186,13 +187,14 @@ pub fn build_tantivy_index(
// );
//}

for _ in 0..16 {
for key in &section.pages {
let page = library.get_page_by_key(*key);

if !page.meta.in_search_index {
continue;
}
if !page.meta.in_search_index { continue; }

if seen.contains(&page.permalink) { continue }

seen.insert(page.permalink.clone());

//let mut doc = Document::default();
//doc.add(FieldValue::new(title, Value::from(page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or(""))));
@@ -204,7 +206,7 @@ pub fn build_tantivy_index(

let opstamp = wtr.add_document(doc!(
title => page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or(""),
//body => cleaned_body.as_str(),
body => cleaned_body.as_str(),
permalink => page.permalink.as_str(),
));
println!("added {:?} {}", opstamp, page.permalink);
@@ -213,14 +215,13 @@ pub fn build_tantivy_index(

//if n_indexed % group_size == 0 { }
}
}
}

wtr.prepare_commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?;
let commit_opstamp = wtr.commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?;
println!("committed {:?}", commit_opstamp);
wtr.wait_merging_threads().map_err(|e| { Error::from(format!("tantivy IndexWriter::wait_merging_threads failed: {}", e)) })?;
drop(index);
println!("finished indexing {} pages", n_indexed);

Ok(())
}


Loading…
Cancel
Save