Browse Source

minor tweaks to tantivy indexing

The 'corruption' problem was a version compatibility issue (v0.10 vs v0.12); this commit only makes changes around the edges.
index-subcmd
Jonathan Strong 4 years ago
parent
commit
0322eb2764
1 changed file with 9 additions and 8 deletions
  1. +9
    -8
      components/search/src/lib.rs

+ 9
- 8
components/search/src/lib.rs View File

@@ -138,7 +138,7 @@ pub fn build_tantivy_index(
let mut schema = SchemaBuilder::new(); let mut schema = SchemaBuilder::new();


let title = schema.add_text_field("title", text_options.clone()); let title = schema.add_text_field("title", text_options.clone());
//let body = schema.add_text_field("body", text_options.clone());
let body = schema.add_text_field("body", text_options.clone());
let permalink = schema.add_text_field("permalink", STORED); let permalink = schema.add_text_field("permalink", STORED);


let schema = schema.build(); let schema = schema.build();
@@ -167,6 +167,7 @@ pub fn build_tantivy_index(


//let mut sections_it = library.sections_values().iter().filter(|s| s.lang == lang && s.meta.in_search_index); //let mut sections_it = library.sections_values().iter().filter(|s| s.lang == lang && s.meta.in_search_index);


let mut seen: HashSet<String> = Default::default();
let mut n_indexed = 0; let mut n_indexed = 0;
//let group_size = 100_000; //let group_size = 100_000;


@@ -186,13 +187,14 @@ pub fn build_tantivy_index(
// ); // );
//} //}


for _ in 0..16 {
for key in &section.pages { for key in &section.pages {
let page = library.get_page_by_key(*key); let page = library.get_page_by_key(*key);


if !page.meta.in_search_index {
continue;
}
if !page.meta.in_search_index { continue; }

if seen.contains(&page.permalink) { continue }

seen.insert(page.permalink.clone());


//let mut doc = Document::default(); //let mut doc = Document::default();
//doc.add(FieldValue::new(title, Value::from(page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or("")))); //doc.add(FieldValue::new(title, Value::from(page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or(""))));
@@ -204,7 +206,7 @@ pub fn build_tantivy_index(


let opstamp = wtr.add_document(doc!( let opstamp = wtr.add_document(doc!(
title => page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or(""), title => page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or(""),
//body => cleaned_body.as_str(),
body => cleaned_body.as_str(),
permalink => page.permalink.as_str(), permalink => page.permalink.as_str(),
)); ));
println!("added {:?} {}", opstamp, page.permalink); println!("added {:?} {}", opstamp, page.permalink);
@@ -213,14 +215,13 @@ pub fn build_tantivy_index(


//if n_indexed % group_size == 0 { } //if n_indexed % group_size == 0 { }
} }
}
} }


wtr.prepare_commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?; wtr.prepare_commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?;
let commit_opstamp = wtr.commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?; let commit_opstamp = wtr.commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?;
println!("committed {:?}", commit_opstamp);
wtr.wait_merging_threads().map_err(|e| { Error::from(format!("tantivy IndexWriter::wait_merging_threads failed: {}", e)) })?; wtr.wait_merging_threads().map_err(|e| { Error::from(format!("tantivy IndexWriter::wait_merging_threads failed: {}", e)) })?;
drop(index); drop(index);
println!("finished indexing {} pages", n_indexed);


Ok(()) Ok(())
} }


Loading…
Cancel
Save