From 0322eb276440b4c702da5d5f550d25f45ec4d36b Mon Sep 17 00:00:00 2001 From: Jonathan Strong Date: Wed, 11 Mar 2020 21:55:28 -0400 Subject: [PATCH] minor tweaks to tantivy indexing 'corruption' problem was version compatibility issue (v0.10 vs v0.12), this only makes changes around the edges --- components/search/src/lib.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/components/search/src/lib.rs b/components/search/src/lib.rs index 8857c82..8f7fec6 100644 --- a/components/search/src/lib.rs +++ b/components/search/src/lib.rs @@ -138,7 +138,7 @@ pub fn build_tantivy_index( let mut schema = SchemaBuilder::new(); let title = schema.add_text_field("title", text_options.clone()); - //let body = schema.add_text_field("body", text_options.clone()); + let body = schema.add_text_field("body", text_options.clone()); let permalink = schema.add_text_field("permalink", STORED); let schema = schema.build(); @@ -167,6 +167,7 @@ pub fn build_tantivy_index( //let mut sections_it = library.sections_values().iter().filter(|s| s.lang == lang && s.meta.in_search_index); + let mut seen: HashSet<String> = Default::default(); let mut n_indexed = 0; //let group_size = 100_000; @@ -186,13 +187,14 @@ pub fn build_tantivy_index( // ); //} - for _ in 0..16 { for key in &section.pages { let page = library.get_page_by_key(*key); - if !page.meta.in_search_index { - continue; - } + if !page.meta.in_search_index { continue; } + + if seen.contains(&page.permalink) { continue } + + seen.insert(page.permalink.clone()); //let mut doc = Document::default(); //doc.add(FieldValue::new(title, Value::from(page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or("")))); @@ -204,7 +206,7 @@ pub fn build_tantivy_index( let opstamp = wtr.add_document(doc!( title => page.meta.title.as_ref().map(|x| x.as_str()).unwrap_or(""), - //body => cleaned_body.as_str(), + body => cleaned_body.as_str(), permalink => page.permalink.as_str(), )); println!("added {:?} {}", opstamp, page.permalink); @@ 
-213,14 +215,13 @@ pub fn build_tantivy_index( //if n_indexed % group_size == 0 { } } - } } wtr.prepare_commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?; let commit_opstamp = wtr.commit().map_err(|e| { Error::from(format!("tantivy IndexWriter::commit failed: {}", e)) })?; - println!("committed {:?}", commit_opstamp); wtr.wait_merging_threads().map_err(|e| { Error::from(format!("tantivy IndexWriter::wait_merging_threads failed: {}", e)) })?; drop(index); + println!("finished indexing {} pages", n_indexed); Ok(()) }