|
@@ -0,0 +1,79 @@ |
|
|
|
|
|
extern crate tantivy; |
|
|
|
|
|
extern crate time; |
|
|
|
|
|
|
|
|
|
|
|
use std::fs::File; |
|
|
|
|
|
use std::io::BufReader; |
|
|
|
|
|
use std::io::Read; |
|
|
|
|
|
use tantivy::core::postings::VecPostings; |
|
|
|
|
|
use tantivy::core::postings::Postings; |
|
|
|
|
|
use tantivy::core::collector::TestCollector; |
|
|
|
|
|
use tantivy::core::serial::*; |
|
|
|
|
|
use tantivy::core::schema::*; |
|
|
|
|
|
use tantivy::core::codec::SimpleCodec; |
|
|
|
|
|
use tantivy::core::global::*; |
|
|
|
|
|
use tantivy::core::searcher::Searcher; |
|
|
|
|
|
use tantivy::core::directory::{Directory, generate_segment_name, SegmentId}; |
|
|
|
|
|
use std::ops::DerefMut; |
|
|
|
|
|
use tantivy::core::reader::SegmentReader; |
|
|
|
|
|
use std::io::{ BufWriter, Write}; |
|
|
|
|
|
use std::io; |
|
|
|
|
|
use std::convert::From; |
|
|
|
|
|
use std::path::PathBuf; |
|
|
|
|
|
use tantivy::core::query; |
|
|
|
|
|
use tantivy::core::query::parse_query; |
|
|
|
|
|
use tantivy::core::analyzer::SimpleTokenizer; |
|
|
|
|
|
use std::borrow::Borrow; |
|
|
|
|
|
use std::io::BufRead; |
|
|
|
|
|
use std::fs; |
|
|
|
|
|
use std::io::Cursor; |
|
|
|
|
|
use time::PreciseTime; |
|
|
|
|
|
|
|
|
|
|
|
fn count_docs(searcher: &Searcher, terms: &Vec<Term>) -> usize { |
|
|
|
|
|
// let terms = vec!(, Term::from_field_text(&body_field, "france")); |
|
|
|
|
|
let mut collector = TestCollector::new(); |
|
|
|
|
|
searcher.search(&terms, &mut collector); |
|
|
|
|
|
let mut num_docs = 0; |
|
|
|
|
|
for doc_id in collector.docs().iter() { |
|
|
|
|
|
num_docs += 1; |
|
|
|
|
|
} |
|
|
|
|
|
num_docs |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
fn main() { |
|
|
|
|
|
let str_fieldtype = FieldOptions::new(); |
|
|
|
|
|
let text_fieldtype = FieldOptions::new().set_tokenized_indexed(); |
|
|
|
|
|
let mut schema = Schema::new(); |
|
|
|
|
|
let id_field = schema.add_field("id", &str_fieldtype); |
|
|
|
|
|
let url_field = schema.add_field("url", &str_fieldtype); |
|
|
|
|
|
let title_field = schema.add_field("title", &text_fieldtype); |
|
|
|
|
|
let body_field = schema.add_field("body", &text_fieldtype); |
|
|
|
|
|
let mut directory = Directory::open(&PathBuf::from("/media/ssd/wikiindex")).unwrap(); |
|
|
|
|
|
directory.set_schema(&schema); |
|
|
|
|
|
let searcher = Searcher::for_directory(directory); |
|
|
|
|
|
let tokenizer = SimpleTokenizer::new(); |
|
|
|
|
|
|
|
|
|
|
|
let mut stdin = io::stdin(); |
|
|
|
|
|
'mainloop: loop { |
|
|
|
|
|
let mut input = String::new(); |
|
|
|
|
|
print!("> "); |
|
|
|
|
|
stdin.read_line(&mut input); |
|
|
|
|
|
if input == "exit\n" { |
|
|
|
|
|
break 'mainloop; |
|
|
|
|
|
} |
|
|
|
|
|
let mut terms: Vec<Term> = Vec::new(); |
|
|
|
|
|
let mut token_it = tokenizer.tokenize(&input); |
|
|
|
|
|
let mut term_buffer = String::new(); |
|
|
|
|
|
while token_it.read_one(&mut term_buffer) { |
|
|
|
|
|
terms.push(Term::from_field_text(&body_field, &term_buffer)); |
|
|
|
|
|
} |
|
|
|
|
|
// let terms = keywords.iter().map(|s| Term::from_field_text(&body_field, &s)); |
|
|
|
|
|
println!("Input: {:?}", input); |
|
|
|
|
|
println!("Keywords {:?}", terms); |
|
|
|
|
|
let start = PreciseTime::now(); |
|
|
|
|
|
let num_docs = count_docs(&searcher, &terms); |
|
|
|
|
|
let stop = PreciseTime::now(); |
|
|
|
|
|
println!("Elasped time {:?}", start.to(stop)); |
|
|
|
|
|
println!("Num_docs {:?}", num_docs); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
} |