commit 5580144fd5bae0f8971a03402ce5e1e656a6155c
Author: Paul Masurel
Date:   Fri Feb 19 17:17:23 2016 +0900

    CLI

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..9a48a09
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "tantivy-cli"
+version = "0.1.0"
+authors = ["Paul Masurel "]
+
+
+[dependencies]
+tantivy = { path = "/home/pmasurel/github/tantivy" }
+time = "0.1.34"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..725ade3
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,104 @@
+extern crate tantivy;
+extern crate time;
+
+use std::fs::File;
+use std::io::BufReader;
+use std::io::Read;
+use tantivy::core::postings::VecPostings;
+use tantivy::core::postings::Postings;
+use tantivy::core::collector::TestCollector;
+use tantivy::core::serial::*;
+use tantivy::core::schema::*;
+use tantivy::core::codec::SimpleCodec;
+use tantivy::core::global::*;
+use tantivy::core::searcher::Searcher;
+use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
+use std::ops::DerefMut;
+use tantivy::core::reader::SegmentReader;
+use std::io::{ BufWriter, Write};
+use std::io;
+use std::convert::From;
+use std::path::PathBuf;
+use tantivy::core::query;
+use tantivy::core::query::parse_query;
+use tantivy::core::analyzer::SimpleTokenizer;
+use std::borrow::Borrow;
+use std::io::BufRead;
+use std::fs;
+use std::io::Cursor;
+use time::PreciseTime;
+
+/// Index directory opened when no path is passed on the command line.
+const DEFAULT_INDEX_PATH: &'static str = "/media/ssd/wikiindex";
+
+/// Runs a search for `terms` on `searcher` and returns how many documents
+/// the collector gathered.
+fn count_docs(searcher: &Searcher, terms: &Vec<Term>) -> usize {
+    let mut collector = TestCollector::new();
+    searcher.search(terms, &mut collector);
+    collector.docs().iter().count()
+}
+
+/// Interactive prompt: reads one query per line from stdin, tokenizes it
+/// into terms on the `body` field and prints the number of collected
+/// documents together with the elapsed search time.
+///
+/// The index directory is the first command-line argument, falling back to
+/// `DEFAULT_INDEX_PATH`. The loop ends on `exit` or on end of input.
+fn main() {
+    let str_fieldtype = FieldOptions::new();
+    let text_fieldtype = FieldOptions::new().set_tokenized_indexed();
+    let mut schema = Schema::new();
+    // Only `body` is queried below; the other fields are still registered
+    // on the schema, in the same order as before.
+    schema.add_field("id", &str_fieldtype);
+    schema.add_field("url", &str_fieldtype);
+    schema.add_field("title", &text_fieldtype);
+    let body_field = schema.add_field("body", &text_fieldtype);
+
+    let index_path = std::env::args_os()
+        .nth(1)
+        .map(PathBuf::from)
+        .unwrap_or_else(|| PathBuf::from(DEFAULT_INDEX_PATH));
+    let mut directory = Directory::open(&index_path)
+        .expect("failed to open the index directory");
+    directory.set_schema(&schema);
+    let searcher = Searcher::for_directory(directory);
+    let tokenizer = SimpleTokenizer::new();
+
+    let stdin = io::stdin();
+    loop {
+        print!("> ");
+        // stdout is line-buffered, so the prompt must be flushed explicitly.
+        io::stdout().flush().expect("failed to flush stdout");
+
+        let mut input = String::new();
+        match stdin.read_line(&mut input) {
+            // Zero bytes read means end of input; looping again would spin forever.
+            Ok(0) => break,
+            Ok(_) => {}
+            Err(err) => {
+                let _ = writeln!(io::stderr(), "Failed to read input: {}", err);
+                break;
+            }
+        }
+        // Trimming also accepts "exit\r\n" and a final "exit" without a newline.
+        if input.trim() == "exit" {
+            break;
+        }
+
+        let mut terms: Vec<Term> = Vec::new();
+        let mut token_it = tokenizer.tokenize(&input);
+        let mut term_buffer = String::new();
+        while token_it.read_one(&mut term_buffer) {
+            terms.push(Term::from_field_text(&body_field, &term_buffer));
+        }
+        println!("Input: {:?}", input);
+        println!("Keywords {:?}", terms);
+        let start = PreciseTime::now();
+        let num_docs = count_docs(&searcher, &terms);
+        let stop = PreciseTime::now();
+        println!("Elapsed time {:?}", start.to(stop));
+        println!("Num_docs {:?}", num_docs);
+    }
+}