use clap::ArgMatches; use serde_json; use std::str::FromStr; use std::convert::From; use std::path::Path; use std::path::PathBuf; use tantivy; use tantivy::query::QueryParser; use tantivy::schema::Field; use tantivy::schema::FieldType; use tantivy::Index; use tantivy::collector::{Count, TopDocs}; use tantivy::Document; pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> { let index_directory = PathBuf::from(matches.value_of("index").unwrap()); let query = matches.value_of("query").unwrap(); match matches.value_of("num_hits") { Some(num_hits_str) => { let num_hits: usize = FromStr::from_str(num_hits_str) .map_err(|e| { format!("Failed to parse --num_hits (got '{}', expected integer): {}", num_hits_str, e) })?; run_top_search(&index_directory, &query, num_hits) .map_err(|e| format!("{:?}", e)) } None => { run_search(&index_directory, &query).map_err(|e| format!("{:?}", e)) } } } fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> { let index = Index::open_in_dir(directory)?; let schema = index.schema(); let default_fields: Vec = schema .fields() .filter(|&(_, ref field_entry)| match *field_entry.field_type() { FieldType::Str(ref text_field_options) => { text_field_options.get_indexing_options().is_some() } _ => false, }) .map(|(field, _)| field) .collect(); let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone()); let query = query_parser.parse_query(query)?; let searcher = index.reader()?.searcher(); let weight = query.weight(&searcher, false)?; let schema = index.schema(); for segment_reader in searcher.segment_readers() { let mut scorer = weight.scorer(segment_reader, 1.0)?; let store_reader = segment_reader.get_store_reader(); while scorer.advance() { let doc_id = scorer.doc(); let doc = store_reader.get(doc_id)?; let named_doc = schema.to_named_doc(&doc); println!("{}", serde_json::to_string(&named_doc).unwrap()); } } Ok(()) } fn run_top_search(directory: &Path, query: &str, num_hits: usize) -> tantivy::Result<()> { let index = Index::open_in_dir(directory)?; let schema = index.schema(); let default_fields: Vec = schema .fields() .filter(|(_, field_entry)| { match field_entry.field_type() { FieldType::Str(ref text_field_options) => { text_field_options.get_indexing_options().is_some() } _ => false, } }) .map(|(field, _)| field) .collect(); let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone()); let query = query_parser.parse_query(query)?; let searcher = index.reader()?.searcher(); let (top_docs, num_hits) = searcher.search(&query, &(TopDocs::with_limit(num_hits), Count))?; let mut out = String::with_capacity(1024); top_docs .iter() .take(num_hits) .for_each(|(_score, doc_address)| { let doc: Document = searcher.doc(*doc_address).unwrap(); let named_doc = schema.to_named_doc(&doc); let json_doc: String = serde_json::to_string(&named_doc).unwrap(); out.push_str(&format!("{}\n", json_doc)); }); print!("{}", out); Ok(()) }