|
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- use clap::ArgMatches;
- use serde_json;
- use std::str::FromStr;
- use std::convert::From;
- use std::path::Path;
- use std::path::PathBuf;
- use tantivy;
- use tantivy::query::QueryParser;
- use tantivy::schema::Field;
- use tantivy::schema::FieldType;
- use tantivy::Index;
- use tantivy::collector::{Count, TopDocs};
- use tantivy::Document;
-
- pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
- let index_directory = PathBuf::from(matches.value_of("index").unwrap());
- let query = matches.value_of("query").unwrap();
- match matches.value_of("num_hits") {
- Some(num_hits_str) => {
- let num_hits: usize = FromStr::from_str(num_hits_str)
- .map_err(|e| { format!("Failed to parse --num_hits (got '{}', expected integer): {}", num_hits_str, e) })?;
- run_top_search(&index_directory, &query, num_hits)
- .map_err(|e| format!("{:?}", e))
- }
-
- None => {
- run_search(&index_directory, &query).map_err(|e| format!("{:?}", e))
- }
- }
- }
-
- fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
- let index = Index::open_in_dir(directory)?;
- let schema = index.schema();
- let default_fields: Vec<Field> = schema
- .fields()
- .filter(|&(_, ref field_entry)| match *field_entry.field_type() {
- FieldType::Str(ref text_field_options) => {
- text_field_options.get_indexing_options().is_some()
- }
- _ => false,
- })
- .map(|(field, _)| field)
- .collect();
- let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
- let query = query_parser.parse_query(query)?;
- let searcher = index.reader()?.searcher();
- let weight = query.weight(&searcher, false)?;
- let schema = index.schema();
- for segment_reader in searcher.segment_readers() {
- let mut scorer = weight.scorer(segment_reader, 1.0)?;
- let store_reader = segment_reader.get_store_reader();
- while scorer.advance() {
- let doc_id = scorer.doc();
- let doc = store_reader.get(doc_id)?;
- let named_doc = schema.to_named_doc(&doc);
- println!("{}", serde_json::to_string(&named_doc).unwrap());
- }
- }
- Ok(())
- }
-
- fn run_top_search(directory: &Path, query: &str, num_hits: usize) -> tantivy::Result<()> {
- let index = Index::open_in_dir(directory)?;
- let schema = index.schema();
- let default_fields: Vec<Field> = schema
- .fields()
- .filter(|(_, field_entry)| {
- match field_entry.field_type() {
- FieldType::Str(ref text_field_options) => {
- text_field_options.get_indexing_options().is_some()
- }
- _ => false,
- }
- })
- .map(|(field, _)| field)
- .collect();
- let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
- let query = query_parser.parse_query(query)?;
- let searcher = index.reader()?.searcher();
- let (top_docs, num_hits) = searcher.search(&query, &(TopDocs::with_limit(num_hits), Count))?;
- let mut out = String::with_capacity(1024);
- top_docs
- .iter()
- .take(num_hits)
- .for_each(|(_score, doc_address)| {
- let doc: Document = searcher.doc(*doc_address).unwrap();
- let named_doc = schema.to_named_doc(&doc);
- let json_doc: String = serde_json::to_string(&named_doc).unwrap();
- out.push_str(&format!("{}\n", json_doc));
- });
- print!("{}", out);
- Ok(())
- }
-
-
-
-
|