Browse Source

Update to tantivy 0.9.1

develop
Paul Masurel 5 years ago
parent
commit
d3de58bd26
11 changed files with 964 additions and 577 deletions
  1. +712
    -349
      Cargo.lock
  2. +2
    -2
      Cargo.toml
  3. +57
    -51
      src/commands/bench.rs
  4. +43
    -27
      src/commands/index.rs
  5. +4
    -7
      src/commands/merge.rs
  6. +7
    -7
      src/commands/mod.rs
  7. +47
    -41
      src/commands/new.rs
  8. +13
    -16
      src/commands/search.rs
  9. +60
    -57
      src/commands/serve.rs
  10. +17
    -19
      src/main.rs
  11. +2
    -1
      src/timer.rs

+ 712
- 349
Cargo.lock
File diff suppressed because it is too large
View File


+ 2
- 2
Cargo.toml View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-cli"
version = "0.7.0"
version = "0.9.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]

description = """Command line interface for Tantivy, a search engine library."""
@@ -30,7 +30,7 @@ byteorder = "0.5"
log = "0.3"
futures = "0.1"
env_logger = "0.3"
tantivy = "0.7"
tantivy = "0.9"

[[bin]]
name = "tantivy"


+ 57
- 51
src/commands/bench.rs View File

@@ -1,36 +1,32 @@
use tantivy::Index;
use tantivy::schema::{Field, Schema};
use tantivy::query::QueryParser;
use std::path::Path;
use std::io::BufReader;
use std::io::BufRead;
use std::io;
use std::fs::File;
use tantivy::collector::chain;
use tantivy::collector::TopCollector;
use tantivy::collector::CountCollector;
use clap::ArgMatches;
use std::fs::File;
use std::io;
use std::io::BufRead;
use std::io::BufReader;
use std::path::Path;
use std::path::PathBuf;
use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::{Field, Schema};
use tantivy::Index;
use timer::TimerTree;


pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
let index_path = PathBuf::from(matches.value_of("index").unwrap());
let queries_path = PathBuf::from(matches.value_of("queries").unwrap()); // the unwrap is safe as long as it is coming from the main cli.
let num_repeat = value_t!(matches, "num_repeat", usize).map_err(|e| format!("Failed to read num_repeat argument as an integer. {:?}", e))?;
let num_repeat = value_t!(matches, "num_repeat", usize)
.map_err(|e| format!("Failed to read num_repeat argument as an integer. {:?}", e))?;
run_bench(&index_path, &queries_path, num_repeat).map_err(From::from)
}


fn extract_search_fields(schema: &Schema) -> Vec<Field> {
schema.fields()
.iter()
.enumerate()
.filter(|&(_, field_entry)| {
field_entry.is_indexed()
})
.map(|(field_id, _)| Field(field_id as u32))
.collect()
schema
.fields()
.iter()
.enumerate()
.filter(|&(_, field_entry)| field_entry.is_indexed())
.map(|(field_id, _)| Field(field_id as u32))
.collect()
}

fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
@@ -43,60 +39,70 @@ fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
Ok(queries)
}


fn run_bench(index_path: &Path,
query_filepath: &Path,
num_repeat: usize) -> Result<(), String> {
fn run_bench(index_path: &Path, query_filepath: &Path, num_repeat: usize) -> Result<(), String> {
println!("index_path : {:?}", index_path);
println!("Query : {:?}", index_path);
println!("-------------------------------\n\n\n");
let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let searcher = index.searcher();

let index =
Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let searcher = index
.reader()
.map_err(|err| format!("{:?}", err))?
.searcher();
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?;
let query_parser = QueryParser::new(index.schema(), default_search_fields, index.tokenizers().clone());
let queries = read_query_file(query_filepath)
.map_err(|e| format!("Failed reading the query file: {}", e))?;
let query_parser = QueryParser::new(
index.schema(),
default_search_fields,
index.tokenizers().clone(),
);

println!("SEARCH\n");
println!("{}\t{}\t{}\t{}", "query", "num_terms", "num hits", "time in microsecs");
println!(
"{}\t{}\t{}\t{}",
"query", "num_terms", "num hits", "time in microsecs"
);
for _ in 0..num_repeat {
for query_txt in &queries {
let query = query_parser.parse_query(&query_txt).unwrap();
// let num_terms = query.num_terms();
let mut top_collector = TopCollector::with_limit(10);
let mut count_collector = CountCollector::default();
let mut timing = TimerTree::default();
{
let (_top_docs, count) = {
let _search = timing.open("search");
let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
query.search(&searcher, &mut collector)
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
}
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
searcher
.search(&query, &(TopDocs::with_limit(10), Count))
.map_err(|e| {
format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e)
})?
};
println!("{}\t{}\t{}", query_txt, count, timing.total_time());
}
}

println!("\n\nFETCH STORE\n");
println!("{}\t{}", "query", "time in microsecs");
for _ in 0..num_repeat {
for query_txt in &queries {
let query = query_parser.parse_query(&query_txt).unwrap();
let mut top_collector = TopCollector::with_limit(10);
query.search(&searcher, &mut top_collector)
.map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e))?;
let top_docs = searcher
.search(&*query, &TopDocs::with_limit(10))
.map_err(|e| {
format!(
"Failed while retrieving document for query {:?}.\n{:?}",
query, e
)
})?;
let mut timer = TimerTree::default();
{
let _scoped_timer_ = timer.open("total");
for doc_address in top_collector.docs() {
for (_score, doc_address) in top_docs {
searcher.doc(doc_address).unwrap();
}
}
println!("{}\t{}", query_txt, timer.total_time());
}
}
Ok(())
}


+ 43
- 27
src/commands/index.rs View File

@@ -1,42 +1,53 @@
use chan;
use clap::ArgMatches;
use std::cmp;
use std::convert::From;
use std::fs::File;
use std::io;
use std::cmp;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Read;
use std::path::PathBuf;
use std::thread;
use tantivy;
use tantivy::merge_policy::NoMergePolicy;
use tantivy::Document;
use tantivy::Index;
use tantivy::IndexWriter;
use tantivy::Document;
use time::PreciseTime;
use clap::ArgMatches;
use chan;
use tantivy::merge_policy::NoMergePolicy;
use std::thread;

pub fn run_index_cli(argmatch: &ArgMatches) -> Result<(), String> {
let index_directory = PathBuf::from(argmatch.value_of("index").unwrap());
let document_source = argmatch.value_of("file")
let document_source = argmatch
.value_of("file")
.map(|path| DocumentSource::FromFile(PathBuf::from(path)))
.unwrap_or(DocumentSource::FromPipe);
let no_merge = argmatch.is_present("nomerge");
let mut num_threads = value_t!(argmatch, "num_threads", usize).map_err(|_| format!("Failed to read num_threads argument as an integer."))?;
let mut num_threads = value_t!(argmatch, "num_threads", usize)
.map_err(|_| format!("Failed to read num_threads argument as an integer."))?;
if num_threads == 0 {
num_threads = 1;
}
let buffer_size = value_t!(argmatch, "memory_size", usize).map_err(|_| format!("Failed to read the buffer size argument as an integer."))?;
let buffer_size = value_t!(argmatch, "memory_size", usize)
.map_err(|_| format!("Failed to read the buffer size argument as an integer."))?;
let buffer_size_per_thread = buffer_size / num_threads;
run_index(index_directory, document_source, buffer_size_per_thread, num_threads, no_merge).map_err(|e| format!("Indexing failed : {:?}", e))
run_index(
index_directory,
document_source,
buffer_size_per_thread,
num_threads,
no_merge,
)
.map_err(|e| format!("Indexing failed : {:?}", e))
}

fn run_index(directory: PathBuf,
document_source: DocumentSource,
buffer_size_per_thread: usize,
num_threads: usize,
no_merge: bool) -> tantivy::Result<()> {
fn run_index(
directory: PathBuf,
document_source: DocumentSource,
buffer_size_per_thread: usize,
num_threads: usize,
no_merge: bool,
) -> tantivy::Result<()> {
let index = Index::open_in_dir(&directory)?;
let schema = index.schema();
let (line_sender, line_receiver) = chan::sync(10_000);
@@ -76,7 +87,7 @@ fn run_index(directory: PathBuf,
} else {
index.writer(buffer_size_per_thread)
}?;
if no_merge {
index_writer.set_merge_policy(Box::new(NoMergePolicy));
}
@@ -88,8 +99,6 @@ fn run_index(directory: PathBuf,
info!("Indexing the documents took {} s", duration.num_seconds());
}



match index_result {
Ok(docstamp) => {
println!("Commit succeed, docstamp at {}", docstamp);
@@ -98,7 +107,10 @@ fn run_index(directory: PathBuf,
println!("Terminated successfully!");
{
let duration = start_overall.to(PreciseTime::now());
info!("Indexing the documents took {} s overall (indexing + merge)", duration.num_seconds());
info!(
"Indexing the documents took {} s overall (indexing + merge)",
duration.num_seconds()
);
}
Ok(())
}
@@ -111,7 +123,10 @@ fn run_index(directory: PathBuf,
}
}

fn index_documents(index_writer: &mut IndexWriter, doc_receiver: chan::Receiver<Document>) -> tantivy::Result<u64> {
fn index_documents(
index_writer: &mut IndexWriter,
doc_receiver: chan::Receiver<Document>,
) -> tantivy::Result<u64> {
let group_count = 100_000;
let mut num_docs = 0;
let mut cur = PreciseTime::now();
@@ -121,7 +136,11 @@ fn index_documents(index_writer: &mut IndexWriter, doc_receiver: chan::Receiver<
println!("{} Docs", num_docs);
let new = PreciseTime::now();
let elapsed = cur.to(new);
println!("{:?} docs / hour", group_count * 3600 * 1_000_000 as u64 / (elapsed.num_microseconds().unwrap() as u64));
println!(
"{:?} docs / hour",
group_count * 3600 * 1_000_000 as u64
/ (elapsed.num_microseconds().unwrap() as u64)
);
cur = new;
}
num_docs += 1;
@@ -129,18 +148,15 @@ fn index_documents(index_writer: &mut IndexWriter, doc_receiver: chan::Receiver<
index_writer.commit()
}


enum DocumentSource {
FromPipe,
FromFile(PathBuf),
}

impl DocumentSource {
fn read(&self,) -> io::Result<BufReader<Box<Read>>> {
fn read(&self) -> io::Result<BufReader<Box<Read>>> {
Ok(match self {
&DocumentSource::FromPipe => {
BufReader::new(Box::new(io::stdin()))
}
&DocumentSource::FromPipe => BufReader::new(Box::new(io::stdin())),
&DocumentSource::FromFile(ref filepath) => {
let read_file = File::open(&filepath)?;
BufReader::new(Box::new(read_file))


+ 4
- 7
src/commands/merge.rs View File

@@ -1,13 +1,12 @@
extern crate tantivy;

use tantivy::{Index, SegmentMeta};
use std::path::PathBuf;
use clap::ArgMatches;
use futures::Future;
use std::path::PathBuf;
use tantivy::{Index, SegmentMeta};

const HEAP_SIZE: usize = 300_000_000;


fn error_msg(err: tantivy::Error) -> String {
format!("Merge failed : {:?}", err)
}
@@ -15,12 +14,10 @@ fn error_msg(err: tantivy::Error) -> String {
pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {
let index_directory = PathBuf::from(argmatch.value_of("index").unwrap());
run_merge(index_directory).map_err(error_msg)
// we rollback to force a gc.

// we rollback to force a gc.
}


fn run_merge(path: PathBuf) -> tantivy::Result<()> {
let index = Index::open_in_dir(&path)?;
let segments = index.searchable_segment_ids()?;
@@ -29,7 +26,7 @@ fn run_merge(path: PathBuf) -> tantivy::Result<()> {
.merge(&segments)?
.wait()
.expect("Merge failed");
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
println!("Merge finished with segment meta {:?}", segment_meta);
println!("Garbage collect irrelevant segments.");
Index::open_in_dir(&path)?


+ 7
- 7
src/commands/mod.rs View File

@@ -1,13 +1,13 @@
mod index;
mod serve;
mod new;
mod bench;
mod index;
mod merge;
mod new;
mod search;
mod serve;

pub use self::new::run_new_cli;
pub use self::index::run_index_cli;
pub use self::serve::run_serve_cli;
pub use self::search::run_search_cli;
pub use self::bench::run_bench_cli;
pub use self::index::run_index_cli;
pub use self::merge::run_merge_cli;
pub use self::new::run_new_cli;
pub use self::search::run_search_cli;
pub use self::serve::run_serve_cli;

+ 47
- 41
src/commands/new.rs View File

@@ -1,30 +1,35 @@
use ansi_term::Colour::{Blue, Green, Red};
use ansi_term::Style;
use clap::ArgMatches;
use serde_json;
use std::convert::From;
use std::io;
use std::io::Write;
use std::path::PathBuf;
use tantivy;
use tantivy::schema::Cardinality;
use tantivy::schema::*;
use tantivy::Index;
use std::io;
use ansi_term::Style;
use ansi_term::Colour::{Red, Blue, Green};
use std::io::Write;
use serde_json;


pub fn run_new_cli(matches: &ArgMatches) -> Result<(), String> {
let index_directory = PathBuf::from(matches.value_of("index").unwrap());
run_new(index_directory).map_err(|e| format!("{:?}" , e))
run_new(index_directory).map_err(|e| format!("{:?}", e))
}


fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: P) -> String {
loop {
print!("{prompt_text:<width$} ? ", prompt_text=Style::new().bold().fg(Blue).paint(prompt_text), width=40);
print!(
"{prompt_text:<width$} ? ",
prompt_text = Style::new().bold().fg(Blue).paint(prompt_text),
width = 40
);
io::stdout().flush().unwrap();
let mut buffer = String::new();
io::stdin().read_line(&mut buffer).ok().expect("Failed to read line");
let answer = buffer.trim_right().to_string();
io::stdin()
.read_line(&mut buffer)
.ok()
.expect("Failed to read line");
let answer = buffer.trim_end().to_string();
match predicate(&answer) {
Ok(()) => {
return answer;
@@ -36,30 +41,28 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate:
}
}


fn field_name_validate(field_name: &str) -> Result<(), String> {
if is_valid_field_name(field_name) {
Ok(())
}
else {
Err(String::from("Field name must match the pattern [_a-zA-Z0-9]+"))
} else {
Err(String::from(
"Field name must match the pattern [_a-zA-Z0-9]+",
))
}
}

fn prompt_options(msg: &str, codes: Vec<char>) -> char {
let options_string: Vec<String> = codes.iter().map(|c| format!("{}", c)).collect();
let options = options_string.join("/");
let predicate = |entry: &str| {
if entry.len() != 1 {
return Err(format!("Invalid input. Options are ({})", options))
return Err(format!("Invalid input. Options are ({})", options));
}
let c = entry.chars().next().unwrap().to_ascii_uppercase();
if codes.contains(&c) {
return Ok(())
}
else {
return Err(format!("Invalid input. Options are ({})", options))
return Ok(());
} else {
return Err(format!("Invalid input. Options are ({})", options));
}
};
let message = format!("{} ({})", msg, options);
@@ -68,30 +71,28 @@ fn prompt_options(msg: &str, codes: Vec<char>) -> char {
}

fn prompt_yn(msg: &str) -> bool {
prompt_options(msg, vec!('Y', 'N')) == 'Y'
prompt_options(msg, vec!['Y', 'N']) == 'Y'
}


fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) {
let mut text_options = TextOptions::default();
if prompt_yn("Should the field be stored") {
text_options = text_options.set_stored();
}



if prompt_yn("Should the field be indexed") {
let mut text_indexing_options = TextFieldIndexing
::default()
let mut text_indexing_options = TextFieldIndexing::default()
.set_index_option(IndexRecordOption::Basic)
.set_tokenizer("en_stem");

if prompt_yn("Should the term be tokenized?") {
if prompt_yn("Should the term frequencies (per doc) be in the index") {
if prompt_yn("Should the term positions (per doc) be in the index") {
text_indexing_options = text_indexing_options.set_index_option(IndexRecordOption::WithFreqsAndPositions);
text_indexing_options = text_indexing_options
.set_index_option(IndexRecordOption::WithFreqsAndPositions);
} else {
text_indexing_options = text_indexing_options.set_index_option(IndexRecordOption::WithFreqs);
text_indexing_options =
text_indexing_options.set_index_option(IndexRecordOption::WithFreqs);
}
}
} else {
@@ -101,11 +102,9 @@ fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) {
text_options = text_options.set_indexing_options(text_indexing_options);
}


schema_builder.add_text_field(field_name, text_options);
}


fn ask_add_field_u64(field_name: &str, schema_builder: &mut SchemaBuilder) {
let mut u64_options = IntOptions::default();
if prompt_yn("Should the field be stored") {
@@ -123,20 +122,28 @@ fn ask_add_field_u64(field_name: &str, schema_builder: &mut SchemaBuilder) {
fn ask_add_field(schema_builder: &mut SchemaBuilder) {
println!("\n\n");
let field_name = prompt_input("New field name ", field_name_validate);
let text_or_integer = prompt_options("Text or unsigned 32-bit integer", vec!('T', 'I'));
if text_or_integer =='T' {
let text_or_integer = prompt_options("Text or unsigned 32-bit integer", vec!['T', 'I']);
if text_or_integer == 'T' {
ask_add_field_text(&field_name, schema_builder);
}
else {
ask_add_field_u64(&field_name, schema_builder);
} else {
ask_add_field_u64(&field_name, schema_builder);
}
}

fn run_new(directory: PathBuf) -> tantivy::Result<()> {
println!("\n{} ", Style::new().bold().fg(Green).paint("Creating new index"));
println!("{} ", Style::new().bold().fg(Green).paint("Let's define it's schema!"));
println!(
"\n{} ",
Style::new().bold().fg(Green).paint("Creating new index")
);
println!(
"{} ",
Style::new()
.bold()
.fg(Green)
.paint("Let's define it's schema!")
);
let mut schema_builder = SchemaBuilder::default();
loop {
loop {
ask_add_field(&mut schema_builder);
if !prompt_yn("Add another field") {
break;
@@ -148,4 +155,3 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> {
Index::create_in_dir(&directory, schema)?;
Ok(())
}


+ 13
- 16
src/commands/search.rs View File

@@ -1,49 +1,46 @@
use clap::ArgMatches;
use serde_json;
use std::convert::From;
use std::path::Path;
use std::path::PathBuf;
use tantivy;
use tantivy::Index;
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use serde_json;
use tantivy::schema::FieldType;
use tantivy::Index;

pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
let index_directory = PathBuf::from(matches.value_of("index").unwrap());
let query = matches.value_of("query").unwrap();
run_search(&index_directory, &query)
.map_err(|e| format!("{:?}", e))
run_search(&index_directory, &query).map_err(|e| format!("{:?}", e))
}

fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
let index = Index::open_in_dir(directory)?;
let schema = index.schema();
let default_fields: Vec<Field> = schema
.fields()
.iter()
.enumerate()
.filter(
|&(_, ref field_entry)|
match *field_entry.field_type() {
FieldType::Str(ref text_field_options) => {
text_field_options.get_indexing_options().is_some()
},
_ => false
}
)
.filter(|&(_, ref field_entry)| match *field_entry.field_type() {
FieldType::Str(ref text_field_options) => {
text_field_options.get_indexing_options().is_some()
}
_ => false,
})
.map(|(i, _)| Field(i as u32))
.collect();
let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
let query = query_parser.parse_query(query)?;
let searcher = index.searcher();
let searcher = index.reader()?.searcher();
let weight = query.weight(&searcher, false)?;
let schema = index.schema();
for segment_reader in searcher.segment_readers() {
let mut scorer = weight.scorer(segment_reader)?;
let store_reader = segment_reader.get_store_reader();
while scorer.advance() {
let doc_id = scorer.doc();
let doc = segment_reader.doc(doc_id)?;
let doc = store_reader.get(doc_id)?;
let named_doc = schema.to_named_doc(&doc);
println!("{}", serde_json::to_string(&named_doc).unwrap());
}


+ 60
- 57
src/commands/serve.rs View File

@@ -1,8 +1,8 @@
/// This tantivy command starts a http server (by default on port 3000)
///
///
/// Currently the only entrypoint is /api/
/// and it takes the following query string argument
///
///
/// - `q=` : your query
/// - `nhits`: the number of hits that should be returned. (default to 10)
///
@@ -12,16 +12,14 @@
///
/// http://localhost:3000/api/?q=fulmicoton&&nhits=20
///


use clap::ArgMatches;
use iron::mime::Mime;
use iron::prelude::*;
use iron::status;
use serde_json;
use iron::typemap::Key;
use mount::Mount;
use persistent::Read;
use serde_json;
use std::convert::From;
use std::error::Error;
use std::fmt::{self, Debug};
@@ -29,11 +27,7 @@ use std::path::Path;
use std::path::PathBuf;
use std::str::FromStr;
use tantivy;
use tantivy::collector;
use tantivy::collector::CountCollector;
use tantivy::collector::TopCollector;
use tantivy::Document;
use tantivy::Index;
use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::schema::FieldType;
@@ -41,6 +35,9 @@ use tantivy::schema::NamedFieldDocument;
use tantivy::schema::Schema;
use tantivy::tokenizer::*;
use tantivy::DocAddress;
use tantivy::Document;
use tantivy::Index;
use tantivy::IndexReader;
use timer::TimerTree;
use urlencoded::UrlEncodedQuery;

@@ -52,7 +49,6 @@ pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
run_serve(index_directory, &host).map_err(|e| format!("{:?}", e))
}


#[derive(Serialize)]
struct Serp {
q: String,
@@ -68,42 +64,40 @@ struct Hit {
}

struct IndexServer {
index: Index,
reader: IndexReader,
query_parser: QueryParser,
schema: Schema,
}

impl IndexServer {
fn load(path: &Path) -> IndexServer {
let index = Index::open_in_dir(path).unwrap();
index.tokenizers()
.register("commoncrawl", SimpleTokenizer
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AlphaNumOnlyFilter)
.filter(Stemmer::new())
index.tokenizers().register(
"commoncrawl",
SimpleTokenizer
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AlphaNumOnlyFilter)
.filter(Stemmer::new(Language::English)),
);
let schema = index.schema();
let default_fields: Vec<Field> = schema
.fields()
.iter()
.enumerate()
.filter(
|&(_, ref field_entry)| {
match *field_entry.field_type() {
FieldType::Str(ref text_field_options) => {
text_field_options.get_indexing_options().is_some()
},
_ => false
}
.filter(|&(_, ref field_entry)| match *field_entry.field_type() {
FieldType::Str(ref text_field_options) => {
text_field_options.get_indexing_options().is_some()
}
)
_ => false,
})
.map(|(i, _)| Field(i as u32))
.collect();
let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
let query_parser =
QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
let reader = index.reader().unwrap();
IndexServer {
index,
reader,
query_parser,
schema,
}
@@ -115,25 +109,23 @@ impl IndexServer {
id: doc_address.doc(),
}
}
fn search(&self, q: String, num_hits: usize) -> tantivy::Result<Serp> {
let query = self.query_parser.parse_query(&q).expect("Parsing the query failed");
let searcher = self.index.searcher();
let mut count_collector = CountCollector::default();
let mut top_collector = TopCollector::with_limit(num_hits);
let query = self
.query_parser
.parse_query(&q)
.expect("Parsing the query failed");
let searcher = self.reader.searcher();
let mut timer_tree = TimerTree::default();
{
let (top_docs, num_hits) = {
let _search_timer = timer_tree.open("search");
let mut chained_collector = collector::chain()
.push(&mut top_collector)
.push(&mut count_collector);
query.search(&searcher, &mut chained_collector)?;
}
searcher.search(&query, &(TopDocs::with_limit(num_hits), Count))?
};
let hits: Vec<Hit> = {
let _fetching_timer = timer_tree.open("fetching docs");
top_collector.docs()
top_docs
.iter()
.map(|doc_address| {
.map(|(_score, doc_address)| {
let doc: Document = searcher.doc(*doc_address).unwrap();
self.create_hit(&doc, doc_address)
})
@@ -141,7 +133,7 @@ impl IndexServer {
};
Ok(Serp {
q,
num_hits: count_collector.count(),
num_hits,
hits,
timings: timer_tree,
})
@@ -162,42 +154,53 @@ impl fmt::Display for StringError {
}

impl Error for StringError {
fn description(&self) -> &str { &*self.0 }
fn description(&self) -> &str {
&*self.0
}
}

fn search(req: &mut Request) -> IronResult<Response> {
let index_server = req.get::<Read<IndexServer>>().unwrap();
req.get_ref::<UrlEncodedQuery>()
.map_err(|_| IronError::new(StringError(String::from("Failed to decode error")), status::BadRequest))
.map_err(|_| {
IronError::new(
StringError(String::from("Failed to decode error")),
status::BadRequest,
)
})
.and_then(|ref qs_map| {
let num_hits: usize = qs_map
.get("nhits")
.and_then(|nhits_str| usize::from_str(&nhits_str[0]).ok())
.unwrap_or(10);
let query = qs_map
.get("q")
.ok_or_else(|| IronError::new(StringError(String::from("Parameter q is missing from the query")), status::BadRequest))?[0].clone();
let query = qs_map.get("q").ok_or_else(|| {
IronError::new(
StringError(String::from("Parameter q is missing from the query")),
status::BadRequest,
)
})?[0]
.clone();
let serp = index_server.search(query, num_hits).unwrap();
let resp_json = serde_json::to_string_pretty(&serp).unwrap();
let content_type = "application/json".parse::<Mime>().unwrap();
Ok(Response::with((content_type, status::Ok, format!("{}", resp_json))))
Ok(Response::with((
content_type,
status::Ok,
format!("{}", resp_json),
)))
})
}



fn run_serve(directory: PathBuf, host: &str) -> tantivy::Result<()> {
let mut mount = Mount::new();
let server = IndexServer::load(&directory);
mount.mount("/api", search);
let mut middleware = Chain::new(mount);
middleware.link(Read::<IndexServer>::both(server));
println!("listening on http://{}", host);
Iron::new(middleware).http(host).unwrap();
Ok(())
}


+ 17
- 19
src/main.rs View File

@@ -3,41 +3,39 @@ extern crate clap;
extern crate serde_json;
#[macro_use]
extern crate log;
extern crate ansi_term;
extern crate bincode;
extern crate byteorder;
extern crate chan;
extern crate env_logger;
extern crate tantivy;
extern crate time;
extern crate persistent;
extern crate futures;
extern crate urlencoded;
extern crate iron;
extern crate chan;
extern crate staticfile;
extern crate ansi_term;
extern crate mount;
extern crate bincode;
extern crate byteorder;
extern crate persistent;
extern crate staticfile;
extern crate tantivy;
extern crate time;
extern crate urlencoded;

#[macro_use]
extern crate serde_derive;

use std::io::Write;

use clap::{AppSettings, Arg, App, SubCommand};
use clap::{App, AppSettings, Arg, SubCommand};
mod commands;
pub mod timer;
use self::commands::*;


fn main() {
env_logger::init().unwrap();
let index_arg = Arg::with_name("index")
.short("i")
.long("index")
.value_name("directory")
.help("Tantivy index directory filepath")
.required(true);
.short("i")
.long("index")
.value_name("directory")
.help("Tantivy index directory filepath")
.required(true);

let cli_options = App::new("Tantivy")
.setting(AppSettings::SubcommandRequiredElseHelp)
@@ -135,7 +133,7 @@ fn main() {
"search" => run_search_cli,
"merge" => run_merge_cli,
"bench" => run_bench_cli,
_ => panic!("Subcommand {} is unknown", subcommand)
_ => panic!("Subcommand {} is unknown", subcommand),
};

if let Err(ref e) = run_cli(options) {


+ 2
- 1
src/timer.rs View File

@@ -26,7 +26,8 @@ impl<'a> Drop for OpenTimer<'a> {
fn drop(&mut self) {
self.timer_tree.timings.push(Timing {
name: self.name,
duration: self.start
duration: self
.start
.to(PreciseTime::now())
.num_microseconds()
.unwrap(),


Loading…
Cancel
Save