Update to tantivy 0.9.1

5 years ago · d3de58bd26
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-cli"
 version = "0.7.0"
 version = "0.9.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]

 description = """Command line interface for Tantivy, a search engine library."""
@@ -30,7 +30,7 @@ byteorder = "0.5"
 log = "0.3"
 futures = "0.1"
 env_logger = "0.3"
 tantivy = "0.7"
 tantivy = "0.9"

 [[bin]]
 name = "tantivy"
--- a/src/commands/bench.rs
+++ b/src/commands/bench.rs
@@ -1,36 +1,32 @@
 use tantivy::Index;
 use tantivy::schema::{Field, Schema};
 use tantivy::query::QueryParser;
 use std::path::Path;
 use std::io::BufReader;
 use std::io::BufRead;
 use std::io;
 use std::fs::File;
 use tantivy::collector::chain;
 use tantivy::collector::TopCollector;
 use tantivy::collector::CountCollector;
 use clap::ArgMatches;
 use std::fs::File;
 use std::io;
 use std::io::BufRead;
 use std::io::BufReader;
 use std::path::Path;
 use std::path::PathBuf;
 use tantivy::collector::{Count, TopDocs};
 use tantivy::query::QueryParser;
 use tantivy::schema::{Field, Schema};
 use tantivy::Index;
 use timer::TimerTree;


 pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
    let index_path = PathBuf::from(matches.value_of("index").unwrap());
    let queries_path = PathBuf::from(matches.value_of("queries").unwrap()); // the unwrap is safe as long as it is comming from the main cli.
    let num_repeat = value_t!(matches, "num_repeat", usize).map_err(|e| format!("Failed to read num_repeat argument as an integer. {:?}", e))?;
    let num_repeat = value_t!(matches, "num_repeat", usize)
        .map_err(|e| format!("Failed to read num_repeat argument as an integer. {:?}", e))?;
    run_bench(&index_path, &queries_path, num_repeat).map_err(From::from)
 }


 fn extract_search_fields(schema: &Schema) -> Vec<Field> {
    schema.fields()
          .iter()
          .enumerate()
          .filter(|&(_, field_entry)| {
              field_entry.is_indexed()
          })
          .map(|(field_id, _)| Field(field_id as u32))
          .collect()
    schema
        .fields()
        .iter()
        .enumerate()
        .filter(|&(_, field_entry)| field_entry.is_indexed())
        .map(|(field_id, _)| Field(field_id as u32))
        .collect()
 }

 fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
@@ -43,60 +39,70 @@ fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
    Ok(queries)
 }


 fn run_bench(index_path: &Path,
             query_filepath: &Path,
             num_repeat: usize) -> Result<(), String> {
    
 fn run_bench(index_path: &Path, query_filepath: &Path, num_repeat: usize) -> Result<(), String> {
    println!("index_path : {:?}", index_path);
    println!("Query : {:?}", index_path);
    println!("-------------------------------\n\n\n");
    
    let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
    let searcher = index.searcher();

    let index =
        Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
    let searcher = index
        .reader()
        .map_err(|err| format!("{:?}", err))?
        .searcher();
    let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
    let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file:  {}", e))?;
    let query_parser = QueryParser::new(index.schema(), default_search_fields, index.tokenizers().clone());
    
    let queries = read_query_file(query_filepath)
        .map_err(|e| format!("Failed reading the query file:  {}", e))?;
    let query_parser = QueryParser::new(
        index.schema(),
        default_search_fields,
        index.tokenizers().clone(),
    );

    println!("SEARCH\n");
    println!("{}\t{}\t{}\t{}", "query", "num_terms", "num hits", "time in microsecs");
    println!(
        "{}\t{}\t{}\t{}",
        "query", "num_terms", "num hits", "time in microsecs"
    );
    for _ in 0..num_repeat {
        for query_txt in &queries {
            let query = query_parser.parse_query(&query_txt).unwrap();
            // let num_terms = query.num_terms();
            let mut top_collector = TopCollector::with_limit(10);
            let mut count_collector = CountCollector::default();
            let mut timing = TimerTree::default();
            {
            let (_top_docs, count) = {
                let _search = timing.open("search");
                let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
                query.search(&searcher, &mut collector)
                    .map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
            }
            println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
                searcher
                    .search(&query, &(TopDocs::with_limit(10), Count))
                    .map_err(|e| {
                        format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e)
                    })?
            };
            println!("{}\t{}\t{}", query_txt, count, timing.total_time());
        }
    }
    
    

    println!("\n\nFETCH STORE\n");
    println!("{}\t{}", "query", "time in microsecs");
    for _ in 0..num_repeat {
        for query_txt in &queries {
            let query = query_parser.parse_query(&query_txt).unwrap();
            let mut top_collector = TopCollector::with_limit(10);
            query.search(&searcher, &mut top_collector)
                .map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e))?;
            let top_docs = searcher
                .search(&*query, &TopDocs::with_limit(10))
                .map_err(|e| {
                    format!(
                        "Failed while retrieving document for query {:?}.\n{:?}",
                        query, e
                    )
                })?;
            let mut timer = TimerTree::default();
            {
                let _scoped_timer_ = timer.open("total");
                for doc_address in top_collector.docs() {
                for (_score, doc_address) in top_docs {
                    searcher.doc(doc_address).unwrap();
                }
            }
            println!("{}\t{}", query_txt, timer.total_time());
        }
    }
    

    Ok(())
 }

--- a/src/commands/index.rs
+++ b/src/commands/index.rs
@@ -1,42 +1,53 @@
 use chan;
 use clap::ArgMatches;
 use std::cmp;
 use std::convert::From;
 use std::fs::File;
 use std::io;
 use std::cmp;
 use std::io::BufRead;
 use std::io::BufReader;
 use std::io::Read;
 use std::path::PathBuf;
 use std::thread;
 use tantivy;
 use tantivy::merge_policy::NoMergePolicy;
 use tantivy::Document;
 use tantivy::Index;
 use tantivy::IndexWriter;
 use tantivy::Document;
 use time::PreciseTime;
 use clap::ArgMatches;
 use chan;
 use tantivy::merge_policy::NoMergePolicy;
 use std::thread;

 pub fn run_index_cli(argmatch: &ArgMatches) -> Result<(), String> {
    let index_directory = PathBuf::from(argmatch.value_of("index").unwrap());
    let document_source = argmatch.value_of("file")
    let document_source = argmatch
        .value_of("file")
        .map(|path| DocumentSource::FromFile(PathBuf::from(path)))
        .unwrap_or(DocumentSource::FromPipe);
    let no_merge = argmatch.is_present("nomerge");
    let mut num_threads = value_t!(argmatch, "num_threads", usize).map_err(|_| format!("Failed to read num_threads argument as an integer."))?;
    let mut num_threads = value_t!(argmatch, "num_threads", usize)
        .map_err(|_| format!("Failed to read num_threads argument as an integer."))?;
    if num_threads == 0 {
        num_threads = 1;
    }
    let buffer_size = value_t!(argmatch, "memory_size", usize).map_err(|_| format!("Failed to read the buffer size argument as an integer."))?;
    let buffer_size = value_t!(argmatch, "memory_size", usize)
        .map_err(|_| format!("Failed to read the buffer size argument as an integer."))?;
    let buffer_size_per_thread = buffer_size / num_threads;
    run_index(index_directory, document_source, buffer_size_per_thread, num_threads, no_merge).map_err(|e| format!("Indexing failed : {:?}", e))
    run_index(
        index_directory,
        document_source,
        buffer_size_per_thread,
        num_threads,
        no_merge,
    )
    .map_err(|e| format!("Indexing failed : {:?}", e))
 }

 fn run_index(directory: PathBuf,
             document_source: DocumentSource,
             buffer_size_per_thread: usize,
             num_threads: usize,
             no_merge: bool) -> tantivy::Result<()> {
    
 fn run_index(
    directory: PathBuf,
    document_source: DocumentSource,
    buffer_size_per_thread: usize,
    num_threads: usize,
    no_merge: bool,
 ) -> tantivy::Result<()> {
    let index = Index::open_in_dir(&directory)?;
    let schema = index.schema();
    let (line_sender, line_receiver) = chan::sync(10_000);
@@ -76,7 +87,7 @@ fn run_index(directory: PathBuf,
    } else {
        index.writer(buffer_size_per_thread)
    }?;
    

    if no_merge {
        index_writer.set_merge_policy(Box::new(NoMergePolicy));
    }
@@ -88,8 +99,6 @@ fn run_index(directory: PathBuf,
        info!("Indexing the documents took {} s", duration.num_seconds());
    }



    match index_result {
        Ok(docstamp) => {
            println!("Commit succeed, docstamp at {}", docstamp);
@@ -98,7 +107,10 @@ fn run_index(directory: PathBuf,
            println!("Terminated successfully!");
            {
                let duration = start_overall.to(PreciseTime::now());
                info!("Indexing the documents took {} s overall (indexing + merge)", duration.num_seconds());
                info!(
                    "Indexing the documents took {} s overall (indexing + merge)",
                    duration.num_seconds()
                );
            }
            Ok(())
        }
@@ -111,7 +123,10 @@ fn run_index(directory: PathBuf,
    }
 }

 fn index_documents(index_writer: &mut IndexWriter, doc_receiver: chan::Receiver<Document>) -> tantivy::Result<u64> {
 fn index_documents(
    index_writer: &mut IndexWriter,
    doc_receiver: chan::Receiver<Document>,
 ) -> tantivy::Result<u64> {
    let group_count = 100_000;
    let mut num_docs = 0;
    let mut cur = PreciseTime::now();
@@ -121,7 +136,11 @@ fn index_documents(index_writer: &mut IndexWriter, doc_receiver: chan::Receiver<
            println!("{} Docs", num_docs);
            let new = PreciseTime::now();
            let elapsed = cur.to(new);
            println!("{:?} docs / hour", group_count * 3600 * 1_000_000 as u64 / (elapsed.num_microseconds().unwrap() as u64));
            println!(
                "{:?} docs / hour",
                group_count * 3600 * 1_000_000 as u64
                    / (elapsed.num_microseconds().unwrap() as u64)
            );
            cur = new;
        }
        num_docs += 1;
@@ -129,18 +148,15 @@ fn index_documents(index_writer: &mut IndexWriter, doc_receiver: chan::Receiver<
    index_writer.commit()
 }


 enum DocumentSource {
    FromPipe,
    FromFile(PathBuf),
 }

 impl DocumentSource {
    fn read(&self,) -> io::Result<BufReader<Box<Read>>> {
    fn read(&self) -> io::Result<BufReader<Box<Read>>> {
        Ok(match self {
            &DocumentSource::FromPipe => {
                BufReader::new(Box::new(io::stdin()))
            }
            &DocumentSource::FromPipe => BufReader::new(Box::new(io::stdin())),
            &DocumentSource::FromFile(ref filepath) => {
                let read_file = File::open(&filepath)?;
                BufReader::new(Box::new(read_file))
--- a/src/commands/merge.rs
+++ b/src/commands/merge.rs
@@ -1,13 +1,12 @@
 extern crate tantivy;

 use tantivy::{Index, SegmentMeta};
 use std::path::PathBuf;
 use clap::ArgMatches;
 use futures::Future;
 use std::path::PathBuf;
 use tantivy::{Index, SegmentMeta};

 const HEAP_SIZE: usize = 300_000_000;


 fn error_msg(err: tantivy::Error) -> String {
    format!("Merge failed : {:?}", err)
 }
@@ -15,12 +14,10 @@ fn error_msg(err: tantivy::Error) -> String {
 pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {
    let index_directory = PathBuf::from(argmatch.value_of("index").unwrap());
    run_merge(index_directory).map_err(error_msg)
    
    // we rollback to force a gc.

    // we rollback to force a gc.
 }


 fn run_merge(path: PathBuf) -> tantivy::Result<()> {
    let index = Index::open_in_dir(&path)?;
    let segments = index.searchable_segment_ids()?;
@@ -29,7 +26,7 @@ fn run_merge(path: PathBuf) -> tantivy::Result<()> {
        .merge(&segments)?
        .wait()
        .expect("Merge failed");
        //.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
    //.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
    println!("Merge finished with segment meta {:?}", segment_meta);
    println!("Garbage collect irrelevant segments.");
    Index::open_in_dir(&path)?
--- a/src/commands/mod.rs
+++ b/src/commands/mod.rs
@@ -1,13 +1,13 @@
 mod index;
 mod serve;
 mod new;
 mod bench;
 mod index;
 mod merge;
 mod new;
 mod search;
 mod serve;

 pub use self::new::run_new_cli;
 pub use self::index::run_index_cli;
 pub use self::serve::run_serve_cli;
 pub use self::search::run_search_cli;
 pub use self::bench::run_bench_cli;
 pub use self::index::run_index_cli;
 pub use self::merge::run_merge_cli;
 pub use self::new::run_new_cli;
 pub use self::search::run_search_cli;
 pub use self::serve::run_serve_cli;
--- a/src/commands/new.rs
+++ b/src/commands/new.rs
@@ -1,30 +1,35 @@
 use ansi_term::Colour::{Blue, Green, Red};
 use ansi_term::Style;
 use clap::ArgMatches;
 use serde_json;
 use std::convert::From;
 use std::io;
 use std::io::Write;
 use std::path::PathBuf;
 use tantivy;
 use tantivy::schema::Cardinality;
 use tantivy::schema::*;
 use tantivy::Index;
 use std::io;
 use ansi_term::Style;
 use ansi_term::Colour::{Red, Blue, Green};
 use std::io::Write;
 use serde_json;


 pub fn run_new_cli(matches: &ArgMatches) -> Result<(), String> {
    let index_directory = PathBuf::from(matches.value_of("index").unwrap());
    run_new(index_directory).map_err(|e| format!("{:?}" , e))
    run_new(index_directory).map_err(|e| format!("{:?}", e))
 }


 fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: P) -> String {
    loop {
        print!("{prompt_text:<width$} ? ", prompt_text=Style::new().bold().fg(Blue).paint(prompt_text), width=40);
        print!(
            "{prompt_text:<width$} ? ",
            prompt_text = Style::new().bold().fg(Blue).paint(prompt_text),
            width = 40
        );
        io::stdout().flush().unwrap();
        let mut buffer = String::new();
        io::stdin().read_line(&mut buffer).ok().expect("Failed to read line");
        let answer = buffer.trim_right().to_string();
        io::stdin()
            .read_line(&mut buffer)
            .ok()
            .expect("Failed to read line");
        let answer = buffer.trim_end().to_string();
        match predicate(&answer) {
            Ok(()) => {
                return answer;
@@ -36,30 +41,28 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate:
    }
 }


 fn field_name_validate(field_name: &str) -> Result<(), String> {
    if is_valid_field_name(field_name) {
        Ok(())
    }
    else {
        Err(String::from("Field name must match the pattern [_a-zA-Z0-9]+"))
    } else {
        Err(String::from(
            "Field name must match the pattern [_a-zA-Z0-9]+",
        ))
    }
 }
    
    

 fn prompt_options(msg: &str, codes: Vec<char>) -> char {
    let options_string: Vec<String> = codes.iter().map(|c| format!("{}", c)).collect();
    let options = options_string.join("/");
    let predicate = |entry: &str| {
        if entry.len() != 1 {
            return Err(format!("Invalid input. Options are ({})", options))
            return Err(format!("Invalid input. Options are ({})", options));
        }
        let c = entry.chars().next().unwrap().to_ascii_uppercase();
        if codes.contains(&c) {
            return Ok(())
        }
        else {
            return Err(format!("Invalid input. Options are ({})", options))
            return Ok(());
        } else {
            return Err(format!("Invalid input. Options are ({})", options));
        }
    };
    let message = format!("{} ({})", msg, options);
@@ -68,30 +71,28 @@ fn prompt_options(msg: &str, codes: Vec<char>) -> char {
 }

 fn prompt_yn(msg: &str) -> bool {
    prompt_options(msg, vec!('Y', 'N')) == 'Y' 
    prompt_options(msg, vec!['Y', 'N']) == 'Y'
 }


 fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) {
    let mut text_options = TextOptions::default();
    if prompt_yn("Should the field be stored") {
        text_options = text_options.set_stored();
    }



    if prompt_yn("Should the field be indexed") {
        let mut text_indexing_options = TextFieldIndexing
            ::default()
        let mut text_indexing_options = TextFieldIndexing::default()
            .set_index_option(IndexRecordOption::Basic)
            .set_tokenizer("en_stem");

        if prompt_yn("Should the term be tokenized?") {
            if prompt_yn("Should the term frequencies (per doc) be in the index") {
                if prompt_yn("Should the term positions (per doc) be in the index") {
                    text_indexing_options = text_indexing_options.set_index_option(IndexRecordOption::WithFreqsAndPositions);
                    text_indexing_options = text_indexing_options
                        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
                } else {
                    text_indexing_options = text_indexing_options.set_index_option(IndexRecordOption::WithFreqs);
                    text_indexing_options =
                        text_indexing_options.set_index_option(IndexRecordOption::WithFreqs);
                }
            }
        } else {
@@ -101,11 +102,9 @@ fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) {
        text_options = text_options.set_indexing_options(text_indexing_options);
    }


    schema_builder.add_text_field(field_name, text_options);
 }


 fn ask_add_field_u64(field_name: &str, schema_builder: &mut SchemaBuilder) {
    let mut u64_options = IntOptions::default();
    if prompt_yn("Should the field be stored") {
@@ -123,20 +122,28 @@ fn ask_add_field_u64(field_name: &str, schema_builder: &mut SchemaBuilder) {
 fn ask_add_field(schema_builder: &mut SchemaBuilder) {
    println!("\n\n");
    let field_name = prompt_input("New field name ", field_name_validate);
    let text_or_integer = prompt_options("Text or unsigned 32-bit integer", vec!('T', 'I'));
    if text_or_integer =='T' {
    let text_or_integer = prompt_options("Text or unsigned 32-bit integer", vec!['T', 'I']);
    if text_or_integer == 'T' {
        ask_add_field_text(&field_name, schema_builder);
    }
    else {
        ask_add_field_u64(&field_name, schema_builder);        
    } else {
        ask_add_field_u64(&field_name, schema_builder);
    }
 }

 fn run_new(directory: PathBuf) -> tantivy::Result<()> {
    println!("\n{} ", Style::new().bold().fg(Green).paint("Creating new index"));
    println!("{} ", Style::new().bold().fg(Green).paint("Let's define it's schema!"));
    println!(
        "\n{} ",
        Style::new().bold().fg(Green).paint("Creating new index")
    );
    println!(
        "{} ",
        Style::new()
            .bold()
            .fg(Green)
            .paint("Let's define it's schema!")
    );
    let mut schema_builder = SchemaBuilder::default();
    loop  {
    loop {
        ask_add_field(&mut schema_builder);
        if !prompt_yn("Add another field") {
            break;
@@ -148,4 +155,3 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> {
    Index::create_in_dir(&directory, schema)?;
    Ok(())
 }

--- a/src/commands/search.rs
+++ b/src/commands/search.rs
@@ -1,49 +1,46 @@
 use clap::ArgMatches;
 use serde_json;
 use std::convert::From;
 use std::path::Path;
 use std::path::PathBuf;
 use tantivy;
 use tantivy::Index;
 use tantivy::query::QueryParser;
 use tantivy::schema::Field;
 use serde_json;
 use tantivy::schema::FieldType;
 use tantivy::Index;

 pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
    let index_directory = PathBuf::from(matches.value_of("index").unwrap());
    let query = matches.value_of("query").unwrap();
    run_search(&index_directory, &query)
        .map_err(|e| format!("{:?}", e))
    run_search(&index_directory, &query).map_err(|e| format!("{:?}", e))
 }

 fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {     
 fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
    let index = Index::open_in_dir(directory)?;
    let schema = index.schema();
    let default_fields: Vec<Field> = schema
        .fields()
        .iter()
        .enumerate()
        .filter(
            |&(_, ref field_entry)|
                match *field_entry.field_type() {
                    FieldType::Str(ref text_field_options) => {
                        text_field_options.get_indexing_options().is_some()
                    },
                    _ => false
                }
        )
        .filter(|&(_, ref field_entry)| match *field_entry.field_type() {
            FieldType::Str(ref text_field_options) => {
                text_field_options.get_indexing_options().is_some()
            }
            _ => false,
        })
        .map(|(i, _)| Field(i as u32))
        .collect();
    let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
    let query = query_parser.parse_query(query)?;
    let searcher = index.searcher();
    let searcher = index.reader()?.searcher();
    let weight = query.weight(&searcher, false)?;
    let schema = index.schema();
    for segment_reader in searcher.segment_readers() {
        let mut scorer = weight.scorer(segment_reader)?;
        let store_reader = segment_reader.get_store_reader();
        while scorer.advance() {
            let doc_id = scorer.doc();
            let doc = segment_reader.doc(doc_id)?;
            let doc = store_reader.get(doc_id)?;
            let named_doc = schema.to_named_doc(&doc);
            println!("{}", serde_json::to_string(&named_doc).unwrap());
        }
--- a/src/commands/serve.rs
+++ b/src/commands/serve.rs
@@ -1,8 +1,8 @@
 /// This tantivy command starts a http server (by default on port 3000)
 /// 
 ///
 /// Currently the only entrypoint is /api/
 /// and it takes the following query string argument
 /// 
 ///
 /// - `q=` :    your query
 ///  - `nhits`:  the number of hits that should be returned. (default to 10)   
 ///
@@ -12,16 +12,14 @@
 ///
 ///     http://localhost:3000/api/?q=fulmicoton&&nhits=20
 ///


 use clap::ArgMatches;
 use iron::mime::Mime;
 use iron::prelude::*;
 use iron::status;
 use serde_json;
 use iron::typemap::Key;
 use mount::Mount;
 use persistent::Read;
 use serde_json;
 use std::convert::From;
 use std::error::Error;
 use std::fmt::{self, Debug};
@@ -29,11 +27,7 @@ use std::path::Path;
 use std::path::PathBuf;
 use std::str::FromStr;
 use tantivy;
 use tantivy::collector;
 use tantivy::collector::CountCollector;
 use tantivy::collector::TopCollector;
 use tantivy::Document;
 use tantivy::Index;
 use tantivy::collector::{Count, TopDocs};
 use tantivy::query::QueryParser;
 use tantivy::schema::Field;
 use tantivy::schema::FieldType;
@@ -41,6 +35,9 @@ use tantivy::schema::NamedFieldDocument;
 use tantivy::schema::Schema;
 use tantivy::tokenizer::*;
 use tantivy::DocAddress;
 use tantivy::Document;
 use tantivy::Index;
 use tantivy::IndexReader;
 use timer::TimerTree;
 use urlencoded::UrlEncodedQuery;

@@ -52,7 +49,6 @@ pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
    run_serve(index_directory, &host).map_err(|e| format!("{:?}", e))
 }


 #[derive(Serialize)]
 struct Serp {
    q: String,
@@ -68,42 +64,40 @@ struct Hit {
 }

 struct IndexServer {
    index: Index,
    reader: IndexReader,
    query_parser: QueryParser,
    schema: Schema,
 }

 impl IndexServer {
    
    fn load(path: &Path) -> IndexServer {
        let index = Index::open_in_dir(path).unwrap();
        index.tokenizers()
            .register("commoncrawl", SimpleTokenizer
            .filter(RemoveLongFilter::limit(40))
            .filter(LowerCaser)
            .filter(AlphaNumOnlyFilter)
            .filter(Stemmer::new())
        index.tokenizers().register(
            "commoncrawl",
            SimpleTokenizer
                .filter(RemoveLongFilter::limit(40))
                .filter(LowerCaser)
                .filter(AlphaNumOnlyFilter)
                .filter(Stemmer::new(Language::English)),
        );
        let schema = index.schema();
        let default_fields: Vec<Field> = schema
            .fields()
            .iter()
            .enumerate()
            .filter(
                |&(_, ref field_entry)| {
                    match *field_entry.field_type() {
                        FieldType::Str(ref text_field_options) => {
                            text_field_options.get_indexing_options().is_some()
                        },
                        _ => false
                    }
            .filter(|&(_, ref field_entry)| match *field_entry.field_type() {
                FieldType::Str(ref text_field_options) => {
                    text_field_options.get_indexing_options().is_some()
                }
            )
                _ => false,
            })
            .map(|(i, _)| Field(i as u32))
            .collect();
        let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
        let query_parser =
            QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
        let reader = index.reader().unwrap();
        IndexServer {
            index,
            reader,
            query_parser,
            schema,
        }
@@ -115,25 +109,23 @@ impl IndexServer {
            id: doc_address.doc(),
        }
    }
    

    fn search(&self, q: String, num_hits: usize) -> tantivy::Result<Serp> {
        let query = self.query_parser.parse_query(&q).expect("Parsing the query failed");
        let searcher = self.index.searcher();
        let mut count_collector = CountCollector::default();
        let mut top_collector = TopCollector::with_limit(num_hits);
        let query = self
            .query_parser
            .parse_query(&q)
            .expect("Parsing the query failed");
        let searcher = self.reader.searcher();
        let mut timer_tree = TimerTree::default();
        {
        let (top_docs, num_hits) = {
            let _search_timer = timer_tree.open("search");
            let mut chained_collector = collector::chain()
                .push(&mut top_collector)
                .push(&mut count_collector);
            query.search(&searcher, &mut chained_collector)?;
        }
            searcher.search(&query, &(TopDocs::with_limit(num_hits), Count))?
        };
        let hits: Vec<Hit> = {
            let _fetching_timer = timer_tree.open("fetching docs");
            top_collector.docs()
            top_docs
                .iter()
                .map(|doc_address| {
                .map(|(_score, doc_address)| {
                    let doc: Document = searcher.doc(*doc_address).unwrap();
                    self.create_hit(&doc, doc_address)
                })
@@ -141,7 +133,7 @@ impl IndexServer {
        };
        Ok(Serp {
            q,
            num_hits: count_collector.count(),
            num_hits,
            hits,
            timings: timer_tree,
        })
@@ -162,42 +154,53 @@ impl fmt::Display for StringError {
 }

 impl Error for StringError {
    fn description(&self) -> &str { &*self.0 }
    fn description(&self) -> &str {
        &*self.0
    }
 }

 fn search(req: &mut Request) -> IronResult<Response> {
    let index_server = req.get::<Read<IndexServer>>().unwrap();
    req.get_ref::<UrlEncodedQuery>()
        .map_err(|_| IronError::new(StringError(String::from("Failed to decode error")), status::BadRequest))
        .map_err(|_| {
            IronError::new(
                StringError(String::from("Failed to decode error")),
                status::BadRequest,
            )
        })
        .and_then(|ref qs_map| {
            let num_hits: usize = qs_map
                .get("nhits")
                .and_then(|nhits_str| usize::from_str(&nhits_str[0]).ok())
                .unwrap_or(10);
            let query = qs_map
                .get("q")
                .ok_or_else(|| IronError::new(StringError(String::from("Parameter q is missing from the query")), status::BadRequest))?[0].clone();
            let query = qs_map.get("q").ok_or_else(|| {
                IronError::new(
                    StringError(String::from("Parameter q is missing from the query")),
                    status::BadRequest,
                )
            })?[0]
                .clone();
            let serp = index_server.search(query, num_hits).unwrap();
            let resp_json = serde_json::to_string_pretty(&serp).unwrap();
            let content_type = "application/json".parse::<Mime>().unwrap();
            Ok(Response::with((content_type, status::Ok, format!("{}", resp_json))))
            Ok(Response::with((
                content_type,
                status::Ok,
                format!("{}", resp_json),
            )))
        })
        
 }



 fn run_serve(directory: PathBuf, host: &str) -> tantivy::Result<()> {
    let mut mount = Mount::new();
    let server = IndexServer::load(&directory);
    

    mount.mount("/api", search);
    

    let mut middleware = Chain::new(mount);
    middleware.link(Read::<IndexServer>::both(server));
    

    println!("listening on http://{}", host);
    Iron::new(middleware).http(host).unwrap();
    Ok(())
 }

--- a/src/main.rs
+++ b/src/main.rs
@@ -3,41 +3,39 @@ extern crate clap;
 extern crate serde_json;
 #[macro_use]
 extern crate log;
 extern crate ansi_term;
 extern crate bincode;
 extern crate byteorder;
 extern crate chan;
 extern crate env_logger;
 extern crate tantivy;
 extern crate time;
 extern crate persistent;
 extern crate futures;
 extern crate urlencoded;
 extern crate iron;
 extern crate chan;
 extern crate staticfile;
 extern crate ansi_term;
 extern crate mount;
 extern crate bincode;
 extern crate byteorder;
 extern crate persistent;
 extern crate staticfile;
 extern crate tantivy;
 extern crate time;
 extern crate urlencoded;

 #[macro_use]
 extern crate serde_derive;

 use std::io::Write;

 use clap::{AppSettings, Arg, App, SubCommand};
 use clap::{App, AppSettings, Arg, SubCommand};
 mod commands;
 pub mod timer;
 use self::commands::*;


 fn main() {
    
    env_logger::init().unwrap();
    

    let index_arg = Arg::with_name("index")
                    .short("i")
                    .long("index")
                    .value_name("directory")
                    .help("Tantivy index directory filepath")
                    .required(true);
        .short("i")
        .long("index")
        .value_name("directory")
        .help("Tantivy index directory filepath")
        .required(true);

    let cli_options = App::new("Tantivy")
        .setting(AppSettings::SubcommandRequiredElseHelp)
@@ -135,7 +133,7 @@ fn main() {
        "search" => run_search_cli,
        "merge" => run_merge_cli,
        "bench" => run_bench_cli,
        _ => panic!("Subcommand {} is unknown", subcommand)
        _ => panic!("Subcommand {} is unknown", subcommand),
    };

    if let Err(ref e) = run_cli(options) {
--- a/src/timer.rs
+++ b/src/timer.rs
@@ -26,7 +26,8 @@ impl<'a> Drop for OpenTimer<'a> {
    fn drop(&mut self) {
        self.timer_tree.timings.push(Timing {
            name: self.name,
            duration: self.start
            duration: self
                .start
                .to(PreciseTime::now())
                .num_microseconds()
                .unwrap(),