Merge branch 'master' into develop

7 years ago · a270d7f3ad
--- a/.cargo/config
+++ b/.cargo/config
@@ -0,0 +1,2 @@
 [build]
 rustflags = "-C target-cpu=native"
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-cli"
 version = "0.5.0"
 version = "0.6.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 description = """Command line interface for Tantivy, a search engine library."""
@@ -30,8 +30,7 @@ byteorder = "0.5"
 log = "0.3"
 futures = "0.1"
 env_logger = "0.3"
 version = "2"
 tantivy = "0.5.0" 
 tantivy = "0.6.1"
 [[bin]]
 name = "tantivy"
@@ -43,8 +42,3 @@ opt-level = 3
 debug = false
 debug-assertions = false
 lto = true
 [features]
 default = ["tantivy/simdcompression"]
--- a/README.md
+++ b/README.md
@@ -15,14 +15,8 @@ In this tutorial, we will create a brand new index with the articles of English
 There are a couple ways to install `tantivy-cli`.
 If you are a Rust programmer, you probably have `cargo` and `rustup` installed and you can just
 run `rustup run nightly cargo install tantivy-cli`. (`cargo install tantivy-cli` will work
 as well if nightly is your default toolchain).
 Alternatively, you can directly download a
 static binary for [Linux x86 64](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-unknown-linux-musl.tar.gz) or for [Mac OS X](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-apple-darwin.tar.gz)
 and save it in a directory on your system's `PATH`.
 If you are a Rust programmer, you probably have `cargo` installed and you can just
 run `cargo install tantivy-cli`
@@ -217,11 +211,11 @@ the following [url](http://localhost:3000/api/?q=barack+obama&nhits=20) in your
 By default this query is treated as `barack OR obama`.
 You can also search for documents that contain both terms by adding a `+` sign before each term in your query.
    http://localhost:3000/api/?q=%2Bbarack%20%2Bobama%0A&nhits=20
    http://localhost:3000/api/?q=%2Bbarack%20%2Bobama&nhits=20
 Also, `-` makes it possible to exclude the documents containing a specific term.
    http://localhost:3000/api/?q=-barack%20%2Bobama%0A&nhits=20
    http://localhost:3000/api/?q=-barack%20%2Bobama&nhits=20
 Finally, tantivy handles phrase queries.
--- a/src/commands/bench.rs
+++ b/src/commands/bench.rs
@@ -2,7 +2,6 @@ use tantivy::Index;
 use tantivy::schema::{Field, Schema};
 use tantivy::query::QueryParser;
 use std::path::Path;
 use tantivy::TimerTree;
 use std::io::BufReader;
 use std::io::BufRead;
 use std::io;
@@ -12,6 +11,7 @@ use tantivy::collector::TopCollector;
 use tantivy::collector::CountCollector;
 use clap::ArgMatches;
 use std::path::PathBuf;
 use timer::TimerTree;
 pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
@@ -52,7 +52,7 @@ fn run_bench(index_path: &Path,
    println!("Query : {:?}", index_path);
    println!("-------------------------------\n\n\n");
    let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
    let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
    let searcher = index.searcher();
    let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
    let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file:  {}", e))?;
@@ -66,10 +66,11 @@ fn run_bench(index_path: &Path,
            // let num_terms = query.num_terms();
            let mut top_collector = TopCollector::with_limit(10);
            let mut count_collector = CountCollector::default();
            let timing;
            let mut timing = TimerTree::default();
            {
                let _search = timing.open("search");
                let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
                timing = query.search(&searcher, &mut collector)
                query.search(&searcher, &mut collector)
                    .map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
            }
            println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
--- a/src/commands/index.rs
+++ b/src/commands/index.rs
@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf,
             num_threads: usize,
             no_merge: bool) -> tantivy::Result<()> {
    let index = Index::open(&directory)?;
    let index = Index::open_in_dir(&directory)?;
    let schema = index.schema();
    let (line_sender, line_receiver) = chan::sync(10_000);
    let (doc_sender, doc_receiver) = chan::sync(10_000);
--- a/src/commands/merge.rs
+++ b/src/commands/merge.rs
@@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {
 fn run_merge(path: PathBuf) -> tantivy::Result<()> {
    let index = Index::open(&path)?;
    let index = Index::open_in_dir(&path)?;
    let segments = index.searchable_segment_ids()?;
    let segment_meta: SegmentMeta = index
        .writer(HEAP_SIZE)?
        .merge(&segments)
        .merge(&segments)?
        .wait()
        .expect("Merge failed");
        //.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
    println!("Merge finished with segment meta {:?}", segment_meta);
    println!("Garbage collect irrelevant segments.");
    Index::open(&path)?
    Index::open_in_dir(&path)?
        .writer_with_num_threads(1, 40_000_000)?
        .garbage_collect_files()?;
    Ok(())
--- a/src/commands/new.rs
+++ b/src/commands/new.rs
@@ -24,7 +24,7 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate:
        io::stdout().flush().unwrap();
        let mut buffer = String::new();
        io::stdin().read_line(&mut buffer).ok().expect("Failed to read line");
        let answer = buffer.trim_right_matches("\n").to_string();
        let answer = buffer.trim_right().to_string();
        match predicate(&answer) {
            Ok(()) => {
                return answer;
@@ -145,7 +145,7 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> {
    let schema = schema_builder.build();
    let schema_json = format!("{}", serde_json::to_string_pretty(&schema).unwrap());
    println!("\n{}\n", Style::new().fg(Green).paint(schema_json));
    Index::create(&directory, schema)?;
    Index::create_in_dir(&directory, schema)?;
    Ok(())
 }
--- a/src/commands/search.rs
+++ b/src/commands/search.rs
@@ -17,21 +17,20 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
 }
 fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {     
    let index = Index::open(directory)?;
    let index = Index::open_in_dir(directory)?;
    let schema = index.schema();
    let default_fields: Vec<Field> = schema
        .fields()
        .iter()
        .enumerate()
        .filter(
            |&(_, ref field_entry)      | {
            |&(_, ref field_entry)|
                match *field_entry.field_type() {
                    FieldType::Str(ref text_field_options) => {
                        text_field_options.get_indexing_options().is_some()
                    },
                    _ => false
                }
            }
        )
        .map(|(i, _)| Field(i as u32))
        .collect();
--- a/src/commands/serve.rs
+++ b/src/commands/serve.rs
@@ -39,9 +39,9 @@ use tantivy::schema::Field;
 use tantivy::schema::FieldType;
 use tantivy::schema::NamedFieldDocument;
 use tantivy::schema::Schema;
 use tantivy::TimerTree;
 use tantivy::tokenizer::*;
 use tantivy::DocAddress;
 use timer::TimerTree;
 use urlencoded::UrlEncodedQuery;
 pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
@@ -76,7 +76,7 @@ struct IndexServer {
 impl IndexServer {
    fn load(path: &Path) -> IndexServer {
        let index = Index::open(path).unwrap();
        let index = Index::open_in_dir(path).unwrap();
        index.tokenizers()
            .register("commoncrawl", SimpleTokenizer
            .filter(RemoveLongFilter::limit(40))
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,5 @@
 #[macro_use]
 extern crate clap;
 #[macro_use]
 extern crate version;
 extern crate serde_json;
 #[macro_use]
 extern crate log;
@@ -26,6 +24,7 @@ use std::io::Write;
 use clap::{AppSettings, Arg, App, SubCommand};
 mod commands;
 pub mod timer;
 use self::commands::*;
@@ -42,7 +41,7 @@ fn main() {
    let cli_options = App::new("Tantivy")
        .setting(AppSettings::SubcommandRequiredElseHelp)
        .version(version!())
        .version(env!("CARGO_PKG_VERSION"))
        .author("Paul Masurel <paul.masurel@gmail.com>")
        .about("Tantivy Search Engine's command line interface.")
        .subcommand(
--- a/src/timer.rs
+++ b/src/timer.rs
@@ -0,0 +1,99 @@
 use time::PreciseTime;
 /// Guard for a running, named timing measurement.
 ///
 /// The start time is captured when the timer is created; when the value is
 /// dropped, a `Timing` entry is appended to the `TimerTree` it borrows.
 pub struct OpenTimer<'a> {
    /// Label stored in the resulting `Timing`.
    name: &'static str,
    /// Tree that receives the `Timing` when this timer is dropped.
    timer_tree: &'a mut TimerTree,
    /// Point in time at which this timer was opened.
    start: PreciseTime,
    /// Nesting level: 0 for timers opened on the tree itself,
    /// parent depth + 1 for timers opened from another `OpenTimer`.
    depth: u32,
 }
 impl<'a> OpenTimer<'a> {
    /// Opens a child timer nested one level below this one.
    ///
    /// The child records into the same `TimerTree` as its parent and is
    /// stopped automatically when the returned `OpenTimer` is dropped.
    pub fn open(&mut self, name: &'static str) -> OpenTimer {
        let child_depth = self.depth + 1;
        // Reborrow the tree for as long as the child is alive; the parent
        // cannot be used (or dropped) until the child has gone away.
        let tree = &mut *self.timer_tree;
        OpenTimer {
            name,
            timer_tree: tree,
            start: PreciseTime::now(),
            depth: child_depth,
        }
    }
 }
 impl<'a> Drop for OpenTimer<'a> {
    /// Stops the timer and appends its measurement to the borrowed tree.
    fn drop(&mut self) {
        let finished_at = PreciseTime::now();
        // `unwrap` panics if `num_microseconds` yields no value for the
        // elapsed span.
        let elapsed_micros = self.start.to(finished_at).num_microseconds().unwrap();
        let record = Timing {
            name: self.name,
            duration: elapsed_micros,
            depth: self.depth,
        };
        self.timer_tree.timings.push(record);
    }
 }
 /// Timing recording
 ///
 /// One completed measurement, pushed onto a `TimerTree` when the
 /// corresponding `OpenTimer` is dropped.
 #[derive(Debug, Serialize)]
 pub struct Timing {
    /// Name given to the timer when it was opened.
    name: &'static str,
    /// Elapsed time between opening and dropping the timer, in microseconds.
    duration: i64,
    /// Nesting depth of the timer (0 for timers opened directly on the tree).
    depth: u32,
 }
 /// Timer tree
 ///
 /// Collects the `Timing` entries produced by `OpenTimer` guards. The nesting
 /// is not stored as an actual tree: each entry carries its own `depth`.
 #[derive(Debug, Serialize)]
 pub struct TimerTree {
    /// Recorded timings in completion order: an entry is pushed when its
    /// timer is dropped, so nested timers appear before their parent.
    timings: Vec<Timing>,
 }
 impl TimerTree {
    /// Returns the total time elapsed in microseconds
    ///
    /// The value is the duration of the most recently recorded timing. Since
    /// a timing is recorded when its timer is dropped, this is the outermost
    /// timer once every nested timer has been closed.
    ///
    /// Returns `0` if no timing has been recorded yet (for instance on a
    /// freshly created tree) instead of panicking.
    pub fn total_time(&self) -> i64 {
        self.timings.last().map_or(0, |timing| timing.duration)
    }
    /// Open a new named subtask
    ///
    /// The returned `OpenTimer` has depth 0 and mutably borrows the tree; its
    /// measurement is recorded when it is dropped.
    pub fn open(&mut self, name: &'static str) -> OpenTimer {
        OpenTimer {
            name,
            timer_tree: self,
            start: PreciseTime::now(),
            depth: 0,
        }
    }
 }
 impl Default for TimerTree {
    /// Builds a tree with no recorded timings.
    fn default() -> TimerTree {
        let timings = Vec::new();
        TimerTree { timings }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Opens the timers a > b > {c, d} and checks that each of the four
    /// produces exactly one recorded timing once it has been dropped.
    #[test]
    fn test_timer() {
        let mut tree = TimerTree::default();
        {
            let mut outer = tree.open("a");
            {
                let mut middle = outer.open("b");
                // Leaf timers are closed immediately, one after the other.
                drop(middle.open("c"));
                drop(middle.open("d"));
            }
        }
        assert_eq!(tree.timings.len(), 4);
    }
 }