@@ -1,6 +1,6 @@ | |||||
[package] | [package] | ||||
name = "tantivy-cli" | name = "tantivy-cli" | ||||
version = "0.5.1" | |||||
version = "0.6.1" | |||||
authors = ["Paul Masurel <paul.masurel@gmail.com>"] | authors = ["Paul Masurel <paul.masurel@gmail.com>"] | ||||
description = """Command line interface for Tantivy, a search engine library.""" | description = """Command line interface for Tantivy, a search engine library.""" | ||||
@@ -30,7 +30,7 @@ byteorder = "0.5" | |||||
log = "0.3" | log = "0.3" | ||||
futures = "0.1" | futures = "0.1" | ||||
env_logger = "0.3" | env_logger = "0.3" | ||||
tantivy = "0.5.1" | |||||
tantivy = "0.6.1" | |||||
[[bin]] | [[bin]] | ||||
name = "tantivy" | name = "tantivy" | ||||
@@ -42,8 +42,3 @@ opt-level = 3 | |||||
debug = false | debug = false | ||||
debug-assertions = false | debug-assertions = false | ||||
lto = true | lto = true | ||||
[features] | |||||
default = ["tantivy/simdcompression"] | |||||
@@ -15,14 +15,8 @@ In this tutorial, we will create a brand new index with the articles of English | |||||
There are a couple ways to install `tantivy-cli`. | There are a couple ways to install `tantivy-cli`. | ||||
If you are a Rust programmer, you probably have `cargo` and `rustup` installed and you can just | |||||
run `rustup run nightly cargo install tantivy-cli`. (`cargo install tantivy-cli` will work | |||||
as well if nightly is your default toolchain). | |||||
Alternatively, you can directly download a | |||||
static binary for [Linux x86 64](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-unknown-linux-musl.tar.gz) or for [Mac OS X](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-apple-darwin.tar.gz) | |||||
and save it in a directory on your system's `PATH`. | |||||
If you are a Rust programmer, you probably have `cargo` installed and you can just | |||||
run `cargo install tantivy-cli` | |||||
@@ -217,11 +211,11 @@ the following [url](http://localhost:3000/api/?q=barack+obama&nhits=20) in your | |||||
By default this query is treated as `barack OR obama`. | By default this query is treated as `barack OR obama`. | ||||
You can also search for documents that contain both terms by adding a `+` sign before each term in your query. | You can also search for documents that contain both terms by adding a `+` sign before each term in your query. | ||||
http://localhost:3000/api/?q=%2Bbarack%20%2Bobama%0A&nhits=20 | |||||
http://localhost:3000/api/?q=%2Bbarack%20%2Bobama&nhits=20 | |||||
Also, `-` makes it possible to exclude the documents containing a specific term. | Also, `-` makes it possible to exclude the documents containing a specific term. | ||||
http://localhost:3000/api/?q=-barack%20%2Bobama%0A&nhits=20 | |||||
http://localhost:3000/api/?q=-barack%20%2Bobama&nhits=20 | |||||
Finally, tantivy handles phrase queries. | Finally, tantivy handles phrase queries. | ||||
@@ -2,7 +2,6 @@ use tantivy::Index; | |||||
use tantivy::schema::{Field, Schema}; | use tantivy::schema::{Field, Schema}; | ||||
use tantivy::query::QueryParser; | use tantivy::query::QueryParser; | ||||
use std::path::Path; | use std::path::Path; | ||||
use tantivy::TimerTree; | |||||
use std::io::BufReader; | use std::io::BufReader; | ||||
use std::io::BufRead; | use std::io::BufRead; | ||||
use std::io; | use std::io; | ||||
@@ -12,6 +11,7 @@ use tantivy::collector::TopCollector; | |||||
use tantivy::collector::CountCollector; | use tantivy::collector::CountCollector; | ||||
use clap::ArgMatches; | use clap::ArgMatches; | ||||
use std::path::PathBuf; | use std::path::PathBuf; | ||||
use timer::TimerTree; | |||||
pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> { | pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> { | ||||
@@ -52,7 +52,7 @@ fn run_bench(index_path: &Path, | |||||
println!("Query : {:?}", index_path); | println!("Query : {:?}", index_path); | ||||
println!("-------------------------------\n\n\n"); | println!("-------------------------------\n\n\n"); | ||||
let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?; | |||||
let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?; | |||||
let searcher = index.searcher(); | let searcher = index.searcher(); | ||||
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema()); | let default_search_fields: Vec<Field> = extract_search_fields(&index.schema()); | ||||
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?; | let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?; | ||||
@@ -66,10 +66,11 @@ fn run_bench(index_path: &Path, | |||||
// let num_terms = query.num_terms(); | // let num_terms = query.num_terms(); | ||||
let mut top_collector = TopCollector::with_limit(10); | let mut top_collector = TopCollector::with_limit(10); | ||||
let mut count_collector = CountCollector::default(); | let mut count_collector = CountCollector::default(); | ||||
let timing; | |||||
let mut timing = TimerTree::default(); | |||||
{ | { | ||||
let _search = timing.open("search"); | |||||
let mut collector = chain().push(&mut top_collector).push(&mut count_collector); | let mut collector = chain().push(&mut top_collector).push(&mut count_collector); | ||||
timing = query.search(&searcher, &mut collector) | |||||
query.search(&searcher, &mut collector) | |||||
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?; | .map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?; | ||||
} | } | ||||
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time()); | println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time()); | ||||
@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf, | |||||
num_threads: usize, | num_threads: usize, | ||||
no_merge: bool) -> tantivy::Result<()> { | no_merge: bool) -> tantivy::Result<()> { | ||||
let index = Index::open(&directory)?; | |||||
let index = Index::open_in_dir(&directory)?; | |||||
let schema = index.schema(); | let schema = index.schema(); | ||||
let (line_sender, line_receiver) = chan::sync(10_000); | let (line_sender, line_receiver) = chan::sync(10_000); | ||||
let (doc_sender, doc_receiver) = chan::sync(10_000); | let (doc_sender, doc_receiver) = chan::sync(10_000); | ||||
@@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> { | |||||
fn run_merge(path: PathBuf) -> tantivy::Result<()> { | fn run_merge(path: PathBuf) -> tantivy::Result<()> { | ||||
let index = Index::open(&path)?; | |||||
let index = Index::open_in_dir(&path)?; | |||||
let segments = index.searchable_segment_ids()?; | let segments = index.searchable_segment_ids()?; | ||||
let segment_meta: SegmentMeta = index | let segment_meta: SegmentMeta = index | ||||
.writer(HEAP_SIZE)? | .writer(HEAP_SIZE)? | ||||
.merge(&segments) | |||||
.merge(&segments)? | |||||
.wait() | .wait() | ||||
.expect("Merge failed"); | .expect("Merge failed"); | ||||
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled"))); | //.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled"))); | ||||
println!("Merge finished with segment meta {:?}", segment_meta); | println!("Merge finished with segment meta {:?}", segment_meta); | ||||
println!("Garbage collect irrelevant segments."); | println!("Garbage collect irrelevant segments."); | ||||
Index::open(&path)? | |||||
Index::open_in_dir(&path)? | |||||
.writer_with_num_threads(1, 40_000_000)? | .writer_with_num_threads(1, 40_000_000)? | ||||
.garbage_collect_files()?; | .garbage_collect_files()?; | ||||
Ok(()) | Ok(()) | ||||
@@ -24,7 +24,7 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: | |||||
io::stdout().flush().unwrap(); | io::stdout().flush().unwrap(); | ||||
let mut buffer = String::new(); | let mut buffer = String::new(); | ||||
io::stdin().read_line(&mut buffer).ok().expect("Failed to read line"); | io::stdin().read_line(&mut buffer).ok().expect("Failed to read line"); | ||||
let answer = buffer.trim_right_matches("\n").to_string(); | |||||
let answer = buffer.trim_right().to_string(); | |||||
match predicate(&answer) { | match predicate(&answer) { | ||||
Ok(()) => { | Ok(()) => { | ||||
return answer; | return answer; | ||||
@@ -145,7 +145,7 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> { | |||||
let schema = schema_builder.build(); | let schema = schema_builder.build(); | ||||
let schema_json = format!("{}", serde_json::to_string_pretty(&schema).unwrap()); | let schema_json = format!("{}", serde_json::to_string_pretty(&schema).unwrap()); | ||||
println!("\n{}\n", Style::new().fg(Green).paint(schema_json)); | println!("\n{}\n", Style::new().fg(Green).paint(schema_json)); | ||||
Index::create(&directory, schema)?; | |||||
Index::create_in_dir(&directory, schema)?; | |||||
Ok(()) | Ok(()) | ||||
} | } | ||||
@@ -17,7 +17,7 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> { | |||||
} | } | ||||
fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> { | fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> { | ||||
let index = Index::open(directory)?; | |||||
let index = Index::open_in_dir(directory)?; | |||||
let schema = index.schema(); | let schema = index.schema(); | ||||
let default_fields: Vec<Field> = schema | let default_fields: Vec<Field> = schema | ||||
.fields() | .fields() | ||||
@@ -39,9 +39,9 @@ use tantivy::schema::Field; | |||||
use tantivy::schema::FieldType; | use tantivy::schema::FieldType; | ||||
use tantivy::schema::NamedFieldDocument; | use tantivy::schema::NamedFieldDocument; | ||||
use tantivy::schema::Schema; | use tantivy::schema::Schema; | ||||
use tantivy::TimerTree; | |||||
use tantivy::tokenizer::*; | use tantivy::tokenizer::*; | ||||
use tantivy::DocAddress; | use tantivy::DocAddress; | ||||
use timer::TimerTree; | |||||
use urlencoded::UrlEncodedQuery; | use urlencoded::UrlEncodedQuery; | ||||
pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> { | pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> { | ||||
@@ -76,7 +76,7 @@ struct IndexServer { | |||||
impl IndexServer { | impl IndexServer { | ||||
fn load(path: &Path) -> IndexServer { | fn load(path: &Path) -> IndexServer { | ||||
let index = Index::open(path).unwrap(); | |||||
let index = Index::open_in_dir(path).unwrap(); | |||||
index.tokenizers() | index.tokenizers() | ||||
.register("commoncrawl", SimpleTokenizer | .register("commoncrawl", SimpleTokenizer | ||||
.filter(RemoveLongFilter::limit(40)) | .filter(RemoveLongFilter::limit(40)) | ||||
@@ -23,6 +23,7 @@ extern crate serde_derive; | |||||
use clap::{AppSettings, Arg, App, SubCommand}; | use clap::{AppSettings, Arg, App, SubCommand}; | ||||
mod commands; | mod commands; | ||||
pub mod timer; | |||||
use self::commands::*; | use self::commands::*; | ||||
@@ -0,0 +1,99 @@ | |||||
use time::PreciseTime; | |||||
pub struct OpenTimer<'a> { | |||||
name: &'static str, | |||||
timer_tree: &'a mut TimerTree, | |||||
start: PreciseTime, | |||||
depth: u32, | |||||
} | |||||
impl<'a> OpenTimer<'a> { | |||||
/// Starts timing a new named subtask | |||||
/// | |||||
/// The timer is stopped automatically | |||||
/// when the `OpenTimer` is dropped. | |||||
pub fn open(&mut self, name: &'static str) -> OpenTimer { | |||||
OpenTimer { | |||||
name, | |||||
timer_tree: self.timer_tree, | |||||
start: PreciseTime::now(), | |||||
depth: self.depth + 1, | |||||
} | |||||
} | |||||
} | |||||
impl<'a> Drop for OpenTimer<'a> { | |||||
fn drop(&mut self) { | |||||
self.timer_tree.timings.push(Timing { | |||||
name: self.name, | |||||
duration: self.start | |||||
.to(PreciseTime::now()) | |||||
.num_microseconds() | |||||
.unwrap(), | |||||
depth: self.depth, | |||||
}); | |||||
} | |||||
} | |||||
/// Timing recording | |||||
#[derive(Debug, Serialize)] | |||||
pub struct Timing { | |||||
name: &'static str, | |||||
duration: i64, | |||||
depth: u32, | |||||
} | |||||
/// Timer tree | |||||
#[derive(Debug, Serialize)] | |||||
pub struct TimerTree { | |||||
timings: Vec<Timing>, | |||||
} | |||||
impl TimerTree { | |||||
/// Returns the total time elapsed in microseconds | |||||
pub fn total_time(&self) -> i64 { | |||||
self.timings.last().unwrap().duration | |||||
} | |||||
/// Open a new named subtask | |||||
pub fn open(&mut self, name: &'static str) -> OpenTimer { | |||||
OpenTimer { | |||||
name, | |||||
timer_tree: self, | |||||
start: PreciseTime::now(), | |||||
depth: 0, | |||||
} | |||||
} | |||||
} | |||||
impl Default for TimerTree { | |||||
fn default() -> TimerTree { | |||||
TimerTree { | |||||
timings: Vec::new(), | |||||
} | |||||
} | |||||
} | |||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Nested timers are recorded when they are dropped: innermost first,
    /// each with the depth at which it was opened.
    #[test]
    fn test_timer() {
        let mut timer_tree = TimerTree::default();
        {
            let mut a = timer_tree.open("a");
            {
                let mut ab = a.open("b");
                {
                    let _abc = ab.open("c");
                }
                {
                    let _abd = ab.open("d");
                }
            }
        }
        assert_eq!(timer_tree.timings.len(), 4);

        // Drop order is c, d, b, a; depth grows by one per nesting level.
        let recorded: Vec<(&str, u32)> = timer_tree
            .timings
            .iter()
            .map(|timing| (timing.name, timing.depth))
            .collect();
        assert_eq!(recorded, vec![("c", 2), ("d", 2), ("b", 1), ("a", 0)]);

        // The root timer is dropped last, so it provides the total time.
        assert_eq!(timer_tree.total_time(), timer_tree.timings[3].duration);
    }
}