@@ -1,6 +1,6 @@ | |||
[package] | |||
name = "tantivy-cli" | |||
version = "0.5.1" | |||
version = "0.6.1" | |||
authors = ["Paul Masurel <paul.masurel@gmail.com>"] | |||
description = """Command line interface for Tantivy, a search engine library.""" | |||
@@ -30,7 +30,7 @@ byteorder = "0.5" | |||
log = "0.3" | |||
futures = "0.1" | |||
env_logger = "0.3" | |||
tantivy = "0.5.1" | |||
tantivy = "0.6.1" | |||
[[bin]] | |||
name = "tantivy" | |||
@@ -42,8 +42,3 @@ opt-level = 3 | |||
debug = false | |||
debug-assertions = false | |||
lto = true | |||
[features] | |||
default = ["tantivy/simdcompression"] | |||
@@ -15,14 +15,8 @@ In this tutorial, we will create a brand new index with the articles of English | |||
There are a couple ways to install `tantivy-cli`. | |||
If you are a Rust programmer, you probably have `cargo` and `rustup` installed and you can just | |||
run `rustup run nightly cargo install tantivy-cli`. (`cargo install tantivy-cli` will work | |||
as well if nightly is your default toolchain). | |||
Alternatively, you can directly download a | |||
static binary for [Linux x86 64](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-unknown-linux-musl.tar.gz) or for [Mac OS X](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-apple-darwin.tar.gz) | |||
and save it in a directory on your system's `PATH`. | |||
If you are a Rust programmer, you probably have `cargo` installed and you can just | |||
run `cargo install tantivy-cli` | |||
@@ -217,11 +211,11 @@ the following [url](http://localhost:3000/api/?q=barack+obama&nhits=20) in your | |||
By default this query is treated as `barack OR obama`. | |||
You can also search for documents that contain both terms by adding a `+` sign before each term in your query. | |||
http://localhost:3000/api/?q=%2Bbarack%20%2Bobama%0A&nhits=20 | |||
http://localhost:3000/api/?q=%2Bbarack%20%2Bobama&nhits=20 | |||
Also, `-` makes it possible to exclude documents containing a specific term. | |||
http://localhost:3000/api/?q=-barack%20%2Bobama%0A&nhits=20 | |||
http://localhost:3000/api/?q=-barack%20%2Bobama&nhits=20 | |||
Finally, tantivy handles phrase queries. | |||
@@ -2,7 +2,6 @@ use tantivy::Index; | |||
use tantivy::schema::{Field, Schema}; | |||
use tantivy::query::QueryParser; | |||
use std::path::Path; | |||
use tantivy::TimerTree; | |||
use std::io::BufReader; | |||
use std::io::BufRead; | |||
use std::io; | |||
@@ -12,6 +11,7 @@ use tantivy::collector::TopCollector; | |||
use tantivy::collector::CountCollector; | |||
use clap::ArgMatches; | |||
use std::path::PathBuf; | |||
use timer::TimerTree; | |||
pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> { | |||
@@ -52,7 +52,7 @@ fn run_bench(index_path: &Path, | |||
println!("Query : {:?}", index_path); | |||
println!("-------------------------------\n\n\n"); | |||
let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?; | |||
let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?; | |||
let searcher = index.searcher(); | |||
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema()); | |||
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?; | |||
@@ -66,10 +66,11 @@ fn run_bench(index_path: &Path, | |||
// let num_terms = query.num_terms(); | |||
let mut top_collector = TopCollector::with_limit(10); | |||
let mut count_collector = CountCollector::default(); | |||
let timing; | |||
let mut timing = TimerTree::default(); | |||
{ | |||
let _search = timing.open("search"); | |||
let mut collector = chain().push(&mut top_collector).push(&mut count_collector); | |||
timing = query.search(&searcher, &mut collector) | |||
query.search(&searcher, &mut collector) | |||
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?; | |||
} | |||
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time()); | |||
@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf, | |||
num_threads: usize, | |||
no_merge: bool) -> tantivy::Result<()> { | |||
let index = Index::open(&directory)?; | |||
let index = Index::open_in_dir(&directory)?; | |||
let schema = index.schema(); | |||
let (line_sender, line_receiver) = chan::sync(10_000); | |||
let (doc_sender, doc_receiver) = chan::sync(10_000); | |||
@@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> { | |||
fn run_merge(path: PathBuf) -> tantivy::Result<()> { | |||
let index = Index::open(&path)?; | |||
let index = Index::open_in_dir(&path)?; | |||
let segments = index.searchable_segment_ids()?; | |||
let segment_meta: SegmentMeta = index | |||
.writer(HEAP_SIZE)? | |||
.merge(&segments) | |||
.merge(&segments)? | |||
.wait() | |||
.expect("Merge failed"); | |||
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled"))); | |||
println!("Merge finished with segment meta {:?}", segment_meta); | |||
println!("Garbage collect irrelevant segments."); | |||
Index::open(&path)? | |||
Index::open_in_dir(&path)? | |||
.writer_with_num_threads(1, 40_000_000)? | |||
.garbage_collect_files()?; | |||
Ok(()) | |||
@@ -24,7 +24,7 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: | |||
io::stdout().flush().unwrap(); | |||
let mut buffer = String::new(); | |||
io::stdin().read_line(&mut buffer).ok().expect("Failed to read line"); | |||
let answer = buffer.trim_right_matches("\n").to_string(); | |||
let answer = buffer.trim_right().to_string(); | |||
match predicate(&answer) { | |||
Ok(()) => { | |||
return answer; | |||
@@ -145,7 +145,7 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> { | |||
let schema = schema_builder.build(); | |||
let schema_json = format!("{}", serde_json::to_string_pretty(&schema).unwrap()); | |||
println!("\n{}\n", Style::new().fg(Green).paint(schema_json)); | |||
Index::create(&directory, schema)?; | |||
Index::create_in_dir(&directory, schema)?; | |||
Ok(()) | |||
} | |||
@@ -17,7 +17,7 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> { | |||
} | |||
fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> { | |||
let index = Index::open(directory)?; | |||
let index = Index::open_in_dir(directory)?; | |||
let schema = index.schema(); | |||
let default_fields: Vec<Field> = schema | |||
.fields() | |||
@@ -39,9 +39,9 @@ use tantivy::schema::Field; | |||
use tantivy::schema::FieldType; | |||
use tantivy::schema::NamedFieldDocument; | |||
use tantivy::schema::Schema; | |||
use tantivy::TimerTree; | |||
use tantivy::tokenizer::*; | |||
use tantivy::DocAddress; | |||
use timer::TimerTree; | |||
use urlencoded::UrlEncodedQuery; | |||
pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> { | |||
@@ -76,7 +76,7 @@ struct IndexServer { | |||
impl IndexServer { | |||
fn load(path: &Path) -> IndexServer { | |||
let index = Index::open(path).unwrap(); | |||
let index = Index::open_in_dir(path).unwrap(); | |||
index.tokenizers() | |||
.register("commoncrawl", SimpleTokenizer | |||
.filter(RemoveLongFilter::limit(40)) | |||
@@ -23,6 +23,7 @@ extern crate serde_derive; | |||
use clap::{AppSettings, Arg, App, SubCommand}; | |||
mod commands; | |||
pub mod timer; | |||
use self::commands::*; | |||
@@ -0,0 +1,99 @@ | |||
use time::PreciseTime; | |||
pub struct OpenTimer<'a> { | |||
name: &'static str, | |||
timer_tree: &'a mut TimerTree, | |||
start: PreciseTime, | |||
depth: u32, | |||
} | |||
impl<'a> OpenTimer<'a> { | |||
/// Starts timing a new named subtask | |||
/// | |||
/// The timer is stopped automatically | |||
/// when the `OpenTimer` is dropped. | |||
pub fn open(&mut self, name: &'static str) -> OpenTimer { | |||
OpenTimer { | |||
name, | |||
timer_tree: self.timer_tree, | |||
start: PreciseTime::now(), | |||
depth: self.depth + 1, | |||
} | |||
} | |||
} | |||
impl<'a> Drop for OpenTimer<'a> { | |||
fn drop(&mut self) { | |||
self.timer_tree.timings.push(Timing { | |||
name: self.name, | |||
duration: self.start | |||
.to(PreciseTime::now()) | |||
.num_microseconds() | |||
.unwrap(), | |||
depth: self.depth, | |||
}); | |||
} | |||
} | |||
/// Timing recording | |||
#[derive(Debug, Serialize)] | |||
pub struct Timing { | |||
name: &'static str, | |||
duration: i64, | |||
depth: u32, | |||
} | |||
/// Timer tree | |||
#[derive(Debug, Serialize)] | |||
pub struct TimerTree { | |||
timings: Vec<Timing>, | |||
} | |||
impl TimerTree { | |||
/// Returns the total time elapsed in microseconds | |||
pub fn total_time(&self) -> i64 { | |||
self.timings.last().unwrap().duration | |||
} | |||
/// Open a new named subtask | |||
pub fn open(&mut self, name: &'static str) -> OpenTimer { | |||
OpenTimer { | |||
name, | |||
timer_tree: self, | |||
start: PreciseTime::now(), | |||
depth: 0, | |||
} | |||
} | |||
} | |||
impl Default for TimerTree { | |||
fn default() -> TimerTree { | |||
TimerTree { | |||
timings: Vec::new(), | |||
} | |||
} | |||
} | |||
#[cfg(test)]
mod tests {
    use super::*;

    /// Nested timers are recorded innermost-first, each with its nesting depth.
    #[test]
    fn test_timer() {
        let mut timer_tree = TimerTree::default();
        {
            let mut a = timer_tree.open("a");
            {
                let mut ab = a.open("b");
                {
                    let _abc = ab.open("c");
                }
                {
                    let _abd = ab.open("d");
                }
            }
        }
        assert_eq!(timer_tree.timings.len(), 4);

        // Guards push on drop, so children land before their parents.
        let recorded: Vec<(&str, u32)> = timer_tree
            .timings
            .iter()
            .map(|timing| (timing.name, timing.depth))
            .collect();
        assert_eq!(recorded, vec![("c", 2), ("d", 2), ("b", 1), ("a", 0)]);
    }
}