* remove tantivy::TimerTree code
* change Index::open() to Index::open_in_dir()
* change Index::create() to Index::create_in_dir()
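For context, a minimal sketch of what the call-site migration looks like, assuming tantivy 0.6's directory-based constructors; the `meta.json` existence check and the `body` field are illustrative placeholders, not part of this diff:

use std::path::Path;
use tantivy::schema::{SchemaBuilder, TEXT};
use tantivy::Index;

// Open an existing index, or create a fresh one, using the renamed
// directory-based constructors.
fn open_or_create(index_dir: &Path) -> tantivy::Result<Index> {
    if index_dir.join("meta.json").exists() {
        // tantivy 0.5: Index::open(index_dir)
        Index::open_in_dir(index_dir)
    } else {
        let mut schema_builder = SchemaBuilder::default();
        schema_builder.add_text_field("body", TEXT);
        // tantivy 0.5: Index::create(index_dir, schema)
        Index::create_in_dir(index_dir, schema_builder.build())
    }
}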
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-cli"
-version = "0.5.1"
+version = "0.6.1"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 description = """Command line interface for Tantivy, a search engine library."""
@@ -30,7 +30,7 @@ byteorder = "0.5"
 log = "0.3"
 futures = "0.1"
 env_logger = "0.3"
-tantivy = "0.5.1"
+tantivy = "0.6.1"

 [[bin]]
 name = "tantivy"
@@ -42,8 +42,3 @@ opt-level = 3
 debug = false
 debug-assertions = false
 lto = true
-
-[features]
-default = ["tantivy/simdcompression"]
--- a/src/commands/bench.rs
+++ b/src/commands/bench.rs
@@ -2,7 +2,6 @@ use tantivy::Index;
 use tantivy::schema::{Field, Schema};
 use tantivy::query::QueryParser;
 use std::path::Path;
-use tantivy::TimerTree;
 use std::io::BufReader;
 use std::io::BufRead;
 use std::io;
@@ -52,7 +51,7 @@ fn run_bench(index_path: &Path,
     println!("Query : {:?}", index_path);
     println!("-------------------------------\n\n\n");
-    let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
+    let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
     let searcher = index.searcher();
     let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
     let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?;
@@ -66,13 +65,11 @@ fn run_bench(index_path: &Path,
             // let num_terms = query.num_terms();
             let mut top_collector = TopCollector::with_limit(10);
             let mut count_collector = CountCollector::default();
-            let timing;
             {
                 let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
-                timing = query.search(&searcher, &mut collector)
+                query.search(&searcher, &mut collector)
                     .map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
             }
-            println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
         }
     }
@@ -85,6 +82,7 @@ fn run_bench(index_path: &Path,
             let mut top_collector = TopCollector::with_limit(10);
             query.search(&searcher, &mut top_collector)
                 .map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e))?;
+            /*
             let mut timer = TimerTree::default();
             {
                 let _scoped_timer_ = timer.open("total");
@@ -93,6 +91,7 @@ fn run_bench(index_path: &Path,
                 }
             }
             println!("{}\t{}", query_txt, timer.total_time());
+            */
         }
     }
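With TimerTree removed, the benchmark's first pass no longer reports per-query latency. A hypothetical replacement (not part of this diff) is to time each search with std::time::Instant from the standard library:

use std::time::Instant;

// Runs a search closure and returns its result together with the elapsed
// wall-clock time in microseconds.
fn timed<T, E>(search: impl FnOnce() -> Result<T, E>) -> Result<(T, u64), E> {
    let start = Instant::now();
    let value = search()?;
    let elapsed = start.elapsed();
    Ok((value, elapsed.as_secs() * 1_000_000 + u64::from(elapsed.subsec_micros())))
}

Inside the query loop this would be called as `let (_, micros) = timed(|| query.search(&searcher, &mut collector))?;`, restoring the dropped `println!` with `micros` in place of `timing.total_time()`.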
--- a/src/commands/index.rs
+++ b/src/commands/index.rs
@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf,
              num_threads: usize,
              no_merge: bool) -> tantivy::Result<()> {
-    let index = Index::open(&directory)?;
+    let index = Index::open_in_dir(&directory)?;
     let schema = index.schema();
     let (line_sender, line_receiver) = chan::sync(10_000);
     let (doc_sender, doc_receiver) = chan::sync(10_000);
--- a/src/commands/merge.rs
+++ b/src/commands/merge.rs
@@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {
 fn run_merge(path: PathBuf) -> tantivy::Result<()> {
-    let index = Index::open(&path)?;
+    let index = Index::open_in_dir(&path)?;
     let segments = index.searchable_segment_ids()?;
     let segment_meta: SegmentMeta = index
         .writer(HEAP_SIZE)?
-        .merge(&segments)
+        .merge(&segments)?
         .wait()
         .expect("Merge failed");
     //.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
     println!("Merge finished with segment meta {:?}", segment_meta);
     println!("Garbage collect irrelevant segments.");
-    Index::open(&path)?
+    Index::open_in_dir(&path)?
         .writer_with_num_threads(1, 40_000_000)?
         .garbage_collect_files()?;
     Ok(())
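Note the second change in this hunk besides the rename: `merge` now returns a `Result`, so the added `?` surfaces a failure to start the merge before `wait()` blocks on it. Annotated, with the same bindings as in `run_merge` above:

// Same chain as in the hunk above, with each step's role spelled out.
let segment_meta: SegmentMeta = index
    .writer(HEAP_SIZE)?      // obtain an IndexWriter, propagating failures
    .merge(&segments)?       // start the merge; this now returns a Result, hence `?`
    .wait()                  // block until the background merge finishes
    .expect("Merge failed"); // panic if the merge itself errored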
--- a/src/commands/search.rs
+++ b/src/commands/search.rs
@@ -17,7 +17,7 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
 }

 fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
-    let index = Index::open(directory)?;
+    let index = Index::open_in_dir(directory)?;
     let schema = index.schema();
     let default_fields: Vec<Field> = schema
         .fields()
--- a/src/commands/serve.rs
+++ b/src/commands/serve.rs
@@ -39,7 +39,6 @@ use tantivy::schema::Field;
 use tantivy::schema::FieldType;
 use tantivy::schema::NamedFieldDocument;
 use tantivy::schema::Schema;
-use tantivy::TimerTree;
 use tantivy::tokenizer::*;
 use tantivy::DocAddress;
 use urlencoded::UrlEncodedQuery;
@@ -58,7 +57,6 @@ struct Serp {
     q: String,
     num_hits: usize,
     hits: Vec<Hit>,
-    timings: TimerTree,
 }

 #[derive(Serialize)]
@@ -76,7 +74,7 @@ struct IndexServer {
 impl IndexServer {
     fn load(path: &Path) -> IndexServer {
-        let index = Index::open(path).unwrap();
+        let index = Index::open_in_dir(path).unwrap();
         index.tokenizers()
             .register("commoncrawl", SimpleTokenizer
                 .filter(RemoveLongFilter::limit(40))
@@ -121,16 +119,13 @@ impl IndexServer {
         let searcher = self.index.searcher();
         let mut count_collector = CountCollector::default();
         let mut top_collector = TopCollector::with_limit(num_hits);
-        let mut timer_tree = TimerTree::default();
         {
-            let _search_timer = timer_tree.open("search");
             let mut chained_collector = collector::chain()
                 .push(&mut top_collector)
                 .push(&mut count_collector);
             query.search(&searcher, &mut chained_collector)?;
         }
         let hits: Vec<Hit> = {
-            let _fetching_timer = timer_tree.open("fetching docs");
             top_collector.docs()
                 .iter()
                 .map(|doc_address| {
@@ -143,7 +138,6 @@ impl IndexServer {
             q,
             num_hits: count_collector.count(),
             hits,
-            timings: timer_tree,
         })
     }
 }
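One consumer-visible consequence, assuming the responses are serialized via the `#[derive(Serialize)]` shown above: the search endpoint's payload loses its `timings` object, leaving the shape below, so any client that reads `timings` would need updating.

// The Serp response shape after this diff (fields as in the hunk above).
#[derive(Serialize)]
struct Serp {
    q: String,
    num_hits: usize,
    hits: Vec<Hit>,
}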