Browse Source

update to tantivy 0.6.1

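* remove tantivy::TimerTree code (the per-query timing output in bench.rs and the `timings` field of the serve.rs response)
* change Index::open() to Index::open_in_dir()
* change Index::create() to Index::create_in_dir()
* add `?` to the writer's merge() call in merge.rs
* remove the `[features]` section (default = ["tantivy/simdcompression"]) from Cargo.toml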
develop
Mackenzie Hauck 5 years ago
parent
commit
a26270d11d
7 changed files with 358 additions and 323 deletions
  1. +346
    -299
      Cargo.lock
  2. +2
    -7
      Cargo.toml
  3. +4
    -5
      src/commands/bench.rs
  4. +1
    -1
      src/commands/index.rs
  5. +3
    -3
      src/commands/merge.rs
  6. +1
    -1
      src/commands/search.rs
  7. +1
    -7
      src/commands/serve.rs

+ 346
- 299
Cargo.lock
File diff suppressed because it is too large
View File


+ 2
- 7
Cargo.toml View File

@@ -1,6 +1,6 @@
[package] [package]
name = "tantivy-cli" name = "tantivy-cli"
version = "0.5.1"
version = "0.6.1"
authors = ["Paul Masurel <paul.masurel@gmail.com>"] authors = ["Paul Masurel <paul.masurel@gmail.com>"]


description = """Command line interface for Tantivy, a search engine library.""" description = """Command line interface for Tantivy, a search engine library."""
@@ -30,7 +30,7 @@ byteorder = "0.5"
log = "0.3" log = "0.3"
futures = "0.1" futures = "0.1"
env_logger = "0.3" env_logger = "0.3"
tantivy = "0.5.1"
tantivy = "0.6.1"


[[bin]] [[bin]]
name = "tantivy" name = "tantivy"
@@ -42,8 +42,3 @@ opt-level = 3
debug = false debug = false
debug-assertions = false debug-assertions = false
lto = true lto = true


[features]
default = ["tantivy/simdcompression"]


+ 4
- 5
src/commands/bench.rs View File

@@ -2,7 +2,6 @@ use tantivy::Index;
use tantivy::schema::{Field, Schema}; use tantivy::schema::{Field, Schema};
use tantivy::query::QueryParser; use tantivy::query::QueryParser;
use std::path::Path; use std::path::Path;
use tantivy::TimerTree;
use std::io::BufReader; use std::io::BufReader;
use std::io::BufRead; use std::io::BufRead;
use std::io; use std::io;
@@ -52,7 +51,7 @@ fn run_bench(index_path: &Path,
println!("Query : {:?}", index_path); println!("Query : {:?}", index_path);
println!("-------------------------------\n\n\n"); println!("-------------------------------\n\n\n");
let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let searcher = index.searcher(); let searcher = index.searcher();
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema()); let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?; let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?;
@@ -66,13 +65,11 @@ fn run_bench(index_path: &Path,
// let num_terms = query.num_terms(); // let num_terms = query.num_terms();
let mut top_collector = TopCollector::with_limit(10); let mut top_collector = TopCollector::with_limit(10);
let mut count_collector = CountCollector::default(); let mut count_collector = CountCollector::default();
let timing;
{ {
let mut collector = chain().push(&mut top_collector).push(&mut count_collector); let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
timing = query.search(&searcher, &mut collector)
query.search(&searcher, &mut collector)
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?; .map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
} }
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
} }
} }
@@ -85,6 +82,7 @@ fn run_bench(index_path: &Path,
let mut top_collector = TopCollector::with_limit(10); let mut top_collector = TopCollector::with_limit(10);
query.search(&searcher, &mut top_collector) query.search(&searcher, &mut top_collector)
.map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e))?; .map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e))?;
/*
let mut timer = TimerTree::default(); let mut timer = TimerTree::default();
{ {
let _scoped_timer_ = timer.open("total"); let _scoped_timer_ = timer.open("total");
@@ -93,6 +91,7 @@ fn run_bench(index_path: &Path,
} }
} }
println!("{}\t{}", query_txt, timer.total_time()); println!("{}\t{}", query_txt, timer.total_time());
*/
} }
} }


+ 1
- 1
src/commands/index.rs View File

@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf,
num_threads: usize, num_threads: usize,
no_merge: bool) -> tantivy::Result<()> { no_merge: bool) -> tantivy::Result<()> {
let index = Index::open(&directory)?;
let index = Index::open_in_dir(&directory)?;
let schema = index.schema(); let schema = index.schema();
let (line_sender, line_receiver) = chan::sync(10_000); let (line_sender, line_receiver) = chan::sync(10_000);
let (doc_sender, doc_receiver) = chan::sync(10_000); let (doc_sender, doc_receiver) = chan::sync(10_000);


+ 3
- 3
src/commands/merge.rs View File

@@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {




fn run_merge(path: PathBuf) -> tantivy::Result<()> { fn run_merge(path: PathBuf) -> tantivy::Result<()> {
let index = Index::open(&path)?;
let index = Index::open_in_dir(&path)?;
let segments = index.searchable_segment_ids()?; let segments = index.searchable_segment_ids()?;
let segment_meta: SegmentMeta = index let segment_meta: SegmentMeta = index
.writer(HEAP_SIZE)? .writer(HEAP_SIZE)?
.merge(&segments)
.merge(&segments)?
.wait() .wait()
.expect("Merge failed"); .expect("Merge failed");
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled"))); //.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
println!("Merge finished with segment meta {:?}", segment_meta); println!("Merge finished with segment meta {:?}", segment_meta);
println!("Garbage collect irrelevant segments."); println!("Garbage collect irrelevant segments.");
Index::open(&path)?
Index::open_in_dir(&path)?
.writer_with_num_threads(1, 40_000_000)? .writer_with_num_threads(1, 40_000_000)?
.garbage_collect_files()?; .garbage_collect_files()?;
Ok(()) Ok(())


+ 1
- 1
src/commands/search.rs View File

@@ -17,7 +17,7 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
} }


fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> { fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
let index = Index::open(directory)?;
let index = Index::open_in_dir(directory)?;
let schema = index.schema(); let schema = index.schema();
let default_fields: Vec<Field> = schema let default_fields: Vec<Field> = schema
.fields() .fields()


+ 1
- 7
src/commands/serve.rs View File

@@ -39,7 +39,6 @@ use tantivy::schema::Field;
use tantivy::schema::FieldType; use tantivy::schema::FieldType;
use tantivy::schema::NamedFieldDocument; use tantivy::schema::NamedFieldDocument;
use tantivy::schema::Schema; use tantivy::schema::Schema;
use tantivy::TimerTree;
use tantivy::tokenizer::*; use tantivy::tokenizer::*;
use tantivy::DocAddress; use tantivy::DocAddress;
use urlencoded::UrlEncodedQuery; use urlencoded::UrlEncodedQuery;
@@ -58,7 +57,6 @@ struct Serp {
q: String, q: String,
num_hits: usize, num_hits: usize,
hits: Vec<Hit>, hits: Vec<Hit>,
timings: TimerTree,
} }


#[derive(Serialize)] #[derive(Serialize)]
@@ -76,7 +74,7 @@ struct IndexServer {
impl IndexServer { impl IndexServer {
fn load(path: &Path) -> IndexServer { fn load(path: &Path) -> IndexServer {
let index = Index::open(path).unwrap();
let index = Index::open_in_dir(path).unwrap();
index.tokenizers() index.tokenizers()
.register("commoncrawl", SimpleTokenizer .register("commoncrawl", SimpleTokenizer
.filter(RemoveLongFilter::limit(40)) .filter(RemoveLongFilter::limit(40))
@@ -121,16 +119,13 @@ impl IndexServer {
let searcher = self.index.searcher(); let searcher = self.index.searcher();
let mut count_collector = CountCollector::default(); let mut count_collector = CountCollector::default();
let mut top_collector = TopCollector::with_limit(num_hits); let mut top_collector = TopCollector::with_limit(num_hits);
let mut timer_tree = TimerTree::default();
{ {
let _search_timer = timer_tree.open("search");
let mut chained_collector = collector::chain() let mut chained_collector = collector::chain()
.push(&mut top_collector) .push(&mut top_collector)
.push(&mut count_collector); .push(&mut count_collector);
query.search(&searcher, &mut chained_collector)?; query.search(&searcher, &mut chained_collector)?;
} }
let hits: Vec<Hit> = { let hits: Vec<Hit> = {
let _fetching_timer = timer_tree.open("fetching docs");
top_collector.docs() top_collector.docs()
.iter() .iter()
.map(|doc_address| { .map(|doc_address| {
@@ -143,7 +138,6 @@ impl IndexServer {
q, q,
num_hits: count_collector.count(), num_hits: count_collector.count(),
hits, hits,
timings: timer_tree,
}) })
} }
} }


Loading…
Cancel
Save