Browse Source

update to tantivy 0.6.1

* remove tantivy::TimerTree code
* change Index::open() to Index::open_in_dir()
* change Index::create() to Index::create_in_dir()
develop
Mackenzie Hauck 5 years ago
parent
commit
a26270d11d
7 changed files with 358 additions and 323 deletions
  1. +346
    -299
      Cargo.lock
  2. +2
    -7
      Cargo.toml
  3. +4
    -5
      src/commands/bench.rs
  4. +1
    -1
      src/commands/index.rs
  5. +3
    -3
      src/commands/merge.rs
  6. +1
    -1
      src/commands/search.rs
  7. +1
    -7
      src/commands/serve.rs

+ 346
- 299
Cargo.lock
File diff suppressed because it is too large
View File


+ 2
- 7
Cargo.toml View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-cli"
version = "0.5.1"
version = "0.6.1"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]

description = """Command line interface for Tantivy, a search engine library."""
@@ -30,7 +30,7 @@ byteorder = "0.5"
log = "0.3"
futures = "0.1"
env_logger = "0.3"
tantivy = "0.5.1"
tantivy = "0.6.1"

[[bin]]
name = "tantivy"
@@ -42,8 +42,3 @@ opt-level = 3
debug = false
debug-assertions = false
lto = true


[features]
default = ["tantivy/simdcompression"]


+ 4
- 5
src/commands/bench.rs View File

@@ -2,7 +2,6 @@ use tantivy::Index;
use tantivy::schema::{Field, Schema};
use tantivy::query::QueryParser;
use std::path::Path;
use tantivy::TimerTree;
use std::io::BufReader;
use std::io::BufRead;
use std::io;
@@ -52,7 +51,7 @@ fn run_bench(index_path: &Path,
println!("Query : {:?}", index_path);
println!("-------------------------------\n\n\n");
let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let searcher = index.searcher();
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?;
@@ -66,13 +65,11 @@ fn run_bench(index_path: &Path,
// let num_terms = query.num_terms();
let mut top_collector = TopCollector::with_limit(10);
let mut count_collector = CountCollector::default();
let timing;
{
let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
timing = query.search(&searcher, &mut collector)
query.search(&searcher, &mut collector)
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
}
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
}
}
@@ -85,6 +82,7 @@ fn run_bench(index_path: &Path,
let mut top_collector = TopCollector::with_limit(10);
query.search(&searcher, &mut top_collector)
.map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e))?;
/*
let mut timer = TimerTree::default();
{
let _scoped_timer_ = timer.open("total");
@@ -93,6 +91,7 @@ fn run_bench(index_path: &Path,
}
}
println!("{}\t{}", query_txt, timer.total_time());
*/
}
}


+ 1
- 1
src/commands/index.rs View File

@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf,
num_threads: usize,
no_merge: bool) -> tantivy::Result<()> {
let index = Index::open(&directory)?;
let index = Index::open_in_dir(&directory)?;
let schema = index.schema();
let (line_sender, line_receiver) = chan::sync(10_000);
let (doc_sender, doc_receiver) = chan::sync(10_000);


+ 3
- 3
src/commands/merge.rs View File

@@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {


fn run_merge(path: PathBuf) -> tantivy::Result<()> {
let index = Index::open(&path)?;
let index = Index::open_in_dir(&path)?;
let segments = index.searchable_segment_ids()?;
let segment_meta: SegmentMeta = index
.writer(HEAP_SIZE)?
.merge(&segments)
.merge(&segments)?
.wait()
.expect("Merge failed");
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
println!("Merge finished with segment meta {:?}", segment_meta);
println!("Garbage collect irrelevant segments.");
Index::open(&path)?
Index::open_in_dir(&path)?
.writer_with_num_threads(1, 40_000_000)?
.garbage_collect_files()?;
Ok(())


+ 1
- 1
src/commands/search.rs View File

@@ -17,7 +17,7 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
}

fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
let index = Index::open(directory)?;
let index = Index::open_in_dir(directory)?;
let schema = index.schema();
let default_fields: Vec<Field> = schema
.fields()


+ 1
- 7
src/commands/serve.rs View File

@@ -39,7 +39,6 @@ use tantivy::schema::Field;
use tantivy::schema::FieldType;
use tantivy::schema::NamedFieldDocument;
use tantivy::schema::Schema;
use tantivy::TimerTree;
use tantivy::tokenizer::*;
use tantivy::DocAddress;
use urlencoded::UrlEncodedQuery;
@@ -58,7 +57,6 @@ struct Serp {
q: String,
num_hits: usize,
hits: Vec<Hit>,
timings: TimerTree,
}

#[derive(Serialize)]
@@ -76,7 +74,7 @@ struct IndexServer {
impl IndexServer {
fn load(path: &Path) -> IndexServer {
let index = Index::open(path).unwrap();
let index = Index::open_in_dir(path).unwrap();
index.tokenizers()
.register("commoncrawl", SimpleTokenizer
.filter(RemoveLongFilter::limit(40))
@@ -121,16 +119,13 @@ impl IndexServer {
let searcher = self.index.searcher();
let mut count_collector = CountCollector::default();
let mut top_collector = TopCollector::with_limit(num_hits);
let mut timer_tree = TimerTree::default();
{
let _search_timer = timer_tree.open("search");
let mut chained_collector = collector::chain()
.push(&mut top_collector)
.push(&mut count_collector);
query.search(&searcher, &mut chained_collector)?;
}
let hits: Vec<Hit> = {
let _fetching_timer = timer_tree.open("fetching docs");
top_collector.docs()
.iter()
.map(|doc_address| {
@@ -143,7 +138,6 @@ impl IndexServer {
q,
num_hits: count_collector.count(),
hits,
timings: timer_tree,
})
}
}


Loading…
Cancel
Save