Browse Source

Merge pull request #28 from nocduro/update_tantivy

update tantivy to 0.6.1
develop
Paul Masurel GitHub 5 years ago
parent
commit
a81b270442
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 466 additions and 329 deletions
  1. +346
    -299
      Cargo.lock
  2. +2
    -7
      Cargo.toml
  3. +4
    -10
      README.md
  4. +5
    -4
      src/commands/bench.rs
  5. +1
    -1
      src/commands/index.rs
  6. +3
    -3
      src/commands/merge.rs
  7. +2
    -2
      src/commands/new.rs
  8. +1
    -1
      src/commands/search.rs
  9. +2
    -2
      src/commands/serve.rs
  10. +1
    -0
      src/main.rs
  11. +99
    -0
      src/timer.rs

+ 346
- 299
Cargo.lock
File diff suppressed because it is too large
View File


+ 2
- 7
Cargo.toml View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-cli"
version = "0.5.1"
version = "0.6.1"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]

description = """Command line interface for Tantivy, a search engine library."""
@@ -30,7 +30,7 @@ byteorder = "0.5"
log = "0.3"
futures = "0.1"
env_logger = "0.3"
tantivy = "0.5.1"
tantivy = "0.6.1"

[[bin]]
name = "tantivy"
@@ -42,8 +42,3 @@ opt-level = 3
debug = false
debug-assertions = false
lto = true


[features]
default = ["tantivy/simdcompression"]


+ 4
- 10
README.md View File

@@ -15,14 +15,8 @@ In this tutorial, we will create a brand new index with the articles of English

There are a couple ways to install `tantivy-cli`.

If you are a Rust programmer, you probably have `cargo` and `rustup` installed and you can just
run `rustup run nightly cargo install tantivy-cli`. (`cargo install tantivy-cli` will work
as well if nightly is your default toolchain).

Alternatively, you can directly download a
static binary for [Linux x86 64](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-unknown-linux-musl.tar.gz) or for [Mac OS X](https://github.com/tantivy-search/tantivy-cli/releases/download/0.4.2/tantivy-cli-0.4.2-x86_64-apple-darwin.tar.gz)
and save it in a directory on your system's `PATH`.

If you are a Rust programmer, you probably have `cargo` installed and you can just
run `cargo install tantivy-cli`



@@ -217,11 +211,11 @@ the following [url](http://localhost:3000/api/?q=barack+obama&nhits=20) in your
By default this query is treated as `barack OR obama`.
You can also search for documents that contain both terms by adding a `+` sign before each term in your query.

http://localhost:3000/api/?q=%2Bbarack%20%2Bobama%0A&nhits=20
http://localhost:3000/api/?q=%2Bbarack%20%2Bobama&nhits=20
Also, `-` makes it possible to exclude the documents containing a specific term.

http://localhost:3000/api/?q=-barack%20%2Bobama%0A&nhits=20
http://localhost:3000/api/?q=-barack%20%2Bobama&nhits=20
Finally, tantivy handles phrase queries.



+ 5
- 4
src/commands/bench.rs View File

@@ -2,7 +2,6 @@ use tantivy::Index;
use tantivy::schema::{Field, Schema};
use tantivy::query::QueryParser;
use std::path::Path;
use tantivy::TimerTree;
use std::io::BufReader;
use std::io::BufRead;
use std::io;
@@ -12,6 +11,7 @@ use tantivy::collector::TopCollector;
use tantivy::collector::CountCollector;
use clap::ArgMatches;
use std::path::PathBuf;
use timer::TimerTree;


pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
@@ -52,7 +52,7 @@ fn run_bench(index_path: &Path,
println!("Query : {:?}", index_path);
println!("-------------------------------\n\n\n");
let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let index = Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let searcher = index.searcher();
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?;
@@ -66,10 +66,11 @@ fn run_bench(index_path: &Path,
// let num_terms = query.num_terms();
let mut top_collector = TopCollector::with_limit(10);
let mut count_collector = CountCollector::default();
let timing;
let mut timing = TimerTree::default();
{
let _search = timing.open("search");
let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
timing = query.search(&searcher, &mut collector)
query.search(&searcher, &mut collector)
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
}
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());


+ 1
- 1
src/commands/index.rs View File

@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf,
num_threads: usize,
no_merge: bool) -> tantivy::Result<()> {
let index = Index::open(&directory)?;
let index = Index::open_in_dir(&directory)?;
let schema = index.schema();
let (line_sender, line_receiver) = chan::sync(10_000);
let (doc_sender, doc_receiver) = chan::sync(10_000);


+ 3
- 3
src/commands/merge.rs View File

@@ -22,17 +22,17 @@ pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {


fn run_merge(path: PathBuf) -> tantivy::Result<()> {
let index = Index::open(&path)?;
let index = Index::open_in_dir(&path)?;
let segments = index.searchable_segment_ids()?;
let segment_meta: SegmentMeta = index
.writer(HEAP_SIZE)?
.merge(&segments)
.merge(&segments)?
.wait()
.expect("Merge failed");
//.map_err(|_| tantivy::Error::ErrorInThread(String::from("Merge got cancelled")));
println!("Merge finished with segment meta {:?}", segment_meta);
println!("Garbage collect irrelevant segments.");
Index::open(&path)?
Index::open_in_dir(&path)?
.writer_with_num_threads(1, 40_000_000)?
.garbage_collect_files()?;
Ok(())


+ 2
- 2
src/commands/new.rs View File

@@ -24,7 +24,7 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate:
io::stdout().flush().unwrap();
let mut buffer = String::new();
io::stdin().read_line(&mut buffer).ok().expect("Failed to read line");
let answer = buffer.trim_right_matches("\n").to_string();
let answer = buffer.trim_right().to_string();
match predicate(&answer) {
Ok(()) => {
return answer;
@@ -145,7 +145,7 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> {
let schema = schema_builder.build();
let schema_json = format!("{}", serde_json::to_string_pretty(&schema).unwrap());
println!("\n{}\n", Style::new().fg(Green).paint(schema_json));
Index::create(&directory, schema)?;
Index::create_in_dir(&directory, schema)?;
Ok(())
}


+ 1
- 1
src/commands/search.rs View File

@@ -17,7 +17,7 @@ pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
}

fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
let index = Index::open(directory)?;
let index = Index::open_in_dir(directory)?;
let schema = index.schema();
let default_fields: Vec<Field> = schema
.fields()


+ 2
- 2
src/commands/serve.rs View File

@@ -39,9 +39,9 @@ use tantivy::schema::Field;
use tantivy::schema::FieldType;
use tantivy::schema::NamedFieldDocument;
use tantivy::schema::Schema;
use tantivy::TimerTree;
use tantivy::tokenizer::*;
use tantivy::DocAddress;
use timer::TimerTree;
use urlencoded::UrlEncodedQuery;

pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
@@ -76,7 +76,7 @@ struct IndexServer {
impl IndexServer {
fn load(path: &Path) -> IndexServer {
let index = Index::open(path).unwrap();
let index = Index::open_in_dir(path).unwrap();
index.tokenizers()
.register("commoncrawl", SimpleTokenizer
.filter(RemoveLongFilter::limit(40))


+ 1
- 0
src/main.rs View File

@@ -23,6 +23,7 @@ extern crate serde_derive;

use clap::{AppSettings, Arg, App, SubCommand};
mod commands;
pub mod timer;
use self::commands::*;




+ 99
- 0
src/timer.rs View File

@@ -0,0 +1,99 @@
use time::PreciseTime;

/// A running timer for one named task.
///
/// Obtained from `TimerTree::open` (depth 0) or from `OpenTimer::open`
/// (parent depth + 1). When the value is dropped, a `Timing` holding the
/// elapsed microseconds is appended to the borrowed `TimerTree`.
pub struct OpenTimer<'a> {
/// Label copied into the `Timing` recorded on drop.
name: &'static str,
/// Tree that receives the `Timing` when this timer is dropped.
timer_tree: &'a mut TimerTree,
/// Instant captured when the timer was opened.
start: PreciseTime,
/// Nesting level: 0 for a timer opened on the tree, +1 per nested `open`.
depth: u32,
}

impl<'a> OpenTimer<'a> {
    /// Begins timing a named subtask nested under this timer.
    ///
    /// The child reports into the same `TimerTree` as its parent and sits
    /// one level deeper. It stops, and records its `Timing`, as soon as the
    /// returned `OpenTimer` is dropped.
    pub fn open(&mut self, name: &'static str) -> OpenTimer {
        // Reborrow the tree for the child's lifetime; the parent cannot be
        // used again until the child has been dropped.
        let timer_tree = &mut *self.timer_tree;
        let start = PreciseTime::now();
        let depth = self.depth + 1;
        OpenTimer {
            name,
            timer_tree,
            start,
            depth,
        }
    }
}

impl<'a> Drop for OpenTimer<'a> {
    /// Stops the timer and appends its measurement to the tree.
    ///
    /// The elapsed time between `start` and now is stored in microseconds.
    /// The `unwrap` panics if `num_microseconds` returns `None`.
    fn drop(&mut self) {
        let elapsed = self.start.to(PreciseTime::now());
        let record = Timing {
            name: self.name,
            duration: elapsed.num_microseconds().unwrap(),
            depth: self.depth,
        };
        self.timer_tree.timings.push(record);
    }
}

/// One finished measurement, recorded when an `OpenTimer` is dropped.
#[derive(Debug, Serialize)]
pub struct Timing {
/// Label given when the timer was opened.
name: &'static str,
/// Elapsed time in microseconds.
duration: i64,
/// Nesting level of the timer that produced this record
/// (0 = opened directly on the `TimerTree`).
depth: u32,
}

/// Timer tree.
///
/// Stored as a flat list: a `Timing` is appended each time an `OpenTimer`
/// is dropped, and the `depth` of each entry records how deeply that timer
/// was nested.
#[derive(Debug, Serialize)]
pub struct TimerTree {
/// Completed timings, in the order their timers were dropped.
timings: Vec<Timing>,
}

impl TimerTree {
    /// Returns the duration, in microseconds, of the most recently
    /// finished timer.
    ///
    /// Timings are appended when an `OpenTimer` is dropped, so once the
    /// outermost timer has been dropped the last entry is that timer and
    /// its duration covers everything nested inside it.
    ///
    /// Returns `0` when no timer has finished yet, where the previous
    /// implementation panicked on an empty tree.
    pub fn total_time(&self) -> i64 {
        self.timings.last().map_or(0, |timing| timing.duration)
    }

    /// Open a new named top-level task (depth 0).
    ///
    /// The returned `OpenTimer` mutably borrows the tree and records its
    /// `Timing` into it when dropped.
    pub fn open(&mut self, name: &'static str) -> OpenTimer {
        OpenTimer {
            name,
            timer_tree: self,
            start: PreciseTime::now(),
            depth: 0,
        }
    }
}

impl Default for TimerTree {
    /// Builds a tree with no recorded timings.
    fn default() -> TimerTree {
        let timings = Vec::new();
        TimerTree { timings }
    }
}

#[cfg(test)]
mod tests {

use super::*;

// Opens four timers nested as a -> b -> {c, d} and checks that each one
// contributes exactly one `Timing` to the tree once it has been dropped.
#[test]
fn test_timer() {
let mut timer_tree = TimerTree::default();
{
let mut a = timer_tree.open("a");
{
let mut ab = a.open("b");
{
// Dropped at the end of this block, recording "c".
let _abc = ab.open("c");
}
{
// Dropped at the end of this block, recording "d".
let _abd = ab.open("d");
}
// `ab` is dropped here, recording "b".
}
// `a` is dropped here, recording "a" and releasing the borrow on the tree.
}
// One entry per timer: c, d, b, a.
assert_eq!(timer_tree.timings.len(), 4);
}
}

Loading…
Cancel
Save