Browse Source

Merge branch 'develop'

develop
Paul Masurel 6 years ago
parent
commit
771017bc84
7 changed files with 116 additions and 89 deletions
  1. +44
    -28
      Cargo.lock
  2. +3
    -2
      Cargo.toml
  3. +10
    -10
      src/commands/bench.rs
  4. +9
    -12
      src/commands/index.rs
  5. +21
    -19
      src/commands/new.rs
  6. +5
    -5
      src/commands/search.rs
  7. +24
    -13
      src/commands/serve.rs

+ 44
- 28
Cargo.lock View File

@@ -82,16 +82,6 @@ dependencies = [
"serde 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "bincode"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "bit-set"
version = "0.4.0"
@@ -207,7 +197,12 @@ dependencies = [

[[package]]
name = "crossbeam"
version = "0.2.12"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "downcast"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
@@ -263,13 +258,11 @@ dependencies = [

[[package]]
name = "fst"
version = "0.1.38"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
@@ -457,6 +450,11 @@ dependencies = [
"libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "maplit"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "matches"
version = "0.1.6"
@@ -704,6 +702,11 @@ name = "regex-syntax"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "rust-stemmers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "rustc-demangle"
version = "0.1.5"
@@ -838,30 +841,31 @@ dependencies = [

[[package]]
name = "tantivy"
version = "0.4.4"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"atomicwrites 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"cc 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"chan 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)",
"combine 2.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"downcast 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
"error-chain 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fst 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)",
"fst 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)",
"futures-cpupool 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.35 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
"lz4 1.22.0 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rust-stemmers 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -869,14 +873,14 @@ dependencies = [
"tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
"tinysegmenter 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"version 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "tantivy-cli"
version = "0.4.4"
version = "0.5.0"
dependencies = [
"ansi_term 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bincode 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -893,7 +897,7 @@ dependencies = [
"serde_derive 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)",
"staticfile 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tantivy 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"tantivy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
"urlencoded 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"version 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -973,6 +977,15 @@ dependencies = [
"winapi 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "tinysegmenter"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "traitobject"
version = "0.0.1"
@@ -1143,7 +1156,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum backtrace 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ebbbf59b1c43eefa8c3ede390fcc36820b4999f7914104015be25025e0d62af2"
"checksum backtrace-sys 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "44585761d6161b0f57afc49482ab6bd067e4edef48c12a152c237eb0203f7661"
"checksum bincode 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "60f89d68caf4f2e8a94efd192a2b8393869e72336dea4e0fe077cc6eb5f2057e"
"checksum bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e103c8b299b28a9c6990458b7013dc4a8356a9b854c51b9883241f5866fac36e"
"checksum bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d9bf6104718e80d7b26a68fdbacff3481cfc05df670821affc7e9cbc1884400c"
"checksum bit-vec 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "02b4ff8b16e6076c3e14220b39fbc1fabb6737522281a388998046859400895f"
"checksum bitflags 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4f67931368edf3a9a51d29886d245f1c3db2f1ef0dcc9e35ff70341b78c10d23"
@@ -1160,7 +1172,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum combine 2.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1645a65a99c7c8d345761f4b75a6ffe5be3b3b27a93ee731fccc5050ba6be97c"
"checksum conduit-mime-types 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "95ca30253581af809925ef68c2641cc140d6183f43e12e0af4992d53768bd7b8"
"checksum cookie 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0e3d6405328b6edb412158b3b7710e2634e23f3614b9bb1c412df7952489a626"
"checksum crossbeam 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)" = "bd66663db5a988098a89599d4857919b3acf7f61402e61365acfd3919857b9be"
"checksum crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "24ce9782d4d5c53674646a6a4c1863a21a8fc0cb649b3c94dfc16e45071dea19"
"checksum downcast 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6c6fe31318b6ef21166c8e839e680238eb16f875849d597544eead7ec882eed3"
"checksum dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0dd841b58510c9618291ffa448da2e4e0f699d984d436122372f446dae62263d"
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
"checksum either 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "740178ddf48b1a9e878e6d6509a1442a2d42fd2928aae8e7a6f8a36fb01981b3"
@@ -1168,7 +1181,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum error 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "a6e606f14042bb87cc02ef6a14db6c90ab92ed6f62d87e69377bc759fd7987cc"
"checksum error-chain 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6930e04918388a9a2e41d518c25cf679ccafe26733fb4127dbf21993f2575d46"
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
"checksum fst 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)" = "4667468a5e6f0eea9cc30ebf1cce752cb831974e319d7fff312aad85652c1596"
"checksum fst 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "11e21bdd626be09f2bd66b44dbb724538176aa3549f3109208db35538dd2699f"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
"checksum futures 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)" = "118b49cac82e04121117cbd3121ede3147e885627d82c4546b87c702debb90c1"
@@ -1193,6 +1206,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "89f010e843f2b1a31dbd316b3b8d443758bc634bed37aabade59c686d644e0a2"
"checksum lz4 1.22.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fe55d2ebbc2e4fc987e6fbfc13f416d97b06d06e50bc1124d613aa790842f80c"
"checksum lz4-sys 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a59044c3ba3994f3d2aa2270ddd6c5947922219501e67efde5604d36aad462b5"
"checksum maplit 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "22593015b8df7747861c69c28acd32589fb96c1686369f3b661d12e409d4cf65"
"checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d"
@@ -1223,6 +1237,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum regex 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "744554e01ccbd98fff8c457c3b092cd67af62a555a43bfe97ae8a0451f7799fa"
"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
"checksum regex-syntax 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8e931c58b93d86f080c734bfd2bce7dd0079ae2331235818133c8be7f422e20e"
"checksum rust-stemmers 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8398e39ef1740238f87fcc4171fccc2231ba7ef1ecd64075d77feb0041927fc7"
"checksum rustc-demangle 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "aee45432acc62f7b9a108cc054142dac51f979e69e71ddce7d6fc7adf29e817e"
"checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
"checksum sequence_trie 0.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "d5b4eb0f7d1ff9b9666d8b8ff543f3705dd464025269a5b0e1988ffa60ca1be8"
@@ -1240,7 +1255,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694"
"checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad"
"checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6"
"checksum tantivy 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "dc901afb44c7e1f163383c2ae8af25d1e212bbcb0bcc95f5485f0e62749d17c5"
"checksum tantivy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dcaf44ed828eea2da561b2bb5bb490e9dc68ad375609330e512818c143cc9d7c"
"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6"
"checksum tempfile 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "11ce2fe9db64b842314052e2421ac61a73ce41b898dc8e3750398b219c5fc1e0"
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
@@ -1249,6 +1264,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
"checksum thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963"
"checksum time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "a15375f1df02096fb3317256ce2cee6a1f42fc84ea5ad5fc8c421cfe40c73098"
"checksum tinysegmenter 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7e767ff68150da3d23c88482da07abd6532e2e928093b80e79dc4818119bbc36"
"checksum traitobject 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "07eaeb7689bb7fca7ce15628319635758eda769fed481ecfe6686ddef2600616"
"checksum traitobject 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "efd1f82c56340fdf16f2a953d7bda4f8fdffba13d93b00844c25572110b26079"
"checksum typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1410f6f91f21d1612654e7cc69193b0334f909dcf2c790c4826254fbb86f8887"


+ 3
- 2
Cargo.toml View File

@@ -1,6 +1,6 @@
[package]
name = "tantivy-cli"
version = "0.4.5"
version = "0.5.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]

description = """Command line interface for Tantivy, a search engine library."""
@@ -31,7 +31,7 @@ log = "0.3"
futures = "0.1"
env_logger = "0.3"
version = "2"
tantivy = "0.4.4"
tantivy = "0.5.0"

[[bin]]
name = "tantivy"
@@ -44,6 +44,7 @@ debug = false
debug-assertions = false
lto = true


[features]
default = ["tantivy/simdcompression"]


+ 10
- 10
src/commands/bench.rs View File

@@ -17,7 +17,7 @@ use std::path::PathBuf;
pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
let index_path = PathBuf::from(matches.value_of("index").unwrap());
let queries_path = PathBuf::from(matches.value_of("queries").unwrap()); // the unwrap is safe as long as it is comming from the main cli.
let num_repeat = try!(value_t!(matches, "num_repeat", usize).map_err(|e|format!("Failed to read num_repeat argument as an integer. {:?}", e)));
let num_repeat = value_t!(matches, "num_repeat", usize).map_err(|e| format!("Failed to read num_repeat argument as an integer. {:?}", e))?;
run_bench(&index_path, &queries_path, num_repeat).map_err(From::from)
}

@@ -34,13 +34,11 @@ fn extract_search_fields(schema: &Schema) -> Vec<Field> {
}

fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
let query_file: File = try!(File::open(&query_path));
let query_file: File = File::open(&query_path)?;
let file = BufReader::new(&query_file);
let mut queries = Vec::new();
for line_res in file.lines() {
let line = try!(line_res);
let query = String::from(line.trim());
queries.push(query);
queries.push(line_res?);
}
Ok(queries)
}
@@ -54,11 +52,11 @@ fn run_bench(index_path: &Path,
println!("Query : {:?}", index_path);
println!("-------------------------------\n\n\n");
let index = try!(Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e)));
let index = Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
let searcher = index.searcher();
let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
let queries = try!(read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e)));
let query_parser = QueryParser::new(index.schema(), default_search_fields);
let queries = read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e))?;
let query_parser = QueryParser::new(index.schema(), default_search_fields, index.tokenizers().clone());
println!("SEARCH\n");
println!("{}\t{}\t{}\t{}", "query", "num_terms", "num hits", "time in microsecs");
@@ -71,7 +69,8 @@ fn run_bench(index_path: &Path,
let timing;
{
let mut collector = chain().push(&mut top_collector).push(&mut count_collector);
timing = try!(query.search(&searcher, &mut collector).map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e)));
timing = query.search(&searcher, &mut collector)
.map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e))?;
}
println!("{}\t{}\t{}", query_txt, count_collector.count(), timing.total_time());
}
@@ -84,7 +83,8 @@ fn run_bench(index_path: &Path,
for query_txt in &queries {
let query = query_parser.parse_query(&query_txt).unwrap();
let mut top_collector = TopCollector::with_limit(10);
try!(query.search(&searcher, &mut top_collector).map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e)));
query.search(&searcher, &mut top_collector)
.map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e))?;
let mut timer = TimerTree::default();
{
let _scoped_timer_ = timer.open("total");


+ 9
- 12
src/commands/index.rs View File

@@ -22,11 +22,11 @@ pub fn run_index_cli(argmatch: &ArgMatches) -> Result<(), String> {
.map(|path| DocumentSource::FromFile(PathBuf::from(path)))
.unwrap_or(DocumentSource::FromPipe);
let no_merge = argmatch.is_present("nomerge");
let mut num_threads = try!(value_t!(argmatch, "num_threads", usize).map_err(|_|format!("Failed to read num_threads argument as an integer.")));
let mut num_threads = value_t!(argmatch, "num_threads", usize).map_err(|_| format!("Failed to read num_threads argument as an integer."))?;
if num_threads == 0 {
num_threads = 1;
}
let buffer_size = try!(value_t!(argmatch, "memory_size", usize).map_err(|_|format!("Failed to read the buffer size argument as an integer.")));
let buffer_size = value_t!(argmatch, "memory_size", usize).map_err(|_| format!("Failed to read the buffer size argument as an integer."))?;
let buffer_size_per_thread = buffer_size / num_threads;
run_index(index_directory, document_source, buffer_size_per_thread, num_threads, no_merge).map_err(|e| format!("Indexing failed : {:?}", e))
}
@@ -37,7 +37,7 @@ fn run_index(directory: PathBuf,
num_threads: usize,
no_merge: bool) -> tantivy::Result<()> {
let index = try!(Index::open(&directory));
let index = Index::open(&directory)?;
let schema = index.schema();
let (line_sender, line_receiver) = chan::sync(10_000);
let (doc_sender, doc_receiver) = chan::sync(10_000);
@@ -71,14 +71,11 @@ fn run_index(directory: PathBuf,
}
drop(doc_sender);

let mut index_writer = try!(
if num_threads > 0 {
index.writer_with_num_threads(num_threads, buffer_size_per_thread)
}
else {
index.writer(buffer_size_per_thread)
}
);
let mut index_writer = if num_threads > 0 {
index.writer_with_num_threads(num_threads, buffer_size_per_thread)
} else {
index.writer(buffer_size_per_thread)
}?;
if no_merge {
index_writer.set_merge_policy(Box::new(NoMergePolicy));
@@ -145,7 +142,7 @@ impl DocumentSource {
BufReader::new(Box::new(io::stdin()))
}
&DocumentSource::FromFile(ref filepath) => {
let read_file = try!(File::open(&filepath));
let read_file = File::open(&filepath)?;
BufReader::new(Box::new(read_file))
}
})


+ 21
- 19
src/commands/new.rs View File

@@ -2,13 +2,13 @@ use clap::ArgMatches;
use std::convert::From;
use std::path::PathBuf;
use tantivy;
use tantivy::schema::Cardinality;
use tantivy::schema::*;
use tantivy::Index;
use std::io;
use ansi_term::Style;
use ansi_term::Colour::{Red, Blue, Green};
use std::io::Write;
use std::ascii::AsciiExt;
use serde_json;


@@ -77,29 +77,31 @@ fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) {
if prompt_yn("Should the field be stored") {
text_options = text_options.set_stored();
}
let is_indexed = prompt_yn("Should the field be indexed");
let indexing_options = if is_indexed {
if prompt_yn("Should the field be tokenized") {



if prompt_yn("Should the field be indexed") {
let mut text_indexing_options = TextFieldIndexing
::default()
.set_index_option(IndexRecordOption::Basic)
.set_tokenizer("en_stem");

if prompt_yn("Should the term be tokenized?") {
if prompt_yn("Should the term frequencies (per doc) be in the index") {
if prompt_yn("Should the term positions (per doc) be in the index") {
TextIndexingOptions::TokenizedWithFreqAndPosition
}
else {
TextIndexingOptions::TokenizedWithFreq
text_indexing_options = text_indexing_options.set_index_option(IndexRecordOption::WithFreqsAndPositions);
} else {
text_indexing_options = text_indexing_options.set_index_option(IndexRecordOption::WithFreqs);
}
}
else {
TextIndexingOptions::TokenizedNoFreq
}
}
else {
TextIndexingOptions::Untokenized
} else {
text_indexing_options = text_indexing_options.set_tokenizer("raw");
}

text_options = text_options.set_indexing_options(text_indexing_options);
}
else {
TextIndexingOptions::Unindexed
};
text_options = text_options.set_indexing_options(indexing_options);


schema_builder.add_text_field(field_name, text_options);
}

@@ -110,7 +112,7 @@ fn ask_add_field_u64(field_name: &str, schema_builder: &mut SchemaBuilder) {
u64_options = u64_options.set_stored();
}
if prompt_yn("Should the field be fast") {
u64_options = u64_options.set_fast();
u64_options = u64_options.set_fast(Cardinality::SingleValue);
}
if prompt_yn("Should the field be indexed") {
u64_options = u64_options.set_indexed();


+ 5
- 5
src/commands/search.rs View File

@@ -24,10 +24,10 @@ fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
.iter()
.enumerate()
.filter(
|&(_, ref field_entry)| {
|&(_, ref field_entry) | {
match *field_entry.field_type() {
FieldType::Str(ref text_field_options) => {
text_field_options.get_indexing_options().is_indexed()
text_field_options.get_indexing_options().is_some()
},
_ => false
}
@@ -35,13 +35,13 @@ fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
)
.map(|(i, _)| Field(i as u32))
.collect();
let query_parser = QueryParser::new(schema.clone(), default_fields);
let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
let query = query_parser.parse_query(query)?;
let searcher = index.searcher();
let weight = query.weight(&searcher)?;
let weight = query.weight(&searcher, false)?;
let schema = index.schema();
for segment_reader in searcher.segment_readers() {
let mut scorer = try!(weight.scorer(segment_reader));
let mut scorer = weight.scorer(segment_reader)?;
while scorer.advance() {
let doc_id = scorer.doc();
let doc = segment_reader.doc(doc_id)?;


+ 24
- 13
src/commands/serve.rs View File

@@ -40,6 +40,8 @@ use tantivy::schema::FieldType;
use tantivy::schema::NamedFieldDocument;
use tantivy::schema::Schema;
use tantivy::TimerTree;
use tantivy::tokenizer::*;
use tantivy::DocAddress;
use urlencoded::UrlEncodedQuery;

pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
@@ -62,6 +64,7 @@ struct Serp {
#[derive(Serialize)]
struct Hit {
doc: NamedFieldDocument,
id: u32,
}

struct IndexServer {
@@ -74,6 +77,13 @@ impl IndexServer {
fn load(path: &Path) -> IndexServer {
let index = Index::open(path).unwrap();
index.tokenizers()
.register("commoncrawl", SimpleTokenizer
.filter(RemoveLongFilter::limit(40))
.filter(LowerCaser)
.filter(AlphaNumOnlyFilter)
.filter(Stemmer::new())
);
let schema = index.schema();
let default_fields: Vec<Field> = schema
.fields()
@@ -83,7 +93,7 @@ impl IndexServer {
|&(_, ref field_entry)| {
match *field_entry.field_type() {
FieldType::Str(ref text_field_options) => {
text_field_options.get_indexing_options().is_indexed()
text_field_options.get_indexing_options().is_some()
},
_ => false
}
@@ -91,17 +101,18 @@ impl IndexServer {
)
.map(|(i, _)| Field(i as u32))
.collect();
let query_parser = QueryParser::new(schema.clone(), default_fields);
let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
IndexServer {
index: index,
query_parser: query_parser,
schema: schema,
index,
query_parser,
schema,
}
}

fn create_hit(&self, doc: &Document) -> Hit {
fn create_hit(&self, doc: &Document, doc_address: &DocAddress) -> Hit {
Hit {
doc: self.schema.to_named_doc(&doc)
doc: self.schema.to_named_doc(&doc),
id: doc_address.doc(),
}
}
@@ -116,7 +127,7 @@ impl IndexServer {
let mut chained_collector = collector::chain()
.push(&mut top_collector)
.push(&mut count_collector);
try!(query.search(&searcher, &mut chained_collector));
query.search(&searcher, &mut chained_collector)?;
}
let hits: Vec<Hit> = {
let _fetching_timer = timer_tree.open("fetching docs");
@@ -124,14 +135,14 @@ impl IndexServer {
.iter()
.map(|doc_address| {
let doc: Document = searcher.doc(doc_address).unwrap();
self.create_hit(&doc)
self.create_hit(&doc, doc_address)
})
.collect()
};
Ok(Serp {
q: q,
q,
num_hits: count_collector.count(),
hits: hits,
hits,
timings: timer_tree,
})
}
@@ -163,9 +174,9 @@ fn search(req: &mut Request) -> IronResult<Response> {
.get("nhits")
.and_then(|nhits_str| usize::from_str(&nhits_str[0]).ok())
.unwrap_or(10);
let query = try!(qs_map
let query = qs_map
.get("q")
.ok_or_else(|| IronError::new(StringError(String::from("Parameter q is missing from the query")), status::BadRequest)))[0].clone();
.ok_or_else(|| IronError::new(StringError(String::from("Parameter q is missing from the query")), status::BadRequest))?[0].clone();
let serp = index_server.search(query, num_hits).unwrap();
let resp_json = serde_json::to_string_pretty(&serp).unwrap();
let content_type = "application/json".parse::<Mime>().unwrap();


Loading…
Cancel
Save