Browse Source

Readded tantivy cli to this project. Fixed tantivy serve

develop
Paul Masurel 7 years ago
parent
commit
b8fafb97dc
14 changed files with 382 additions and 862 deletions
  1. +0
    -563
      Cargo.lock
  2. +6
    -2
      Cargo.toml
  3. +103
    -0
      src/commands/bench.rs
  4. +22
    -87
      src/commands/index.rs
  5. +18
    -0
      src/commands/merge.rs
  6. +4
    -15
      src/commands/mod.rs
  7. +131
    -9
      src/commands/new.rs
  8. +59
    -45
      src/commands/serve.rs
  9. +39
    -6
      src/main.rs
  10. +0
    -34
      static/index.html
  11. +0
    -21
      static/less.min.js
  12. +0
    -11
      static/main.coffee
  13. +0
    -22
      static/main.js
  14. +0
    -47
      static/style.less

+ 0
- 563
Cargo.lock View File

@@ -1,563 +0,0 @@
[root]
name = "tantivy-cli"
version = "0.1.0"
dependencies = [
"iron 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)",
"mount 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"staticfile 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tantivy 0.1.0",
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"urlencoded 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "aho-corasick"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "argparse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "atomicwrites"
version = "0.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tempdir 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "bincode"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"num 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.6.15 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "bodyparser"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"iron 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"persistent 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"plugin 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "byteorder"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "combine"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "conduit-mime-types"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "cookie"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "error"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"traitobject 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "fs2"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "fst"
version = "0.1.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "gcc"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "hpack"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "httparse"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "hyper"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cookie 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"httparse 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"language-tags 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"mime 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"solicit 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"traitobject 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"typeable 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"unicase 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "iron"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"conduit-mime-types 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"error 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"hyper 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"modifier 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"plugin 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"typemap 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "kernel32-sys"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "language-tags"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "lazy_static"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "libc"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "log"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "lz4"
version = "1.15.131"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"gcc 0.3.26 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "matches"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "memchr"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "memmap"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fs2 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
"kernel32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "mempool"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "mime"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "modifier"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "mount"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"iron 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sequence_trie 0.0.13 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "num"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "num_cpus"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "persistent"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"iron 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"plugin 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "plugin"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"typemap 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "rand"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "regex"
version = "0.1.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"mempool 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "regex-syntax"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "regex-syntax"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "rustc-serialize"
version = "0.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "rustc_version"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"semver 0.1.20 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "semver"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "sequence_trie"
version = "0.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "serde"
version = "0.6.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "serde"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "serde_json"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "solicit"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"hpack 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "staticfile"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"iron 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"mount 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "tantivy"
version = "0.1.0"
dependencies = [
"argparse 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"atomicwrites 0.0.14 (registry+https://github.com/rust-lang/crates.io-index)",
"bincode 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"combine 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fst 0.1.30 (registry+https://github.com/rust-lang/crates.io-index)",
"gcc 0.3.26 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lz4 1.15.131 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.63 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"tempdir 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "tempdir"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "tempfile"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc_version 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "time"
version = "0.1.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "traitobject"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "traitobject"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "typeable"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "typemap"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unsafe-any 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "unicase"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rustc_version 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "unicode-bidi"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "unicode-normalization"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "unsafe-any"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"traitobject 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "url"
version = "0.2.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.1.18 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "url"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "urlencoded"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bodyparser 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"iron 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"plugin 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"url 0.2.38 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "utf8-ranges"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "uuid"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "uuid"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]

[[package]]
name = "winapi"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"

[[package]]
name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"


+ 6
- 2
Cargo.toml View File

@@ -9,10 +9,10 @@ tantivy = { path = "../tantivy" }
time = "0.1.34"
iron = "0.4"
staticfile = "0.3.0"
lazy_static = "*"
rustc-serialize = "0.3.16"
persistent="*"
clap = "2"
ansi_term = "0.8.0"

[dependencies.urlencoded]
version = "0.4"
@@ -23,4 +23,8 @@ git = "https://github.com/iron/mount.git"
# [dependencies.clap]
#version = "2"
#default-features = false
#features = [ "suggestions", "color" ]
#features = [ "suggestions", "color" ]

[[bin]]
name = "tantivy"
path = "src/main.rs"

+ 103
- 0
src/commands/bench.rs View File

@@ -0,0 +1,103 @@
use tantivy::Index;
use tantivy::schema::{Field, Schema};
use tantivy::query::QueryParser;
use tantivy::query::Query;
use std::path::Path;
use tantivy::TimerTree;
use std::io::BufReader;
use std::io::BufRead;
use std::io;
use std::fs::File;
use tantivy::collector::chain;
use tantivy::collector::TopCollector;
use tantivy::collector::CountCollector;
use clap::ArgMatches;
use std::path::PathBuf;


pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
let index_path = PathBuf::from(matches.value_of("index").unwrap());
let queries_path = PathBuf::from(matches.value_of("queries").unwrap()); // the unwrap is safe as long as it is comming from the main cli.
let num_repeat = try!(value_t!(matches, "num_repeat", usize).map_err(|e|format!("Failed to read num_repeat argument as an integer. {:?}", e)));
run_bench(&index_path, &queries_path, num_repeat).map_err(From::from)
}


fn extract_search_fields(schema: &Schema) -> Vec<Field> {
schema.fields()
.iter()
.enumerate()
.filter(|&(_, field_entry)| {
field_entry.is_indexed()
})
.map(|(field_id, _)| field_id as u8)
.map(Field)
.collect()
}

/// Reads the file at `query_path` and returns one query per line.
///
/// Each line is trimmed of leading and trailing whitespace; empty lines are
/// kept as empty queries. The first I/O error (opening the file or reading
/// a line) is returned as is.
fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
    File::open(&query_path).and_then(|query_file| {
        BufReader::new(&query_file)
            .lines()
            .map(|line_res| line_res.map(|line| String::from(line.trim())))
            .collect()
    })
}


/// Benchmarks the index located at `index_path` with the queries listed in
/// `query_filepath` (one query per line), and prints tab-separated results
/// on stdout.
///
/// Two passes are run, each repeated `num_repeat` times over all queries:
///
/// * `SEARCH`: runs the query with a top-10 collector chained with a count
///   collector, and prints the number of terms, the number of hits and the
///   search timing.
/// * `FETCH STORE`: runs the query with a top-10 collector, then times the
///   retrieval of the collected documents through `searcher.doc`.
///
/// Any failure (opening the index, acquiring the searcher, reading the query
/// file, parsing a query, searching, fetching a document) is returned as an
/// `Err` carrying a human-readable message.
fn run_bench(index_path: &Path,
             query_filepath: &Path,
             num_repeat: usize) -> Result<(), String> {
    println!("index_path : {:?}", index_path);
    // This line used to print `index_path` a second time under the "Query" label.
    println!("Query : {:?}", query_filepath);
    println!("-------------------------------\n\n\n");
    let index = try!(Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e)));
    let searcher = try!(index.searcher().map_err(|e| format!("Failed to acquire searcher.\n{:?}", e)));
    let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
    let queries = try!(read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e)));
    let query_parser = QueryParser::new(index.schema(), default_search_fields);

    println!("SEARCH\n");
    println!("{}\t{}\t{}\t{}", "query", "num_terms", "num hits", "time in microsecs");
    for _ in 0..num_repeat {
        for query_txt in &queries {
            // A malformed query is reported like every other failure instead of panicking.
            let query = try!(query_parser.parse_query(&query_txt).map_err(|e| format!("Failed to parse query {:?}.\n{:?}", query_txt, e)));
            let num_terms = query.num_terms();
            let mut top_collector = TopCollector::with_limit(10);
            let mut count_collector = CountCollector::new();
            let timing;
            {
                // The chained collector mutably borrows both collectors; the inner
                // scope ends that borrow so that `count_collector` can be read below.
                let mut collector = chain().add(&mut top_collector).add(&mut count_collector);
                timing = try!(query.search(&searcher, &mut collector).map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e)));
            }
            println!("{}\t{}\t{}\t{}", query_txt, num_terms, count_collector.count(), timing.total_time());
        }
    }

    println!("\n\nFETCH STORE\n");
    println!("{}\t{}", "query", "time in microsecs");
    for _ in 0..num_repeat {
        for query_txt in &queries {
            let query = try!(query_parser.parse_query(&query_txt).map_err(|e| format!("Failed to parse query {:?}.\n{:?}", query_txt, e)));
            let mut top_collector = TopCollector::with_limit(10);
            try!(query.search(&searcher, &mut top_collector).map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query_txt, e)));
            let mut timer = TimerTree::new();
            {
                // Only the document retrieval happens while the "total" timer is open.
                let _scoped_timer_ = timer.open("total");
                for doc_address in top_collector.docs() {
                    try!(searcher.doc(&doc_address).map_err(|e| format!("Failed to fetch a document for query {:?}.\n{:?}", query_txt, e)));
                }
            }
            println!("{}\t{}", query_txt, timer.total_time());
        }
    }
    Ok(())
}


+ 22
- 87
src/commands/index.rs View File

@@ -1,5 +1,3 @@
use rustc_serialize::json;
use rustc_serialize::json::Json;
use std::convert::From;
use std::fs::File;
use std::io;
@@ -9,88 +7,11 @@ use std::io::Read;
use std::path::PathBuf;
use tantivy;
use tantivy::Index;
use tantivy::schema::*;
use time::PreciseTime;
use clap::ArgMatches;


/// Reasons why a JSON line could not be turned into a `Document`
/// by `doc_from_json`.
#[derive(Debug)]
enum DocMappingError {
    /// The input could not be parsed as JSON.
    NotJSON(json::ParserError),
    /// The input is valid JSON but not a JSON object.
    /// Carries a sample of the offending input.
    NotJSONObject(String),
    /// The JSON value does not fit the type of the schema field.
    /// Carries the field name and a description of the mismatch.
    MappingError(String, String),
    /// An integer value exceeds `u32::max_value()`. Carries the field name.
    OverflowError(String),
    /// The JSON object has a key that is not a field of the schema.
    /// Carries the field name.
    NoSuchFieldInSchema(String),
}

/// Lets `try!` turn a JSON parsing failure into a `DocMappingError::NotJSON`.
impl From<json::ParserError> for DocMappingError {
    fn from(parser_error: json::ParserError) -> DocMappingError {
        let mapping_error = DocMappingError::NotJSON(parser_error);
        mapping_error
    }
}

/// Builds a `Document` from `doc_json`, which must hold a single JSON object
/// whose keys are field names of `schema`.
///
/// * JSON strings are added to text fields.
/// * JSON unsigned integers are added to u32 fields, provided they fit in a `u32`.
///
/// Returns a `DocMappingError` if the input is not JSON, is not a JSON object,
/// refers to a field missing from the schema, or holds a value that does not
/// match the type of its field.
fn doc_from_json(schema: &Schema, doc_json: &str) -> Result<Document, DocMappingError> {
    let json_node = try!(Json::from_str(doc_json));
    let json_obj = match json_node.as_object() {
        Some(json_obj) => json_obj,
        None => {
            let doc_json_sample = if doc_json.len() < 20 {
                String::from(doc_json)
            } else {
                // Byte 20 may fall in the middle of a multi-byte character,
                // in which case slicing there would panic. Back off to the
                // closest char boundary (0 always is one).
                let mut sample_end = 20;
                while !doc_json.is_char_boundary(sample_end) {
                    sample_end -= 1;
                }
                format!("{:?}...", &doc_json[0..sample_end])
            };
            return Err(DocMappingError::NotJSONObject(doc_json_sample));
        }
    };
    let mut doc = Document::new();
    for (field_name, field_value) in json_obj.iter() {
        let field = match schema.get_field(field_name) {
            Some(field) => field,
            None => {
                return Err(DocMappingError::NoSuchFieldInSchema(field_name.clone()));
            }
        };
        let field_entry = schema.get_field_entry(field);
        match (field_value, field_entry) {
            (&Json::String(ref field_text), &FieldEntry::Text(_, _)) => {
                doc.add_text(field, field_text);
            }
            (&Json::String(_), _) => {
                // The value IS a string: the field is the one that is not textual.
                // The message used to claim "Expected a string".
                return Err(DocMappingError::MappingError(field_name.clone(), format!("Field does not accept a string, got {:?}", field_value)));
            }
            (&Json::U64(ref field_val_u64), &FieldEntry::U32(_, _)) => {
                if *field_val_u64 > (u32::max_value() as u64) {
                    return Err(DocMappingError::OverflowError(field_name.clone()));
                }
                doc.add_u32(field, *field_val_u64 as u32);
            }
            (&Json::U64(_), _) => {
                return Err(DocMappingError::MappingError(field_name.clone(), format!("Expected a string, got {:?}", field_value)));
            }
            _ => {
                return Err(DocMappingError::MappingError(field_name.clone(), String::from("Value is neither u32, nor text.")));
            }
        }
    }
    Ok(doc)
}

/// Where the documents to index are read from.
enum DocumentSource {
    /// Selected when no `file` argument is given on the command line.
    FromPipe,
    /// Documents are read from the file at the given path.
    FromFile(PathBuf),
}

pub fn run_index_cli(argmatch: &ArgMatches) -> tantivy::Result<()> {
pub fn run_index_cli(argmatch: &ArgMatches) -> Result<(), String> {
let index_directory = PathBuf::from(argmatch.value_of("index").unwrap());
let document_source = {
match argmatch.value_of("file") {
@@ -100,24 +21,39 @@ pub fn run_index_cli(argmatch: &ArgMatches) -> tantivy::Result<()> {
None => DocumentSource::FromPipe,
}
};
run_index(index_directory, document_source)
let num_threads = try!(value_t!(argmatch, "num_threads", usize).map_err(|_|format!("Failed to read num_threads argument as an integer.")));
run_index(index_directory, document_source, num_threads).map_err(|e| format!("Indexing failed : {:?}", e))
}

fn run_index(directory: PathBuf, document_source: DocumentSource) -> tantivy::Result<()> {
/// Where the documents to index are read from.
enum DocumentSource {
    /// Selected when no `file` argument is given on the command line.
    FromPipe,
    /// Documents are read from the file at the given path.
    FromFile(PathBuf),
}

fn run_index(directory: PathBuf, document_source: DocumentSource, num_threads: usize) -> tantivy::Result<()> {
let index = try!(Index::open(&directory));
let schema = index.schema();
let mut index_writer = index.writer_with_num_threads(8).unwrap();
let mut index_writer = try!(
if num_threads > 0 {
index.writer_with_num_threads(num_threads)
}
else {
index.writer()
}
);
let articles = try!(document_source.read());
let mut num_docs = 0;
let mut cur = PreciseTime::now();
let group_count = 10000;
let group_count = 100000;
for article_line_res in articles.lines() {
let article_line = article_line_res.unwrap(); // TODO
match doc_from_json(&schema, &article_line) {
match schema.parse_document(&article_line) {
Ok(doc) => {
index_writer.add_document(doc).unwrap();
}
@@ -125,12 +61,11 @@ fn run_index(directory: PathBuf, document_source: DocumentSource) -> tantivy::Re
println!("Failed to add document doc {:?}", err);
}
}

if num_docs > 0 && (num_docs % group_count == 0) {
println!("{} Docs", num_docs);
let new = PreciseTime::now();
let elapsed = cur.to(new);
println!("{:?} docs / hour", group_count * 3600 * 1e6 as u64 / (elapsed.num_microseconds().unwrap() as u64));
println!("{:?} docs / hour", group_count * 3600 * 1_000_000 as u64 / (elapsed.num_microseconds().unwrap() as u64));
cur = new;
}



+ 18
- 0
src/commands/merge.rs View File

@@ -0,0 +1,18 @@
extern crate tantivy;

use tantivy::Index;
use std::path::PathBuf;
use clap::ArgMatches;

/// Entry point of the `merge` subcommand.
///
/// Reads the `index` argument from `argmatch` and merges the segments of the
/// index found in that directory. A failure is reported as an `Err` carrying
/// a human-readable message.
pub fn run_merge_cli(argmatch: &ArgMatches) -> Result<(), String> {
    let index_directory = PathBuf::from(argmatch.value_of("index").unwrap());
    // The message used to read "Indexing failed", a leftover from the index command.
    run_merge(index_directory).map_err(|e| format!("Merge failed : {:?}", e))
}


/// Opens the index located at `path` and asks an index writer to merge
/// the segments listed by the index.
fn run_merge(path: PathBuf) -> tantivy::Result<()> {
    let index = try!(Index::open(&path));
    // The segment list is taken before the writer is acquired.
    let segments = index.segments();
    let mut index_writer = try!(index.writer());
    index_writer.merge(&segments)
}

+ 4
- 15
src/commands/mod.rs View File

@@ -1,22 +1,11 @@
mod index;
mod serve;
mod new;
mod bench;
mod merge;

pub use self::new::run_new_cli;
pub use self::index::run_index_cli;
pub use self::serve::run_serve_cli;

// pub mod writer;
// pub mod searcher;
// pub mod index;
// pub mod merger;

// mod segment_serializer;
// mod segment_writer;
// mod segment_reader;
// mod segment_id;
// mod segment_component;

// pub use self::segment_component::SegmentComponent;
// pub use self::segment_id::SegmentId;
// pub use self::segment_reader::SegmentReader;
pub use self::bench::run_bench_cli;
pub use self::merge::run_merge_cli;

+ 131
- 9
src/commands/new.rs View File

@@ -2,24 +2,146 @@ use clap::ArgMatches;
use std::convert::From;
use std::path::PathBuf;
use tantivy;
use tantivy::schema::{Schema, STRING, STORED, TEXT};
use tantivy::schema::*;
use tantivy::Index;
use std::io;
use ansi_term::Style;
use ansi_term::Colour::{Red, Blue, Green};
use std::io::Write;
use std::ascii::AsciiExt;
use rustc_serialize::json;

/// Builds a schema made of three stored text fields:
/// `url` (`STRING`), `title` (`TEXT`) and `body` (`TEXT`).
fn default_schema() -> Schema {
    let mut default = Schema::new();
    default.add_text_field("url", STRING | STORED);
    default.add_text_field("title", TEXT | STORED);
    default.add_text_field("body", TEXT | STORED);
    default
}

/// Entry point of the `new` subcommand: reads the `index` argument from
/// `matches` and creates a new index in that directory.
pub fn run_new_cli(matches: &ArgMatches) -> tantivy::Result<()> {
    let index_arg = matches.value_of("index").unwrap();
    run_new(PathBuf::from(index_arg))
}


/// Asks a question on stdout and reads the answer from stdin, asking again
/// until `predicate` accepts the answer.
///
/// * `prompt_text` - the question, displayed in bold blue.
/// * `predicate` - validates the answer (stripped of its line ending);
///   its error message is displayed in bold red before asking again.
///
/// Returns the accepted answer, without its trailing line ending.
///
/// # Panics
///
/// Panics if stdout cannot be flushed, if stdin cannot be read, or if stdin
/// reaches its end before an answer was accepted.
fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: P) -> String {
    loop {
        print!("{prompt_text:<width$} ? ", prompt_text=Style::new().bold().fg(Blue).paint(prompt_text), width=40);
        // The prompt does not end with a newline, so it is flushed explicitly.
        io::stdout().flush().unwrap();
        let mut buffer = String::new();
        let num_bytes_read = io::stdin().read_line(&mut buffer).expect("Failed to read line");
        if num_bytes_read == 0 {
            // End of input: no further answer will ever come. Without this
            // check, a predicate rejecting the empty answer loops forever.
            panic!("Unexpected end of input while waiting for an answer");
        }
        // Strip both "\n" and "\r\n" line endings.
        let answer = buffer.trim_right_matches(|c: char| c == '\n' || c == '\r').to_string();
        match predicate(&answer) {
            Ok(()) => {
                return answer;
            }
            Err(msg) => {
                println!("Error: {}", Style::new().bold().fg(Red).paint(msg));
            }
        }
    }
}


/// Validator for `prompt_input`: accepts `field_name` if and only if
/// `is_valid_field_name` does.
fn field_name_validate(field_name: &str) -> Result<(), String> {
    if !is_valid_field_name(field_name) {
        return Err(String::from("Field name must match the pattern [_a-zA-Z0-9]+"));
    }
    Ok(())
}
/// Asks the user to pick one of `codes` and returns the chosen code.
///
/// The answer must be exactly one byte long and, once upper-cased, be one of
/// `codes`. The returned character is the upper-cased answer.
fn prompt_options(msg: &str, codes: Vec<char>) -> char {
    let options = codes.iter()
        .map(|code| code.to_string())
        .collect::<Vec<String>>()
        .join("/");
    let message = format!("{} ({})", msg, options);
    let is_valid_choice = |entry: &str| {
        // A one-byte answer holds exactly one character.
        match (entry.len(), entry.chars().next()) {
            (1, Some(c)) if codes.contains(&c.to_ascii_uppercase()) => Ok(()),
            _ => Err(format!("Invalid input. Options are ({})", options)),
        }
    };
    let entry = prompt_input(&message, is_valid_choice);
    // The validator guarantees that the answer holds one character.
    entry.chars().next().unwrap().to_ascii_uppercase()
}

/// Asks a yes/no question; returns `true` when the user answers `Y`.
fn prompt_yn(msg: &str) -> bool {
    let answer = prompt_options(msg, vec!['Y', 'N']);
    answer == 'Y'
}


/// Interactively configures a text field named `field_name` and adds it
/// to `schema`.
///
/// The user is asked, in order, whether the field is stored, indexed,
/// tokenized, and whether term frequencies and term positions are recorded.
/// Each question is only asked if the previous answer makes it relevant.
fn ask_add_field_text(field_name: &str, schema: &mut Schema) {
    let mut text_options = TextOptions::new();
    if prompt_yn("Should the field be stored") {
        text_options = text_options.set_stored();
    }
    let indexing_options = if !prompt_yn("Should the field be indexed") {
        TextIndexingOptions::Unindexed
    } else if !prompt_yn("Should the field be tokenized") {
        // Indexed but not tokenized. This case used to yield `Unindexed`,
        // silently discarding the user's choice to index the field.
        TextIndexingOptions::Untokenized
    } else if !prompt_yn("Should the term frequencies (per doc) be in the index") {
        TextIndexingOptions::TokenizedNoFreq
    } else if !prompt_yn("Should the term positions (per doc) be in the index") {
        TextIndexingOptions::TokenizedWithFreq
    } else {
        TextIndexingOptions::TokenizedWithFreqAndPosition
    };
    text_options = text_options.set_indexing_options(indexing_options);
    schema.add_text_field(field_name, text_options);
}


/// Interactively configures a u32 field and adds it to `schema`.
///
/// Asks, in order, whether the field should be stored, fast, and indexed,
/// and enables the matching option for every "yes".
fn ask_add_field_u32(field_name: &str, schema: &mut Schema) {
    let options = U32Options::new();
    let options = if prompt_yn("Should the field be stored") {
        options.set_stored()
    } else {
        options
    };
    let options = if prompt_yn("Should the field be fast") {
        options.set_fast()
    } else {
        options
    };
    let options = if prompt_yn("Should the field be indexed") {
        options.set_indexed()
    } else {
        options
    };
    schema.add_u32_field(field_name, options);
}

/// Prompts for one new field (name, then type) and adds it to `schema`.
///
/// The name is validated with `field_name_validate`. Answering `T` creates
/// a text field; the only other accepted answer, `I`, creates a u32 field.
fn ask_add_field(schema: &mut Schema) {
    println!("\n\n");
    let field_name = prompt_input("New field name ", field_name_validate);
    match prompt_options("Text or unsigned 32-bit Integer", vec!['T', 'I']) {
        'T' => ask_add_field_text(&field_name, schema),
        _ => ask_add_field_u32(&field_name, schema),
    }
}

/// Interactively defines a schema and creates a new index in `directory`.
///
/// Fields are requested one at a time until the user declines to add
/// another. The resulting schema is printed as pretty JSON, then the index
/// is created and its metas are saved.
///
/// Returns any error reported by `Index::create` or `save_metas`.
fn run_new(directory: PathBuf) -> tantivy::Result<()> {
    println!("\n{} ", Style::new().bold().fg(Green).paint("Creating new index"));
    println!("{} ", Style::new().bold().fg(Green).paint("Let's define it's schema!"));
    // The schema is built from scratch from the user's answers; the previous
    // `default_schema()` binding was shadowed here and never read.
    let mut schema = Schema::new();
    loop {
        ask_add_field(&mut schema);
        if !prompt_yn("Add another field") {
            break;
        }
    }
    let schema_json = format!("{}", json::as_pretty_json(&schema));
    println!("\n{}\n", Style::new().fg(Green).paint(schema_json));
    let mut index = try!(Index::create(&directory, schema));
    index.save_metas()
}


+ 59
- 45
src/commands/serve.rs View File

@@ -6,7 +6,6 @@ use iron::typemap::Key;
use mount::Mount;
use persistent::Read;
use rustc_serialize::json::as_pretty_json;
use staticfile::Static;
use std::convert::From;
use std::path::Path;
use std::path::PathBuf;
@@ -20,22 +19,23 @@ use tantivy::query::Explanation;
use tantivy::query::Query;
use tantivy::query::QueryParser;
use tantivy::Result;
use tantivy::schema::Field;
use tantivy::Score;
use tantivy::schema::Schema;
use tantivy::schema::NamedFieldDocument;
use urlencoded::UrlEncodedQuery;

use std::str::FromStr;
use std::fmt::{self, Debug};
use std::error::Error;

/// Entry point of the `serve` subcommand.
///
/// Reads the `index`, `port` and `host` arguments from `matches` and starts
/// the server via `run_serve` on `"<host>:<port>"`.
///
/// * `host` defaults to `"localhost"` when absent.
/// * `port` defaults to `3000` when `value_t!` does not yield a `u16`.
///
/// Panics if the `index` argument has no value.
pub fn run_serve_cli(matches: &ArgMatches) -> tantivy::Result<()> {
    let host_name = matches.value_of("host").unwrap_or("localhost");
    let port_number = value_t!(matches, "port", u16).unwrap_or(3000u16);
    // TODO: validate the host (e.g. parse it as an IP address) and report
    // errors instead of passing the raw string through.
    let index_path = PathBuf::from(matches.value_of("index").unwrap());
    let address = format!("{}:{}", host_name, port_number);
    run_serve(index_path, &address)
}


#[derive(RustcDecodable, RustcEncodable)]
#[derive(RustcEncodable)]
struct Serp {
q: String,
num_hits: usize,
@@ -43,15 +43,13 @@ struct Serp {
timings: Vec<Timing>,
}

#[derive(RustcDecodable, RustcEncodable)]
#[derive(RustcEncodable)]
struct Hit {
title: String,
body: String,
explain: String,
score: Score,
doc: NamedFieldDocument,
explain: Option<Explanation>,
}

#[derive(RustcDecodable, RustcEncodable)]
#[derive(RustcEncodable)]
struct Timing {
name: String,
duration: i64,
@@ -60,8 +58,7 @@ struct Timing {
struct IndexServer {
index: Index,
query_parser: QueryParser,
body_field: Field,
title_field: Field,
schema: Schema,
}

impl IndexServer {
@@ -71,29 +68,26 @@ impl IndexServer {
let schema = index.schema();
let body_field = schema.get_field("body").unwrap();
let title_field = schema.get_field("title").unwrap();
let query_parser = QueryParser::new(schema, vec!(body_field, title_field));
let query_parser = QueryParser::new(schema.clone(), vec!(body_field, title_field));
IndexServer {
index: index,
query_parser: query_parser,
title_field: title_field,
body_field: body_field,
schema: schema,
}
}

fn create_hit(&self, doc: &Document, explain: Explanation) -> Hit {
fn create_hit(&self, doc: &Document, explain: Option<Explanation>) -> Hit {
Hit {
title: String::from(doc.get_first(self.title_field).unwrap().text()),
body: String::from(doc.get_first(self.body_field).unwrap().text().clone()),
explain: format!("{:?}", explain),
score: explain.val(),
doc: self.schema.to_named_doc(&doc),
explain: explain,
}
}
fn search(&self, q: String) -> Result<Serp> {
fn search(&self, q: String, num_hits: usize, explain: bool) -> Result<Serp> {
let query = self.query_parser.parse_query(&q).unwrap();
let searcher = self.index.searcher().unwrap();
let mut count_collector = CountCollector::new();
let mut top_collector = TopCollector::with_limit(10);
let mut top_collector = TopCollector::with_limit(num_hits);

{
let mut chained_collector = collector::chain()
@@ -105,7 +99,13 @@ impl IndexServer {
.iter()
.map(|doc_address| {
let doc: Document = searcher.doc(doc_address).unwrap();
let explanation = query.explain(&searcher, doc_address).unwrap();
let explanation;
if explain {
explanation = Some(query.explain(&searcher, doc_address).unwrap());
}
else {
explanation = None;
}
self.create_hit(&doc, explanation)
})
.collect();
@@ -122,36 +122,50 @@ impl Key for IndexServer {
type Value = IndexServer;
}

/// Minimal error type wrapping a plain message string.
#[derive(Debug)]
struct StringError(String);

impl fmt::Display for StringError {
    /// Writes the wrapped message as-is.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Display is the human-readable form: emit the bare message rather
        // than the derived Debug rendering (`StringError("...")`).
        f.write_str(&self.0)
    }
}

impl Error for StringError {
    /// Returns the wrapped message.
    fn description(&self) -> &str {
        &self.0
    }
}

fn search(req: &mut Request) -> IronResult<Response> {
let index_server = req.get::<Read<IndexServer>>().unwrap();
match req.get_ref::<UrlEncodedQuery>() {
Ok(ref qs_map) => {
match qs_map.get("q") {
Some(qs) => {
let query = qs[0].clone();
let serp = index_server.search(query).unwrap();
let resp_json = as_pretty_json(&serp).indent(4);
let content_type = "application/json".parse::<Mime>().unwrap();
Ok(
Response::with((content_type, status::Ok, format!("{}", resp_json)))
)
}
None => {
Ok(Response::with((status::BadRequest, "Query not defined")))
}
}
}
Err(_) => Ok(Response::with((status::BadRequest, "Failed to parse query string")))
}
req.get_ref::<UrlEncodedQuery>()
.map_err(|_| IronError::new(StringError(String::from("Failed to decode error")), status::BadRequest))
.and_then(|ref qs_map| {
let num_hits: usize = qs_map
.get("nhits")
.and_then(|nhits_str| usize::from_str(&nhits_str[0]).ok())
.unwrap_or(10);
let explain: bool = qs_map
.get("explain")
.map(|s| &s[0] == &"true")
.unwrap_or(false);
let query = try!(qs_map
.get("q")
.ok_or_else(|| IronError::new(StringError(String::from("Parameter q is missing from the query")), status::BadRequest)))[0].clone();
let serp = index_server.search(query, num_hits, explain).unwrap();
let resp_json = as_pretty_json(&serp).indent(4);
let content_type = "application/json".parse::<Mime>().unwrap();
Ok(Response::with((content_type, status::Ok, format!("{}", resp_json))))
})
}



fn run_serve(directory: PathBuf, host: &str) -> tantivy::Result<()> {
let mut mount = Mount::new();
let server = IndexServer::load(&directory);
mount.mount("/api", search);
mount.mount("/", Static::new(Path::new("static/")));
let mut middleware = Chain::new(mount);
middleware.link(Read::<IndexServer>::both(server));


+ 39
- 6
src/main.rs View File

@@ -1,15 +1,14 @@
#[macro_use]
extern crate clap;
#[macro_use]
extern crate lazy_static;
extern crate rustc_serialize;
extern crate tantivy;
extern crate time;
// extern crate regex;
extern crate persistent;
extern crate urlencoded;
extern crate iron;
extern crate staticfile;
extern crate ansi_term;
extern crate mount;

use clap::{AppSettings, Arg, App, SubCommand};
@@ -18,7 +17,6 @@ use self::commands::*;


fn main() {
let index_arg = Arg::with_name("index")
.short("i")
.long("index")
@@ -61,8 +59,36 @@ fn main() {
.short("f")
.long("file")
.value_name("file")
.help("File containing the documents to index.")
))
.help("File containing the documents to index."))
.arg(Arg::with_name("num_threads")
.short("t")
.long("num_threads")
.value_name("num_threads")
.help("Number of indexing thread. By default num cores - 1 will be used")
.default_value("0"))
)
.subcommand(
SubCommand::with_name("bench")
.about("Run a benchmark on your index")
.arg(index_arg.clone())
.arg(Arg::with_name("queries")
.short("q")
.long("queries")
.value_name("queries")
.help("File containing queries (one-per line) to run in the benchmark.")
.required(true))
.arg(Arg::with_name("num_repeat")
.short("n")
.long("num_repeat")
.value_name("num_repeat")
.help("Number of time to repeat the benchmark.")
.default_value("1"))
)
.subcommand(
SubCommand::with_name("merge")
.about("Merge all the segments of an index")
.arg(index_arg.clone())
)
.get_matches();
let (subcommand, some_options) = cli_options.subcommand();
@@ -70,10 +96,17 @@ fn main() {
let options = some_options.unwrap();
match subcommand {
// "serve" => run_serve(options),
"new" => run_new_cli(options).unwrap(),
"index" => run_index_cli(options).unwrap(),
"serve" => run_serve_cli(options).unwrap(),
"merge" => run_merge_cli(options).unwrap(),
"bench" => {
let res = run_bench_cli(options);
match res {
Err(e) => { println!("{}", e);}
_ => {}
}
},
_ => {}
}
}

+ 0
- 34
static/index.html View File

@@ -1,34 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet/less" type="text/css" href="style.less"/>
<title>Wikipedia search (powered by tantivy)</title>
</head>
<body>
<script id="template" type="x-tmpl-mustache">
<div class='query'>{{ query }}</div>
<div class='num_hits'>{{ num_hits }} articles</div>
<ul class="timings">
{{#timings}}
<li>{{name}} - {{duration}} &mu;s</li>
{{/timings}}
</ul>
<ul class='hits'>
{{#hits}}
<li>{{title}}</li>
{{/hits}}
</ul>

</script>
<form onsubmit="event.preventDefault(); search();">
<input type="text" id="q" autocomplete="off"></input>
<!-- <input type="submit"></input> -->
</form>
<div id="serp"></div>
<script src="less.min.js" type="text/javascript"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mustache.js/2.2.1/mustache.min.js"></script>
<script src="https://code.jquery.com/jquery-1.12.0.min.js"></script>
<script src="main.js"></script>
</body>
</html>

+ 0
- 21
static/less.min.js
File diff suppressed because it is too large
View File


+ 0
- 11
static/main.coffee View File

@@ -1,11 +0,0 @@

# Callback for the search request: renders the `#template` Mustache
# template with the received `serp` object and puts the result in `#serp`.
handle = (serp, status, resp) ->
  template = $('#template').html()
  Mustache.parse template
  $("#serp").html Mustache.render(template, serp)

# Sends the content of the `#q` input to `/api` as the `q` parameter and
# lets `handle` process the JSON response. Always returns true.
window.search = ->
  $.getJSON '/api', {q: $('#q').val()}, handle
  true

+ 0
- 22
static/main.js View File

@@ -1,22 +0,0 @@
// Generated by CoffeeScript 1.9.2
(function() {
  // Callback for the search request: renders the `#template` Mustache
  // template with the received `serp` object and puts the result in `#serp`.
  function handle(serp, status, resp) {
    var template = $('#template').html();
    Mustache.parse(template);
    return $("#serp").html(Mustache.render(template, serp));
  }

  // Sends the content of the `#q` input to `/api` as the `q` parameter and
  // lets `handle` process the JSON response. Always returns true.
  window.search = function() {
    $.getJSON('/api', { q: $('#q').val() }, handle);
    return true;
  };

}).call(this);

+ 0
- 47
static/style.less View File

@@ -1,47 +0,0 @@
body {
background-color: #efefef;
padding: 30px;
}

* {
font: 16px arial,sans-serif;
}

input, button {
font-size: 20px;
padding: 5px;
}

ul.timings {
margin-top:30px;
list-style: none outside none; margin:0; padding: 0;
color: #888;

li {
float: left; margin: 0 7px;
}
}



input {
color: #333;
}

div.num_hits {
color: #09f;
}

div.query {
display: none;
}

ul.hits {
margin-top: 30px;
clear: both;
padding: 0;
list-style-type: none;
li {
margin: 10px;
}
}

Loading…
Cancel
Save