|
|
extern crate tantivy;
extern crate time;
extern crate urlencoded;
#[macro_use]
extern crate lazy_static;
extern crate rustc_serialize;
extern crate iron;
extern crate staticfile;
extern crate mount;
|
|
|
|
|
|
|
|
use tantivy::collector::{CountCollector, FirstNCollector, MultiCollector};
use tantivy::schema::*;
use tantivy::schema::{TextField, Term};
use tantivy::Searcher;
use tantivy::Directory;
use tantivy::Index;
use tantivy::analyzer::SimpleTokenizer;
use std::io;
use std::io::BufRead;
use std::path::PathBuf;
use time::PreciseTime;
use urlencoded::UrlEncodedQuery;
use iron::status;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// This example serves a JSON search API at /api and the static files under
// static/ at /.
//
// Run `cargo run`, then point your browser to http://127.0.0.1:3000/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
use tantivy::analyzer::StreamingIterator;
use rustc_serialize::json::as_pretty_json;
use std::path::Path;
use staticfile::Static;
use iron::mime::Mime;
use mount::Mount;
use iron::prelude::*;
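
// Debugging helper used only by the commented-out CLI loop in `main`: runs
// the terms against the searcher, prints the stored text of the requested
// fields for the first three hits, and returns the total hit count.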
|
|
|
|
|
|
|
|
fn handle_query(searcher: &Searcher, terms: &Vec<Term>, print_fields: &Vec<TextField>) -> usize {
    let mut count_collector = CountCollector::new();
    let mut first_3_collector = FirstNCollector::with_limit(3);
    {
        let mut multi_collector = MultiCollector::from(vec!(&mut count_collector, &mut first_3_collector));
        searcher.search(&terms, &mut multi_collector);
    }
    for doc_address in first_3_collector.docs().iter() {
        let doc = searcher.doc(doc_address).unwrap();
        for print_field in print_fields.iter() {
            for txt in doc.get_texts(print_field) {
                println!(" - txt: {:?}", txt);
            }
        }
    }
    count_collector.count()
}
|
|
|
|
|
|
|
|
#[derive(RustcDecodable, RustcEncodable)]
struct Serp {
    query: String,
    num_hits: usize,
    hits: Vec<Hit>,
    timings: Vec<Timing>,
}
|
|
|
|
|
|
|
|
|
|
|
#[derive(RustcDecodable, RustcEncodable)]
struct Hit {
    title: String,
    body: String,
}
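
// The index is opened lazily, once, on first access. Note the hard-coded
// path: point it at your own index directory before running.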
|
|
|
|
|
|
|
|
|
|
|
lazy_static! {
    static ref INDEX: Index = {
        Index::open(&Path::new("/Users/pmasurel/wiki-index/")).unwrap()
    };
}
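
// Tokenizes the raw query string and turns each token into a `Term` on the
// given field.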
|
|
|
|
|
|
|
|
|
|
|
fn parse_query(q: &String, field: &TextField) -> Vec<Term> {
    let tokenizer = SimpleTokenizer::new();
    let mut token_it = tokenizer.tokenize(&q);
    let mut terms = Vec::new();
    loop {
        match token_it.next() {
            Some(token) => {
                terms.push(Term::from_field_text(field, &token));
            }
            None => { break; }
        }
    }
    terms
}
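
// Tiny stopwatch: `TimingStarted` records a label and a start time, and
// `stop()` turns it into a `Timing` with the elapsed microseconds.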
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct TimingStarted {
    name: String,
    start: PreciseTime,
}
|
|
|
|
|
|
|
|
|
|
|
impl TimingStarted {
    fn new(name: &str) -> TimingStarted {
        TimingStarted {
            name: String::from(name),
            start: PreciseTime::now(),
        }
    }

    fn stop(self) -> Timing {
        let stop = PreciseTime::now();
        Timing {
            name: self.name,
            duration: self.start.to(stop).num_microseconds().unwrap(),
        }
    }
}
|
|
|
|
|
|
|
|
// fn hello_world(_: &mut Request) -> IronResult<Response> {
//     Ok(Response::with((iron::status::Ok, "Hello World")))
// }
|
|
|
|
|
|
|
|
#[derive(RustcDecodable, RustcEncodable)]
struct Timing {
    name: String,
    duration: i64,
}
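
// Iron handler mounted at /api in `main`. It expects the query in the `q`
// parameter and answers with a pretty-printed JSON `Serp`. For example
// (assuming the server is running and the index is populated):
//
//     curl 'http://127.0.0.1:3000/api?q=barack+obama'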
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn search(req: &mut Request) -> IronResult<Response> {
    // Extract the decoded data as a hashmap, using the UrlEncodedQuery plugin.
    let mut timings = Vec::new();
    match req.get_ref::<UrlEncodedQuery>() {
        Ok(ref qs_map) => {
            println!("Parsed GET request query string:\n {:?}", qs_map);
            println!("{:?}", qs_map.get("q"));
            match qs_map.get("q") {
                Some(qs) => {
                    let query = qs[0].clone();
                    let search_timing = TimingStarted::new("search");
                    let searcher = INDEX.searcher().unwrap();
                    let schema = INDEX.schema();
                    let title_field = schema.text_field("title");
                    let body_field = schema.text_field("body");
                    let terms = parse_query(&query, &body_field);
                    let mut count_collector = CountCollector::new();
                    let mut first_collector = FirstNCollector::with_limit(10);
                    {
                        let mut multi_collector = MultiCollector::from(vec!(&mut count_collector, &mut first_collector));
                        // The searcher reports its own internal timings; print them for debugging.
                        let search_timings = searcher.search(&terms, &mut multi_collector).unwrap();
                        println!("{:?}", search_timings);
                    }
                    timings.push(search_timing.stop());
                    let storage_timing = TimingStarted::new("store");
                    let hits: Vec<Hit> = first_collector
                        .docs()
                        .iter()
                        .map(|doc_address| searcher.doc(doc_address).unwrap())
                        .map(|doc| Hit {
                            title: doc.get_first_text(&title_field).unwrap().clone(),
                            body: doc.get_first_text(&body_field).unwrap().clone(),
                        })
                        .collect();
                    timings.push(storage_timing.stop());
                    let response = Serp {
                        query: query,
                        hits: hits,
                        num_hits: count_collector.count(),
                        timings: timings,
                    };
                    let resp_json = as_pretty_json(&response).indent(4);
                    let content_type = "application/json".parse::<Mime>().unwrap();
                    Ok(Response::with((content_type, status::Ok, format!("{}", resp_json))))
                }
                None => {
                    Ok(Response::with((status::BadRequest, "Query not defined")))
                }
            }
        }
        Err(_) => Ok(Response::with((status::BadRequest, "Failed to parse query string"))),
    }
}
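
// Wires the server together: the JSON search handler under /api and the
// static frontend under / (served from ./static/), listening on port 3000.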
|
|
|
|
|
|
|
|
fn main() {
    // Old interactive CLI version, kept for reference:
    //
    // let directory = Directory::open(&PathBuf::from("/data/wiki-index/")).unwrap();
    // let schema = directory.schema();
    // let url_field = schema.field("url").unwrap();
    // let title_field = schema.field("title").unwrap();
    // let body_field = schema.field("body").unwrap();
    // let print_fields = vec!(title_field, url_field);
    //
    // let mut directory = Directory::open(&PathBuf::from("/data/wiki-index/")).unwrap();
    // let searcher = Searcher::for_directory(directory);
    // let tokenizer = SimpleTokenizer::new();
    //
    // println!("Ready");
    // let stdin = io::stdin();
    // loop {
    //     let mut input = String::new();
    //     print!("> ");
    //     stdin.read_line(&mut input);
    //     if input == "exit\n" {
    //         break;
    //     }
    //     let mut terms: Vec<Term> = Vec::new();
    //     let mut token_it = tokenizer.tokenize(&input);
    //     loop {
    //         match token_it.next() {
    //             Some(token) => {
    //                 terms.push(Term::from_field_text(&body_field, &token));
    //             }
    //             None => { break; }
    //         }
    //     }
    //     println!("Input: {:?}", input);
    //     println!("Keywords {:?}", terms);
    //     let start = PreciseTime::now();
    //     let num_docs = handle_query(&searcher, &terms, &print_fields);
    //     let stop = PreciseTime::now();
    //     println!("Elapsed time {:?} microseconds", start.to(stop).num_microseconds().unwrap());
    //     println!("Num_docs {:?}", num_docs);
    // }
|
|
|
|
|
|
|
|
|
|
|
    let mut mount = Mount::new();
    mount.mount("/api", search);
    mount.mount("/", Static::new(Path::new("static/")));
    println!("Running on 3000");
    Iron::new(mount).http("127.0.0.1:3000").unwrap();
}