/// This tantivy command starts a http server (by default on port 3000) /// /// Currently the only entrypoint is /api/ /// and it takes the following query string argument /// /// - `q=` : your query /// - `nhits`: the number of hits that should be returned. (default to 10) /// /// /// For instance, the following call should return the 20 most relevant /// hits for fulmicoton. /// /// http://localhost:3000/api/?q=fulmicoton&&nhits=20 /// use clap::ArgMatches; use iron::mime::Mime; use iron::prelude::*; use iron::status; use iron::typemap::Key; use mount::Mount; use persistent::Read; use serde_json; use std::convert::From; use std::error::Error; use std::fmt::{self, Debug}; use std::path::Path; use std::path::PathBuf; use std::str::FromStr; use tantivy; use tantivy::collector::{Count, TopDocs}; use tantivy::query::QueryParser; use tantivy::schema::Field; use tantivy::schema::FieldType; use tantivy::schema::NamedFieldDocument; use tantivy::schema::Schema; use tantivy::tokenizer::*; use tantivy::{DocAddress, Score}; use tantivy::Document; use tantivy::Index; use tantivy::IndexReader; use crate::timer::TimerTree; use urlencoded::UrlEncodedQuery; pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> { let index_directory = PathBuf::from(matches.value_of("index").unwrap()); let port = value_t!(matches, "port", u16).unwrap_or(3000u16); let host_str = matches.value_of("host").unwrap_or("localhost"); let host = format!("{}:{}", host_str, port); run_serve(index_directory, &host).map_err(|e| format!("{:?}", e)) } #[derive(Serialize)] struct Serp { q: String, num_hits: usize, hits: Vec, timings: TimerTree, } #[derive(Serialize)] struct Hit { score: Score, doc: NamedFieldDocument, id: u32, } struct IndexServer { reader: IndexReader, query_parser: QueryParser, schema: Schema, } impl IndexServer { fn load(path: &Path) -> IndexServer { let index = Index::open_in_dir(path).unwrap(); index.tokenizers().register( "commoncrawl", TextAnalyzer::from(SimpleTokenizer) .filter(RemoveLongFilter::limit(40)) .filter(LowerCaser) .filter(AlphaNumOnlyFilter) .filter(Stemmer::new(Language::English)), ); let schema = index.schema(); let default_fields: Vec = schema .fields() .filter(|&(_, ref field_entry)| match *field_entry.field_type() { FieldType::Str(ref text_field_options) => { text_field_options.get_indexing_options().is_some() } _ => false, }) .map(|(field, _)| field) .collect(); let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone()); let reader = index.reader().unwrap(); IndexServer { reader, query_parser, schema, } } fn create_hit(&self, score: Score, doc: &Document, doc_address: DocAddress) -> Hit { Hit { score, doc: self.schema.to_named_doc(&doc), id: doc_address.doc(), } } fn search(&self, q: String, num_hits: usize) -> tantivy::Result { let query = self .query_parser .parse_query(&q) .expect("Parsing the query failed"); let searcher = self.reader.searcher(); let mut timer_tree = TimerTree::default(); let (top_docs, num_hits) = { let _search_timer = timer_tree.open("search"); searcher.search(&query, &(TopDocs::with_limit(num_hits), Count))? }; let hits: Vec = { let _fetching_timer = timer_tree.open("fetching docs"); top_docs .iter() .map(|(score, doc_address)| { let doc: Document = searcher.doc(*doc_address).unwrap(); self.create_hit(*score, &doc, *doc_address) }) .collect() }; Ok(Serp { q, num_hits, hits, timings: timer_tree, }) } } impl Key for IndexServer { type Value = IndexServer; } #[derive(Debug)] struct StringError(String); impl fmt::Display for StringError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { Debug::fmt(self, f) } } impl Error for StringError { fn description(&self) -> &str { &*self.0 } } fn search(req: &mut Request) -> IronResult { let index_server = req.get::>().unwrap(); req.get_ref::() .map_err(|_| { IronError::new( StringError(String::from("Failed to decode error")), status::BadRequest, ) }) .and_then(|ref qs_map| { let num_hits: usize = qs_map .get("nhits") .and_then(|nhits_str| usize::from_str(&nhits_str[0]).ok()) .unwrap_or(10); let query = qs_map.get("q").ok_or_else(|| { IronError::new( StringError(String::from("Parameter q is missing from the query")), status::BadRequest, ) })?[0] .clone(); let serp = index_server.search(query, num_hits).unwrap(); let resp_json = serde_json::to_string_pretty(&serp).unwrap(); let content_type = "application/json".parse::().unwrap(); Ok(Response::with(( content_type, status::Ok, format!("{}", resp_json), ))) }) } fn run_serve(directory: PathBuf, host: &str) -> tantivy::Result<()> { let mut mount = Mount::new(); let server = IndexServer::load(&directory); mount.mount("/api", search); let mut middleware = Chain::new(mount); middleware.link(Read::::both(server)); println!("listening on http://{}", host); Iron::new(middleware).http(host).unwrap(); Ok(()) }