You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

208 lines
6.3KB

  1. /// This tantivy command starts an HTTP server (by default on port 3000)
  2. ///
  3. /// Currently the only entrypoint is /api/
  4. /// and it takes the following query string argument
  5. ///
  6. /// - `q=` : your query
  7. /// - `nhits=` : the number of hits that should be returned (defaults to 10).
  8. /// - `explain=` : if true returns some information about the score.
  9. ///
  10. ///
  11. /// For instance, the following call should return the 20 most relevant
  12. /// hits for fulmicoton.
  13. ///
  14. /// http://localhost:3000/api/?q=fulmicoton&explain=false&nhits=20
  15. ///
  16. use clap::ArgMatches;
  17. use iron::mime::Mime;
  18. use iron::prelude::*;
  19. use iron::status;
  20. use iron::typemap::Key;
  21. use mount::Mount;
  22. use persistent::Read;
  23. use rustc_serialize::json::as_pretty_json;
  24. use std::convert::From;
  25. use std::error::Error;
  26. use std::fmt::{self, Debug};
  27. use std::path::Path;
  28. use std::path::PathBuf;
  29. use std::str::FromStr;
  30. use tantivy;
  31. use tantivy::collector;
  32. use tantivy::collector::CountCollector;
  33. use tantivy::collector::TopCollector;
  34. use tantivy::Document;
  35. use tantivy::Index;
  36. use tantivy::query::Explanation;
  37. use tantivy::query::Query;
  38. use tantivy::query::QueryParser;
  39. use tantivy::Result;
  40. use tantivy::schema::Field;
  41. use tantivy::schema::FieldType;
  42. use tantivy::schema::NamedFieldDocument;
  43. use tantivy::schema::Schema;
  44. use tantivy::TimerTree;
  45. use urlencoded::UrlEncodedQuery;
  46. pub fn run_serve_cli(matches: &ArgMatches) -> tantivy::Result<()> {
  47. let index_directory = PathBuf::from(matches.value_of("index").unwrap());
  48. let port = value_t!(matches, "port", u16).unwrap_or(3000u16);
  49. let host_str = matches.value_of("host").unwrap_or("localhost");
  50. let host = format!("{}:{}", host_str, port);
  51. run_serve(index_directory, &host)
  52. }
  53. #[derive(RustcEncodable)]
  54. struct Serp {
  55. q: String,
  56. num_hits: usize,
  57. hits: Vec<Hit>,
  58. timings: TimerTree,
  59. }
  60. #[derive(RustcEncodable)]
  61. struct Hit {
  62. doc: NamedFieldDocument,
  63. explain: Option<Explanation>,
  64. }
  65. struct IndexServer {
  66. index: Index,
  67. query_parser: QueryParser,
  68. schema: Schema,
  69. }
  70. impl IndexServer {
  71. fn load(path: &Path) -> IndexServer {
  72. let index = Index::open(path).unwrap();
  73. let schema = index.schema();
  74. let default_fields: Vec<Field> = schema
  75. .fields()
  76. .iter()
  77. .enumerate()
  78. .filter(
  79. |&(_, ref field_entry)| {
  80. match *field_entry.field_type() {
  81. FieldType::Str(_) => true,
  82. FieldType::U32(_) => false
  83. }
  84. }
  85. )
  86. .map(|(i, _)| Field(i as u8))
  87. .collect();
  88. let query_parser = QueryParser::new(schema.clone(), default_fields);
  89. IndexServer {
  90. index: index,
  91. query_parser: query_parser,
  92. schema: schema,
  93. }
  94. }
  95. fn create_hit(&self, doc: &Document, explain: Option<Explanation>) -> Hit {
  96. Hit {
  97. doc: self.schema.to_named_doc(&doc),
  98. explain: explain,
  99. }
  100. }
  101. fn search(&self, q: String, num_hits: usize, explain: bool) -> Result<Serp> {
  102. let query = self.query_parser.parse_query(&q).unwrap();
  103. let searcher = self.index.searcher().unwrap();
  104. let mut count_collector = CountCollector::new();
  105. let mut top_collector = TopCollector::with_limit(num_hits);
  106. let mut timer_tree = TimerTree::new();
  107. {
  108. let _search_timer = timer_tree.open("search");
  109. let mut chained_collector = collector::chain()
  110. .add(&mut top_collector)
  111. .add(&mut count_collector);
  112. try!(query.search(&searcher, &mut chained_collector));
  113. }
  114. let hits: Vec<Hit> = {
  115. let _fetching_timer = timer_tree.open("fetching docs");
  116. top_collector.docs()
  117. .iter()
  118. .map(|doc_address| {
  119. let doc: Document = searcher.doc(doc_address).unwrap();
  120. let explanation;
  121. if explain {
  122. explanation = Some(query.explain(&searcher, doc_address).unwrap());
  123. }
  124. else {
  125. explanation = None;
  126. }
  127. self.create_hit(&doc, explanation)
  128. })
  129. .collect()
  130. };
  131. Ok(Serp {
  132. q: q,
  133. num_hits: count_collector.count(),
  134. hits: hits,
  135. timings: timer_tree,
  136. })
  137. }
  138. }
  139. impl Key for IndexServer {
  140. type Value = IndexServer;
  141. }
  /// Minimal error type carrying a plain message, used to build `IronError`s.
  #[derive(Debug)]
  struct StringError(String);

  impl fmt::Display for StringError {
      /// Writes the wrapped message itself.
      ///
      /// Delegating to `Debug` here would print `StringError("...")`, with
      /// the type name and escaped quotes, instead of the message.
      fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
          fmt::Display::fmt(&self.0, f)
      }
  }

  impl Error for StringError {
      /// Returns the wrapped message.
      fn description(&self) -> &str {
          &self.0
      }
  }
  152. fn search(req: &mut Request) -> IronResult<Response> {
  153. let index_server = req.get::<Read<IndexServer>>().unwrap();
  154. req.get_ref::<UrlEncodedQuery>()
  155. .map_err(|_| IronError::new(StringError(String::from("Failed to decode error")), status::BadRequest))
  156. .and_then(|ref qs_map| {
  157. let num_hits: usize = qs_map
  158. .get("nhits")
  159. .and_then(|nhits_str| usize::from_str(&nhits_str[0]).ok())
  160. .unwrap_or(10);
  161. let explain: bool = qs_map
  162. .get("explain")
  163. .map(|s| &s[0] == &"true")
  164. .unwrap_or(false);
  165. let query = try!(qs_map
  166. .get("q")
  167. .ok_or_else(|| IronError::new(StringError(String::from("Parameter q is missing from the query")), status::BadRequest)))[0].clone();
  168. let serp = index_server.search(query, num_hits, explain).unwrap();
  169. let resp_json = as_pretty_json(&serp).indent(4);
  170. let content_type = "application/json".parse::<Mime>().unwrap();
  171. Ok(Response::with((content_type, status::Ok, format!("{}", resp_json))))
  172. })
  173. }
  174. fn run_serve(directory: PathBuf, host: &str) -> tantivy::Result<()> {
  175. let mut mount = Mount::new();
  176. let server = IndexServer::load(&directory);
  177. mount.mount("/api", search);
  178. let mut middleware = Chain::new(mount);
  179. middleware.link(Read::<IndexServer>::both(server));
  180. println!("listening on http://{}", host);
  181. Iron::new(middleware).http(host).unwrap();
  182. Ok(())
  183. }