You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

193 lines
5.7KB

  1. /// This tantivy command starts an HTTP server (by default on port 3000)
  2. ///
  3. /// Currently the only entrypoint is /api/
  4. /// and it takes the following query string arguments
  5. ///
  6. /// - `q=` : your query
  7. /// - `nhits`: the number of hits that should be returned. (defaults to 10)
  8. ///
  9. ///
  10. /// For instance, the following call should return the 20 most relevant
  11. /// hits for fulmicoton.
  12. ///
  13. /// http://localhost:3000/api/?q=fulmicoton&nhits=20
  14. ///
  15. use clap::ArgMatches;
  16. use iron::mime::Mime;
  17. use iron::prelude::*;
  18. use iron::status;
  19. use serde_json;
  20. use iron::typemap::Key;
  21. use mount::Mount;
  22. use persistent::Read;
  23. use std::convert::From;
  24. use std::error::Error;
  25. use std::fmt::{self, Debug};
  26. use std::path::Path;
  27. use std::path::PathBuf;
  28. use std::str::FromStr;
  29. use tantivy;
  30. use tantivy::collector;
  31. use tantivy::collector::CountCollector;
  32. use tantivy::collector::TopCollector;
  33. use tantivy::Document;
  34. use tantivy::Index;
  35. use tantivy::query::QueryParser;
  36. use tantivy::schema::Field;
  37. use tantivy::schema::FieldType;
  38. use tantivy::schema::NamedFieldDocument;
  39. use tantivy::schema::Schema;
  40. use tantivy::TimerTree;
  41. use urlencoded::UrlEncodedQuery;
  42. pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
  43. let index_directory = PathBuf::from(matches.value_of("index").unwrap());
  44. let port = value_t!(matches, "port", u16).unwrap_or(3000u16);
  45. let host_str = matches.value_of("host").unwrap_or("localhost");
  46. let host = format!("{}:{}", host_str, port);
  47. run_serve(index_directory, &host).map_err(|e| format!("{:?}", e))
  48. }
  49. #[derive(Serialize)]
  50. struct Serp {
  51. q: String,
  52. num_hits: usize,
  53. hits: Vec<Hit>,
  54. timings: TimerTree,
  55. }
  56. #[derive(Serialize)]
  57. struct Hit {
  58. doc: NamedFieldDocument,
  59. }
  60. struct IndexServer {
  61. index: Index,
  62. query_parser: QueryParser,
  63. schema: Schema,
  64. }
  65. impl IndexServer {
  66. fn load(path: &Path) -> IndexServer {
  67. let index = Index::open(path).unwrap();
  68. let schema = index.schema();
  69. let default_fields: Vec<Field> = schema
  70. .fields()
  71. .iter()
  72. .enumerate()
  73. .filter(
  74. |&(_, ref field_entry)| {
  75. match *field_entry.field_type() {
  76. FieldType::Str(ref text_field_options) => {
  77. text_field_options.get_indexing_options().is_indexed()
  78. },
  79. _ => false
  80. }
  81. }
  82. )
  83. .map(|(i, _)| Field(i as u32))
  84. .collect();
  85. let query_parser = QueryParser::new(schema.clone(), default_fields);
  86. IndexServer {
  87. index: index,
  88. query_parser: query_parser,
  89. schema: schema,
  90. }
  91. }
  92. fn create_hit(&self, doc: &Document) -> Hit {
  93. Hit {
  94. doc: self.schema.to_named_doc(&doc)
  95. }
  96. }
  97. fn search(&self, q: String, num_hits: usize) -> tantivy::Result<Serp> {
  98. let query = self.query_parser.parse_query(&q).expect("Parsing the query failed");
  99. let searcher = self.index.searcher();
  100. let mut count_collector = CountCollector::default();
  101. let mut top_collector = TopCollector::with_limit(num_hits);
  102. let mut timer_tree = TimerTree::default();
  103. {
  104. let _search_timer = timer_tree.open("search");
  105. let mut chained_collector = collector::chain()
  106. .push(&mut top_collector)
  107. .push(&mut count_collector);
  108. try!(query.search(&searcher, &mut chained_collector));
  109. }
  110. let hits: Vec<Hit> = {
  111. let _fetching_timer = timer_tree.open("fetching docs");
  112. top_collector.docs()
  113. .iter()
  114. .map(|doc_address| {
  115. let doc: Document = searcher.doc(doc_address).unwrap();
  116. self.create_hit(&doc)
  117. })
  118. .collect()
  119. };
  120. Ok(Serp {
  121. q: q,
  122. num_hits: count_collector.count(),
  123. hits: hits,
  124. timings: timer_tree,
  125. })
  126. }
  127. }
  128. impl Key for IndexServer {
  129. type Value = IndexServer;
  130. }
  /// Minimal error type carrying nothing but a message.
  #[derive(Debug)]
  struct StringError(String);

  impl fmt::Display for StringError {
      /// Writes the wrapped message as is, so that `Display` and
      /// `description()` agree (the `Debug` form stays `StringError("...")`).
      fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
          f.write_str(&self.0)
      }
  }

  impl Error for StringError {
      /// Returns the wrapped message.
      fn description(&self) -> &str {
          &self.0
      }
  }
  141. fn search(req: &mut Request) -> IronResult<Response> {
  142. let index_server = req.get::<Read<IndexServer>>().unwrap();
  143. req.get_ref::<UrlEncodedQuery>()
  144. .map_err(|_| IronError::new(StringError(String::from("Failed to decode error")), status::BadRequest))
  145. .and_then(|ref qs_map| {
  146. let num_hits: usize = qs_map
  147. .get("nhits")
  148. .and_then(|nhits_str| usize::from_str(&nhits_str[0]).ok())
  149. .unwrap_or(10);
  150. let query = try!(qs_map
  151. .get("q")
  152. .ok_or_else(|| IronError::new(StringError(String::from("Parameter q is missing from the query")), status::BadRequest)))[0].clone();
  153. let serp = index_server.search(query, num_hits).unwrap();
  154. let resp_json = serde_json::to_string_pretty(&serp).unwrap();
  155. let content_type = "application/json".parse::<Mime>().unwrap();
  156. Ok(Response::with((content_type, status::Ok, format!("{}", resp_json))))
  157. })
  158. }
  159. fn run_serve(directory: PathBuf, host: &str) -> tantivy::Result<()> {
  160. let mut mount = Mount::new();
  161. let server = IndexServer::load(&directory);
  162. mount.mount("/api", search);
  163. let mut middleware = Chain::new(mount);
  164. middleware.link(Read::<IndexServer>::both(server));
  165. println!("listening on http://{}", host);
  166. Iron::new(middleware).http(host).unwrap();
  167. Ok(())
  168. }