You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

104 lines
3.9KB

  1. use tantivy::Index;
  2. use tantivy::schema::{Field, Schema};
  3. use tantivy::query::QueryParser;
  4. use tantivy::query::Query;
  5. use std::path::Path;
  6. use tantivy::TimerTree;
  7. use std::io::BufReader;
  8. use std::io::BufRead;
  9. use std::io;
  10. use std::fs::File;
  11. use tantivy::collector::chain;
  12. use tantivy::collector::TopCollector;
  13. use tantivy::collector::CountCollector;
  14. use clap::ArgMatches;
  15. use std::path::PathBuf;
  16. pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
  17. let index_path = PathBuf::from(matches.value_of("index").unwrap());
  18. let queries_path = PathBuf::from(matches.value_of("queries").unwrap()); // the unwrap is safe as long as it is comming from the main cli.
  19. let num_repeat = try!(value_t!(matches, "num_repeat", usize).map_err(|e|format!("Failed to read num_repeat argument as an integer. {:?}", e)));
  20. run_bench(&index_path, &queries_path, num_repeat).map_err(From::from)
  21. }
  22. fn extract_search_fields(schema: &Schema) -> Vec<Field> {
  23. schema.fields()
  24. .iter()
  25. .enumerate()
  26. .filter(|&(_, field_entry)| {
  27. field_entry.is_indexed()
  28. })
  29. .map(|(field_id, _)| field_id as u8)
  30. .map(Field)
  31. .collect()
  32. }
  33. fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
  34. let query_file: File = try!(File::open(&query_path));
  35. let file = BufReader::new(&query_file);
  36. let mut queries = Vec::new();
  37. for line_res in file.lines() {
  38. let line = try!(line_res);
  39. let query = String::from(line.trim());
  40. queries.push(query);
  41. }
  42. Ok(queries)
  43. }
  44. fn run_bench(index_path: &Path,
  45. query_filepath: &Path,
  46. num_repeat: usize) -> Result<(), String> {
  47. println!("index_path : {:?}", index_path);
  48. println!("Query : {:?}", index_path);
  49. println!("-------------------------------\n\n\n");
  50. let index = try!(Index::open(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e)));
  51. let searcher = index.searcher();
  52. let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
  53. let queries = try!(read_query_file(query_filepath).map_err(|e| format!("Failed reading the query file: {}", e)));
  54. let query_parser = QueryParser::new(index.schema(), default_search_fields);
  55. println!("SEARCH\n");
  56. println!("{}\t{}\t{}\t{}", "query", "num_terms", "num hits", "time in microsecs");
  57. for _ in 0..num_repeat {
  58. for query_txt in &queries {
  59. let query = query_parser.parse_query(&query_txt).unwrap();
  60. let num_terms = query.num_terms();
  61. let mut top_collector = TopCollector::with_limit(10);
  62. let mut count_collector = CountCollector::new();
  63. let timing;
  64. {
  65. let mut collector = chain().add(&mut top_collector).add(&mut count_collector);
  66. timing = try!(query.search(&searcher, &mut collector).map_err(|e| format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e)));
  67. }
  68. println!("{}\t{}\t{}\t{}", query_txt, num_terms, count_collector.count(), timing.total_time());
  69. }
  70. }
  71. println!("\n\nFETCH STORE\n");
  72. println!("{}\t{}", "query", "time in microsecs");
  73. for _ in 0..num_repeat {
  74. for query_txt in &queries {
  75. let query = query_parser.parse_query(&query_txt).unwrap();
  76. let mut top_collector = TopCollector::with_limit(10);
  77. try!(query.search(&searcher, &mut top_collector).map_err(|e| format!("Failed while retrieving document for query {:?}.\n{:?}", query, e)));
  78. let mut timer = TimerTree::new();
  79. {
  80. let _scoped_timer_ = timer.open("total");
  81. for doc_address in top_collector.docs() {
  82. searcher.doc(&doc_address).unwrap();
  83. }
  84. }
  85. println!("{}\t{}", query_txt, timer.total_time());
  86. }
  87. }
  88. Ok(())
  89. }