You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

109 lines
3.8KB

  1. use clap::ArgMatches;
  2. use std::fs::File;
  3. use std::io;
  4. use std::io::BufRead;
  5. use std::io::BufReader;
  6. use std::path::Path;
  7. use std::path::PathBuf;
  8. use tantivy::collector::{Count, TopDocs};
  9. use tantivy::query::QueryParser;
  10. use tantivy::schema::{Field, Schema};
  11. use tantivy::Index;
  12. use crate::timer::TimerTree;
  13. pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
  14. let index_path = PathBuf::from(matches.value_of("index").unwrap());
  15. let queries_path = PathBuf::from(matches.value_of("queries").unwrap()); // the unwrap is safe as long as it is comming from the main cli.
  16. let num_repeat = value_t!(matches, "num_repeat", usize)
  17. .map_err(|e| format!("Failed to read num_repeat argument as an integer. {:?}", e))?;
  18. run_bench(&index_path, &queries_path, num_repeat).map_err(From::from)
  19. }
  20. fn extract_search_fields(schema: &Schema) -> Vec<Field> {
  21. schema
  22. .fields()
  23. .iter()
  24. .enumerate()
  25. .filter(|&(_, field_entry)| field_entry.is_indexed())
  26. .map(|(field_id, _)| Field(field_id as u32))
  27. .collect()
  28. }
  /// Loads the benchmark queries stored in `query_path`, one query per line.
  ///
  /// Lines are returned in file order, and empty lines are kept as empty
  /// strings.
  ///
  /// # Errors
  ///
  /// Returns the underlying I/O error if the file cannot be opened, or the
  /// first error hit while reading a line.
  fn read_query_file(query_path: &Path) -> io::Result<Vec<String>> {
      let reader = BufReader::new(File::open(query_path)?);
      // Collecting into `io::Result<Vec<_>>` stops at the first failing line.
      reader.lines().collect()
  }
  38. fn run_bench(index_path: &Path, query_filepath: &Path, num_repeat: usize) -> Result<(), String> {
  39. println!("index_path : {:?}", index_path);
  40. println!("Query : {:?}", index_path);
  41. println!("-------------------------------\n\n\n");
  42. let index =
  43. Index::open_in_dir(index_path).map_err(|e| format!("Failed to open index.\n{:?}", e))?;
  44. let searcher = index
  45. .reader()
  46. .map_err(|err| format!("{:?}", err))?
  47. .searcher();
  48. let default_search_fields: Vec<Field> = extract_search_fields(&index.schema());
  49. let queries = read_query_file(query_filepath)
  50. .map_err(|e| format!("Failed reading the query file: {}", e))?;
  51. let query_parser = QueryParser::new(
  52. index.schema(),
  53. default_search_fields,
  54. index.tokenizers().clone(),
  55. );
  56. println!("SEARCH\n");
  57. println!(
  58. "{}\t{}\t{}\t{}",
  59. "query", "num_terms", "num hits", "time in microsecs"
  60. );
  61. for _ in 0..num_repeat {
  62. for query_txt in &queries {
  63. let query = query_parser.parse_query(&query_txt).unwrap();
  64. let mut timing = TimerTree::default();
  65. let (_top_docs, count) = {
  66. let _search = timing.open("search");
  67. searcher
  68. .search(&query, &(TopDocs::with_limit(10), Count))
  69. .map_err(|e| {
  70. format!("Failed while searching query {:?}.\n\n{:?}", query_txt, e)
  71. })?
  72. };
  73. println!("{}\t{}\t{}", query_txt, count, timing.total_time());
  74. }
  75. }
  76. println!("\n\nFETCH STORE\n");
  77. println!("{}\t{}", "query", "time in microsecs");
  78. for _ in 0..num_repeat {
  79. for query_txt in &queries {
  80. let query = query_parser.parse_query(&query_txt).unwrap();
  81. let top_docs = searcher
  82. .search(&*query, &TopDocs::with_limit(10))
  83. .map_err(|e| {
  84. format!(
  85. "Failed while retrieving document for query {:?}.\n{:?}",
  86. query, e
  87. )
  88. })?;
  89. let mut timer = TimerTree::default();
  90. {
  91. let _scoped_timer_ = timer.open("total");
  92. for (_score, doc_address) in top_docs {
  93. searcher.doc(doc_address).unwrap();
  94. }
  95. }
  96. println!("{}\t{}", query_txt, timer.total_time());
  97. }
  98. }
  99. Ok(())
  100. }