You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

99 lines
3.5KB

  1. use clap::ArgMatches;
  2. use serde_json;
  3. use std::str::FromStr;
  4. use std::convert::From;
  5. use std::path::Path;
  6. use std::path::PathBuf;
  7. use tantivy;
  8. use tantivy::query::QueryParser;
  9. use tantivy::schema::Field;
  10. use tantivy::schema::FieldType;
  11. use tantivy::Index;
  12. use tantivy::collector::{Count, TopDocs};
  13. use tantivy::Document;
  14. pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
  15. let index_directory = PathBuf::from(matches.value_of("index").unwrap());
  16. let query = matches.value_of("query").unwrap();
  17. match matches.value_of("num_hits") {
  18. Some(num_hits_str) => {
  19. let num_hits: usize = FromStr::from_str(num_hits_str)
  20. .map_err(|e| { format!("Failed to parse --num_hits (got '{}', expected integer): {}", num_hits_str, e) })?;
  21. run_top_search(&index_directory, &query, num_hits)
  22. .map_err(|e| format!("{:?}", e))
  23. }
  24. None => {
  25. run_search(&index_directory, &query).map_err(|e| format!("{:?}", e))
  26. }
  27. }
  28. }
  29. fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
  30. let index = Index::open_in_dir(directory)?;
  31. let schema = index.schema();
  32. let default_fields: Vec<Field> = schema
  33. .fields()
  34. .filter(|&(_, ref field_entry)| match *field_entry.field_type() {
  35. FieldType::Str(ref text_field_options) => {
  36. text_field_options.get_indexing_options().is_some()
  37. }
  38. _ => false,
  39. })
  40. .map(|(field, _)| field)
  41. .collect();
  42. let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
  43. let query = query_parser.parse_query(query)?;
  44. let searcher = index.reader()?.searcher();
  45. let weight = query.weight(&searcher, false)?;
  46. let schema = index.schema();
  47. for segment_reader in searcher.segment_readers() {
  48. let mut scorer = weight.scorer(segment_reader, 1.0)?;
  49. let store_reader = segment_reader.get_store_reader();
  50. while scorer.advance() {
  51. let doc_id = scorer.doc();
  52. let doc = store_reader.get(doc_id)?;
  53. let named_doc = schema.to_named_doc(&doc);
  54. println!("{}", serde_json::to_string(&named_doc).unwrap());
  55. }
  56. }
  57. Ok(())
  58. }
  59. fn run_top_search(directory: &Path, query: &str, num_hits: usize) -> tantivy::Result<()> {
  60. let index = Index::open_in_dir(directory)?;
  61. let schema = index.schema();
  62. let default_fields: Vec<Field> = schema
  63. .fields()
  64. .filter(|(_, field_entry)| {
  65. match field_entry.field_type() {
  66. FieldType::Str(ref text_field_options) => {
  67. text_field_options.get_indexing_options().is_some()
  68. }
  69. _ => false,
  70. }
  71. })
  72. .map(|(field, _)| field)
  73. .collect();
  74. let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
  75. let query = query_parser.parse_query(query)?;
  76. let searcher = index.reader()?.searcher();
  77. let (top_docs, num_hits) = searcher.search(&query, &(TopDocs::with_limit(num_hits), Count))?;
  78. let mut out = String::with_capacity(1024);
  79. top_docs
  80. .iter()
  81. .take(num_hits)
  82. .for_each(|(_score, doc_address)| {
  83. let doc: Document = searcher.doc(*doc_address).unwrap();
  84. let named_doc = schema.to_named_doc(&doc);
  85. let json_doc: String = serde_json::to_string(&named_doc).unwrap();
  86. out.push_str(&format!("{}\n", json_doc));
  87. });
  88. print!("{}", out);
  89. Ok(())
  90. }