You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

104 lines
3.7KB

  1. use clap::ArgMatches;
  2. use serde_json;
  3. use std::str::FromStr;
  4. use std::convert::From;
  5. use std::path::Path;
  6. use std::path::PathBuf;
  7. use std::io::Write;
  8. use tantivy;
  9. use tantivy::query::QueryParser;
  10. use tantivy::schema::Field;
  11. use tantivy::schema::FieldType;
  12. use tantivy::Index;
  13. use tantivy::collector::{Count, TopDocs};
  14. use tantivy::Document;
  15. pub fn run_search_cli(matches: &ArgMatches) -> Result<(), String> {
  16. let index_directory = PathBuf::from(matches.value_of("index").unwrap());
  17. let query = matches.value_of("query").unwrap();
  18. match matches.value_of("num_hits") {
  19. Some(num_hits_str) => {
  20. let num_hits: usize = FromStr::from_str(num_hits_str)
  21. .map_err(|e| { format!("Failed to parse --num_hits (got '{}', expected integer): {}", num_hits_str, e) })?;
  22. run_top_search(&index_directory, &query, num_hits)
  23. .map_err(|e| format!("{:?}", e))
  24. }
  25. None => {
  26. run_search(&index_directory, &query).map_err(|e| format!("{:?}", e))
  27. }
  28. }
  29. }
  30. fn run_search(directory: &Path, query: &str) -> tantivy::Result<()> {
  31. let index = Index::open_in_dir(directory)?;
  32. let schema = index.schema();
  33. let default_fields: Vec<Field> = schema
  34. .fields()
  35. .filter(|&(_, ref field_entry)| match *field_entry.field_type() {
  36. FieldType::Str(ref text_field_options) => {
  37. text_field_options.get_indexing_options().is_some()
  38. }
  39. _ => false,
  40. })
  41. .map(|(field, _)| field)
  42. .collect();
  43. let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
  44. let query = query_parser.parse_query(query)?;
  45. let searcher = index.reader()?.searcher();
  46. let weight = query.weight(&searcher, false)?;
  47. let schema = index.schema();
  48. let stdout = std::io::stdout();
  49. let handle = stdout.lock();
  50. let mut wtr = std::io::BufWriter::new(handle);
  51. for segment_reader in searcher.segment_readers() {
  52. let mut scorer = weight.scorer(segment_reader, 1.0)?;
  53. let store_reader = segment_reader.get_store_reader();
  54. while scorer.advance() {
  55. let doc_id = scorer.doc();
  56. let doc = store_reader.get(doc_id)?;
  57. let named_doc = schema.to_named_doc(&doc);
  58. serde_json::to_writer(&mut wtr, &named_doc).unwrap();
  59. wtr.write_all(b"\n")?;
  60. }
  61. }
  62. Ok(())
  63. }
  64. fn run_top_search(directory: &Path, query: &str, num_hits: usize) -> tantivy::Result<()> {
  65. let index = Index::open_in_dir(directory)?;
  66. let schema = index.schema();
  67. let default_fields: Vec<Field> = schema
  68. .fields()
  69. .filter(|(_, field_entry)| {
  70. match field_entry.field_type() {
  71. FieldType::Str(ref text_field_options) => {
  72. text_field_options.get_indexing_options().is_some()
  73. }
  74. _ => false,
  75. }
  76. })
  77. .map(|(field, _)| field)
  78. .collect();
  79. let query_parser = QueryParser::new(schema.clone(), default_fields, index.tokenizers().clone());
  80. let query = query_parser.parse_query(query)?;
  81. let searcher = index.reader()?.searcher();
  82. let (top_docs, num_hits) = searcher.search(&query, &(TopDocs::with_limit(num_hits), Count))?;
  83. let mut out = String::with_capacity(1024);
  84. top_docs
  85. .iter()
  86. .take(num_hits)
  87. .for_each(|(_score, doc_address)| {
  88. let doc: Document = searcher.doc(*doc_address).unwrap();
  89. let named_doc = schema.to_named_doc(&doc);
  90. let json_doc: String = serde_json::to_string(&named_doc).unwrap();
  91. out.push_str(&format!("{}\n", json_doc));
  92. });
  93. print!("{}", out);
  94. Ok(())
  95. }