You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

80 lines
2.7KB

  1. extern crate tantivy;
  2. extern crate time;
  3. use tantivy::core::collector::{CountCollector, FirstNCollector, MultiCollector};
  4. use tantivy::core::schema::*;
  5. use tantivy::core::searcher::Searcher;
  6. use tantivy::core::directory::Directory;
  7. use std::io;
  8. use std::convert::From;
  9. use std::path::PathBuf;
  10. use tantivy::core::analyzer::*;
  11. use std::io::BufRead;
  12. use time::PreciseTime;
  13. fn handle_query(searcher: &Searcher, terms: &Vec<Term>, print_fields: &Vec<Field>) -> usize {
  14. // let mut collector = TestCollector::new();
  15. let mut count_collector = CountCollector::new();
  16. let mut first_3_collector = FirstNCollector::with_limit(3);
  17. {
  18. let mut multi_collector = MultiCollector::from(vec!(&mut count_collector, &mut first_3_collector));
  19. searcher.search(&terms, &mut multi_collector);
  20. }
  21. let mut num_docs = 0;
  22. for doc_address in first_3_collector.docs().iter() {
  23. let doc = searcher.get_doc(doc_address);
  24. for print_field in print_fields.iter() {
  25. for txt in doc.get(print_field) {
  26. println!(" - txt: {:?}", txt);
  27. }
  28. }
  29. }
  30. count_collector.count()
  31. }
  32. fn main() {
  33. let str_fieldtype = FieldOptions::new();
  34. let text_fieldtype = FieldOptions::new().set_tokenized_indexed();
  35. let mut schema = Schema::new();
  36. let url_field = schema.add_field("url", &str_fieldtype);
  37. let title_field = schema.add_field("title", &text_fieldtype);
  38. let body_field = schema.add_field("body", &text_fieldtype);
  39. let print_fields = vec!(title_field, url_field);
  40. let mut directory = Directory::open(&PathBuf::from("/data/wiki-index/")).unwrap();
  41. directory.set_schema(&schema);
  42. let searcher = Searcher::for_directory(directory);
  43. let tokenizer = SimpleTokenizer::new();
  44. println!("Ready");
  45. let stdin = io::stdin();
  46. loop {
  47. let mut input = String::new();
  48. print!("> ");
  49. stdin.read_line(&mut input);
  50. if input == "exit\n" {
  51. break;
  52. }
  53. let mut terms: Vec<Term> = Vec::new();
  54. let mut token_it = tokenizer.tokenize(&input);
  55. loop {
  56. match token_it.next() {
  57. Some(token) => {
  58. terms.push(Term::from_field_text(&body_field, &token));
  59. }
  60. None => { break; }
  61. }
  62. }
  63. println!("Input: {:?}", input);
  64. println!("Keywords {:?}", terms);
  65. let start = PreciseTime::now();
  66. let num_docs = handle_query(&searcher, &terms, &print_fields);
  67. let stop = PreciseTime::now();
  68. println!("Elasped time {:?} microseconds", start.to(stop).num_microseconds().unwrap());
  69. println!("Num_docs {:?}", num_docs);
  70. }
  71. }