You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

75 lines
2.5KB

  1. extern crate tantivy;
  2. extern crate time;
  3. use tantivy::core::collector::{CountCollector, FirstNCollector, MultiCollector};
  4. use tantivy::core::schema::*;
  5. use tantivy::core::searcher::Searcher;
  6. use tantivy::core::directory::Directory;
  7. use std::io;
  8. use std::convert::From;
  9. use std::path::PathBuf;
  10. use tantivy::core::analyzer::*;
  11. use std::io::BufRead;
  12. use time::PreciseTime;
  13. fn handle_query(searcher: &Searcher, terms: &Vec<Term>, print_fields: &Vec<Field>) -> usize {
  14. let mut count_collector = CountCollector::new();
  15. let mut first_3_collector = FirstNCollector::with_limit(3);
  16. {
  17. let mut multi_collector = MultiCollector::from(vec!(&mut count_collector, &mut first_3_collector));
  18. searcher.search(&terms, &mut multi_collector);
  19. }
  20. let mut num_docs = 0;
  21. for doc_address in first_3_collector.docs().iter() {
  22. let doc = searcher.get_doc(doc_address);
  23. for print_field in print_fields.iter() {
  24. for txt in doc.get(print_field) {
  25. println!(" - txt: {:?}", txt);
  26. }
  27. }
  28. }
  29. count_collector.count()
  30. }
  31. fn main() {
  32. let directory = Directory::open(&PathBuf::from("/data/wiki-index/")).unwrap();
  33. let schema = directory.schema();
  34. let url_field = schema.field("url").unwrap();
  35. let title_field = schema.field("title").unwrap();
  36. let body_field = schema.field("body").unwrap();
  37. let print_fields = vec!(title_field, url_field);
  38. let mut directory = Directory::open(&PathBuf::from("/data/wiki-index/")).unwrap();
  39. let searcher = Searcher::for_directory(directory);
  40. let tokenizer = SimpleTokenizer::new();
  41. println!("Ready");
  42. let stdin = io::stdin();
  43. loop {
  44. let mut input = String::new();
  45. print!("> ");
  46. stdin.read_line(&mut input);
  47. if input == "exit\n" {
  48. break;
  49. }
  50. let mut terms: Vec<Term> = Vec::new();
  51. let mut token_it = tokenizer.tokenize(&input);
  52. loop {
  53. match token_it.next() {
  54. Some(token) => {
  55. terms.push(Term::from_field_text(&body_field, &token));
  56. }
  57. None => { break; }
  58. }
  59. }
  60. println!("Input: {:?}", input);
  61. println!("Keywords {:?}", terms);
  62. let start = PreciseTime::now();
  63. let num_docs = handle_query(&searcher, &terms, &print_fields);
  64. let stop = PreciseTime::now();
  65. println!("Elasped time {:?} microseconds", start.to(stop).num_microseconds().unwrap());
  66. println!("Num_docs {:?}", num_docs);
  67. }
  68. }