You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

80 lines
2.7KB

  1. extern crate tantivy;
  2. extern crate time;
  3. use std::fs::File;
  4. use std::io::BufReader;
  5. use std::io::Read;
  6. use tantivy::core::postings::VecPostings;
  7. use tantivy::core::postings::Postings;
  8. use tantivy::core::collector::TestCollector;
  9. use tantivy::core::serial::*;
  10. use tantivy::core::schema::*;
  11. use tantivy::core::codec::SimpleCodec;
  12. use tantivy::core::global::*;
  13. use tantivy::core::searcher::Searcher;
  14. use tantivy::core::directory::{Directory, generate_segment_name, SegmentId};
  15. use std::ops::DerefMut;
  16. use tantivy::core::reader::SegmentReader;
  17. use std::io::{ BufWriter, Write};
  18. use std::io;
  19. use std::convert::From;
  20. use std::path::PathBuf;
  21. use tantivy::core::query;
  22. use tantivy::core::query::parse_query;
  23. use tantivy::core::analyzer::SimpleTokenizer;
  24. use std::borrow::Borrow;
  25. use std::io::BufRead;
  26. use std::fs;
  27. use std::io::Cursor;
  28. use time::PreciseTime;
  29. fn count_docs(searcher: &Searcher, terms: &Vec<Term>) -> usize {
  30. // let terms = vec!(, Term::from_field_text(&body_field, "france"));
  31. let mut collector = TestCollector::new();
  32. searcher.search(&terms, &mut collector);
  33. let mut num_docs = 0;
  34. for doc_id in collector.docs().iter() {
  35. num_docs += 1;
  36. }
  37. num_docs
  38. }
  39. fn main() {
  40. let str_fieldtype = FieldOptions::new();
  41. let text_fieldtype = FieldOptions::new().set_tokenized_indexed();
  42. let mut schema = Schema::new();
  43. let id_field = schema.add_field("id", &str_fieldtype);
  44. let url_field = schema.add_field("url", &str_fieldtype);
  45. let title_field = schema.add_field("title", &text_fieldtype);
  46. let body_field = schema.add_field("body", &text_fieldtype);
  47. let mut directory = Directory::open(&PathBuf::from("/media/ssd/wikiindex")).unwrap();
  48. directory.set_schema(&schema);
  49. let searcher = Searcher::for_directory(directory);
  50. let tokenizer = SimpleTokenizer::new();
  51. let mut stdin = io::stdin();
  52. 'mainloop: loop {
  53. let mut input = String::new();
  54. print!("> ");
  55. stdin.read_line(&mut input);
  56. if input == "exit\n" {
  57. break 'mainloop;
  58. }
  59. let mut terms: Vec<Term> = Vec::new();
  60. let mut token_it = tokenizer.tokenize(&input);
  61. let mut term_buffer = String::new();
  62. while token_it.read_one(&mut term_buffer) {
  63. terms.push(Term::from_field_text(&body_field, &term_buffer));
  64. }
  65. // let terms = keywords.iter().map(|s| Term::from_field_text(&body_field, &s));
  66. println!("Input: {:?}", input);
  67. println!("Keywords {:?}", terms);
  68. let start = PreciseTime::now();
  69. let num_docs = count_docs(&searcher, &terms);
  70. let stop = PreciseTime::now();
  71. println!("Elasped time {:?}", start.to(stop));
  72. println!("Num_docs {:?}", num_docs);
  73. }
  74. }