You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

62 lines
1.7KB

  1. #![allow(unused)]
  2. use std::io::{self, prelude::*};
  3. use std::fs;
  4. use std::time::*;
  5. use std::env;
  6. use std::path::Path;
  7. use elasticlunr::{Index, Language};
  8. use serde::Deserialize;
  9. use bytelines::ByteLinesReader;
  10. #[derive(Deserialize, Debug)]
  11. struct Record {
  12. pub title: String,
  13. pub body: String,
  14. pub url: String,
  15. }
  /// Where the serialized index is written, relative to `$HOME`.
  const OUTPUT_PATH: &str = "src/search/js/elasticlunr-wiki-index.json";
  /// Where the line-delimited articles file is read from, relative to `$HOME`.
  const WIKIPEDIA_PATH: &str = "src/search/wiki-articles.json";
  18. fn main() -> Result<(), io::Error> {
  19. let start = Instant::now();
  20. let mut index = Index::new(&["title", "body"]);
  21. let home: String = env::var("HOME").unwrap();
  22. let home_dir = Path::new(&home);
  23. let input_file = home_dir.join(WIKIPEDIA_PATH);
  24. assert!(input_file.exists(), "path does not exist: {}", input_file.display());
  25. let mut rdr = io::BufReader::new(fs::File::open(input_file)?);
  26. let mut lines = rdr.byte_lines();
  27. let mut n_indexed = 0;
  28. while let Some(line_result) = lines.next() {
  29. let line = line_result?;
  30. let rec: Record =
  31. serde_json::from_slice(line)
  32. .map_err(|e| {
  33. io::Error::new(
  34. io::ErrorKind::InvalidData,
  35. format!("deserializing failed: {}", e)
  36. )
  37. })?;
  38. index.add_doc(&rec.url, &[&rec.title, &rec.body]);
  39. n_indexed += 1;
  40. }
  41. let output_file = home_dir.join(OUTPUT_PATH);
  42. let mut wtr = io::BufWriter::new(fs::File::create(&output_file)?);
  43. // hope you have plenty of ram!
  44. let index_json: String = index.to_json();
  45. wtr.write_all(index_json.as_bytes())?;
  46. let took = Instant::now() - start;
  47. println!("indexed {} records in {:?}", n_indexed, took);
  48. Ok(())
  49. }