|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- #![allow(unused)]
- use std::io::{self, prelude::*};
- use std::fs;
- use std::time::*;
- use std::env;
- use std::path::Path;
- use elasticlunr::{Index, Language};
- use serde::Deserialize;
- use bytelines::ByteLinesReader;
-
- #[derive(Deserialize, Debug)]
- struct Record {
- pub title: String,
- pub body: String,
- pub url: String,
- }
-
/// Input file of articles (one JSON document per line); `main` joins this onto `$HOME`.
const WIKIPEDIA_PATH: &str = "src/search/wiki-articles.json";

/// Destination of the serialized search index; `main` joins this onto `$HOME`.
const OUTPUT_PATH: &str = "src/search/js/elasticlunr-wiki-index.json";
-
- fn main() -> Result<(), io::Error> {
- let start = Instant::now();
-
- let mut index = Index::new(&["title", "body"]);
-
- let home: String = env::var("HOME").unwrap();
- let home_dir = Path::new(&home);
- let input_file = home_dir.join(WIKIPEDIA_PATH);
- assert!(input_file.exists(), "path does not exist: {}", input_file.display());
-
- let mut rdr = io::BufReader::new(fs::File::open(input_file)?);
- let mut lines = rdr.byte_lines();
-
- let mut n_indexed = 0;
-
- while let Some(line_result) = lines.next() {
- let line = line_result?;
- let rec: Record =
- serde_json::from_slice(line)
- .map_err(|e| {
- io::Error::new(
- io::ErrorKind::InvalidData,
- format!("deserializing failed: {}", e)
- )
- })?;
- index.add_doc(&rec.url, &[&rec.title, &rec.body]);
- n_indexed += 1;
- }
-
- let output_file = home_dir.join(OUTPUT_PATH);
- let mut wtr = io::BufWriter::new(fs::File::create(&output_file)?);
-
- // hope you have plenty of ram!
- let index_json: String = index.to_json();
-
- wtr.write_all(index_json.as_bytes())?;
-
- let took = Instant::now() - start;
- println!("indexed {} records in {:?}", n_indexed, took);
- Ok(())
- }
|