#![allow(unused)] use std::io::{self, prelude::*}; use std::fs; use std::time::*; use std::env; use std::path::Path; use elasticlunr::{Index, Language}; use serde::Deserialize; use bytelines::ByteLinesReader; #[derive(Deserialize, Debug)] struct Record { pub title: String, pub body: String, pub url: String, } const WIKIPEDIA_PATH: &str = "src/search/wiki-articles.json"; const OUTPUT_PATH: &str = "src/search/js/elasticlunr-wiki-index.json"; fn main() -> Result<(), io::Error> { let start = Instant::now(); let mut index = Index::new(&["title", "body"]); let home: String = env::var("HOME").unwrap(); let home_dir = Path::new(&home); let input_file = home_dir.join(WIKIPEDIA_PATH); assert!(input_file.exists(), "path does not exist: {}", input_file.display()); let mut rdr = io::BufReader::new(fs::File::open(input_file)?); let mut lines = rdr.byte_lines(); let mut n_indexed = 0; while let Some(line_result) = lines.next() { let line = line_result?; let rec: Record = serde_json::from_slice(line) .map_err(|e| { io::Error::new( io::ErrorKind::InvalidData, format!("deserializing failed: {}", e) ) })?; index.add_doc(&rec.url, &[&rec.title, &rec.body]); n_indexed += 1; } let output_file = home_dir.join(OUTPUT_PATH); let mut wtr = io::BufWriter::new(fs::File::create(&output_file)?); // hope you have plenty of ram! let index_json: String = index.to_json(); wtr.write_all(index_json.as_bytes())?; let took = Instant::now() - start; println!("indexed {} records in {:?}", n_indexed, took); Ok(()) }