#![allow(unused)]

use std::env;
use std::fs;
use std::io::{self, prelude::*};
use std::path::Path;
use std::time::Instant;

use bytelines::ByteLinesReader;
use elasticlunr::{Index, Language};
use serde::Deserialize;

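/// One Wikipedia article. The input file is newline-delimited JSON,
/// one object per line, with keys matching the field names below,
/// e.g. (values are illustrative, not from the actual dump):
/// {"title": "Anarchism", "body": "Anarchism is ...", "url": "https://en.wikipedia.org/wiki/Anarchism"}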
#[derive(Deserialize, Debug)]
struct Record {
    pub title: String,
    pub body: String,
    pub url: String,
}

// Both paths are resolved relative to $HOME (see main below).
const WIKIPEDIA_PATH: &str = "src/search/wiki-articles.json";
const OUTPUT_PATH: &str = "src/search/js/elasticlunr-wiki-index.json";

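// Build an elasticlunr index over a local Wikipedia dump and write it
// out as JSON, presumably for a JavaScript search frontend to load.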
fn main() -> Result<(), io::Error> {
    let start = Instant::now();

    // Two searchable fields; the document ref (the URL) is supplied
    // separately in add_doc below.
    let mut index = Index::new(&["title", "body"]);

    let home = env::var("HOME").expect("HOME is not set");
    let home_dir = Path::new(&home);
    let input_file = home_dir.join(WIKIPEDIA_PATH);
    assert!(input_file.exists(), "path does not exist: {}", input_file.display());

    let rdr = io::BufReader::new(fs::File::open(input_file)?);
    // bytelines reads each line as a raw &[u8] into a reused buffer,
    // avoiding a String allocation per line.
    let mut lines = rdr.byte_lines();

    let mut n_indexed = 0;

    while let Some(line_result) = lines.next() {
        let line = line_result?;
        // Map serde_json's error into io::Error so `?` matches main's
        // return type.
        let rec: Record = serde_json::from_slice(line).map_err(|e| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("deserializing failed: {}", e),
            )
        })?;
        // The URL is the document ref; title and body are the indexed
        // fields, in the same order as in Index::new.
        index.add_doc(&rec.url, &[&rec.title, &rec.body]);
        n_indexed += 1;
    }

    let output_file = home_dir.join(OUTPUT_PATH);
    let mut wtr = io::BufWriter::new(fs::File::create(&output_file)?);

    // to_json serializes the entire index into a single in-memory
    // String; hope you have plenty of RAM!
    let index_json: String = index.to_json();

    wtr.write_all(index_json.as_bytes())?;
    // Flush explicitly so write errors surface here instead of being
    // swallowed when the BufWriter is dropped.
    wtr.flush()?;

    let took = start.elapsed();
    println!("indexed {} records in {:?}", n_indexed, took);
    Ok(())
}
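
// A minimal sketch of the Cargo.toml dependencies this file assumes
// (versions are illustrative; the source does not pin them):
//
// [dependencies]
// bytelines = "2"
// elasticlunr-rs = "2"
// serde = { version = "1", features = ["derive"] }
// serde_json = "1"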