Browse Source

initial commit

master
Jonathan Strong 1 year ago
commit
37f5d29680
4 changed files with 99 additions and 0 deletions
  1. +3
    -0
      .gitignore
  2. +17
    -0
      Cargo.toml
  3. +18
    -0
      query.js
  4. +61
    -0
      src/main.rs

+ 3
- 0
.gitignore View File

@@ -0,0 +1,3 @@
/target
*.swp
node_modules/

+ 17
- 0
Cargo.toml View File

@@ -0,0 +1,17 @@
[package]
name = "elasticlunr-indexer"
version = "0.1.0"
authors = ["Jonathan Strong <jonathan.strong@gmail.com>"]
edition = "2018"

[[bin]]
name = "indexer"
path = "src/main.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
elasticlunr-rs = "2.3"
serde = { version = "1.0", features = ["derive"] }
bytelines = "2.2"
serde_json = "1.0"

+ 18
- 0
query.js View File

@@ -0,0 +1,18 @@
const fs = require('fs');
const elasticlunr = require('elasticlunr');

// Reference timestamp; every log line below is reported relative to it.
const start = process.hrtime();

/**
 * Logs `msg` prefixed with the time elapsed since `start`.
 *
 * `process.hrtime(start)` yields a `[seconds, nanoseconds]` pair; the
 * nanosecond part is scaled down to milliseconds for display.
 */
function elapsed(msg) {
    const [secs, nanos] = process.hrtime(start);
    console.info('[ %ds %dms elapsed ] %s', secs, nanos / 1000000, msg);
}

// Input to load; the commented-out line is an alternative input file.
const inputFile = '../sample2.json';
//const inputFile = 'elasticlunr-wiki-index.json';

// Step 1: read the whole file into memory.
elapsed('reading file ...');
const rawData = fs.readFileSync(inputFile);

// Step 2: parse it as JSON.
elapsed('parsing json ...');
const indexData = JSON.parse(rawData);

// Step 3: report the `length` property of the parsed value.
const parsedLength = indexData.length;
elapsed('indexData.length = ' + parsedLength);

+ 61
- 0
src/main.rs View File

@@ -0,0 +1,61 @@
#![allow(unused)]
use std::io::{self, prelude::*};
use std::fs;
use std::time::*;
use std::env;
use std::path::Path;
use elasticlunr::{Index, Language};
use serde::Deserialize;
use bytelines::ByteLinesReader;

/// One input document, deserialized in `main` from a single line of the
/// input file.
#[derive(Deserialize, Debug)]
struct Record {
/// Document title; passed to the index as the `title` field.
pub title: String,
/// Document text; passed to the index as the `body` field.
pub body: String,
/// Document URL; used as the document reference when adding to the index.
pub url: String,
}

/// Path of the input file, relative to `$HOME` (joined onto it in `main`).
/// `main` parses each line of this file as one JSON `Record`.
const WIKIPEDIA_PATH: &str = "src/search/wiki-articles.json";
/// Path of the output file, relative to `$HOME` (joined onto it in `main`).
/// `main` writes the JSON-serialized index here.
const OUTPUT_PATH: &str = "src/search/js/elasticlunr-wiki-index.json";

/// Builds an elasticlunr index from the line-delimited JSON records in
/// `$HOME/WIKIPEDIA_PATH` and writes the serialized index to
/// `$HOME/OUTPUT_PATH`, then prints how many records were indexed and how
/// long the run took.
///
/// # Errors
///
/// Returns an `io::Error` when:
/// - the `HOME` environment variable cannot be read,
/// - the input file cannot be opened or read,
/// - a line fails to deserialize into a `Record` (`ErrorKind::InvalidData`),
/// - the output file cannot be created, written, or flushed.
fn main() -> Result<(), io::Error> {
    let start = Instant::now();

    let mut index = Index::new(&["title", "body"]);

    // Input and output paths are relative to the home directory. A missing or
    // unreadable `HOME` is an environment problem, not a bug, so report it as
    // an error rather than panicking.
    let home: String = env::var("HOME").map_err(|e| {
        io::Error::new(
            io::ErrorKind::Other,
            format!("could not read HOME environment variable: {}", e),
        )
    })?;
    let home_dir = Path::new(&home);

    // Open directly instead of checking `exists()` first: this avoids the
    // check-then-open race and still names the offending path on failure.
    let input_file = home_dir.join(WIKIPEDIA_PATH);
    let input = fs::File::open(&input_file).map_err(|e| {
        io::Error::new(
            e.kind(),
            format!("failed to open {}: {}", input_file.display(), e),
        )
    })?;

    // `next()` yields byte slices borrowed from the reader (they are handed
    // straight to `from_slice` below), which an `Iterator` cannot express, so
    // this stays a `while let` loop rather than a `for` loop.
    let mut lines = io::BufReader::new(input).byte_lines();

    let mut n_indexed = 0_u64;

    while let Some(line_result) = lines.next() {
        let line = line_result?;
        let rec: Record = serde_json::from_slice(line).map_err(|e| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("deserializing failed: {}", e),
            )
        })?;
        // The URL serves as the document reference; title and body are the
        // two indexed fields, in the order declared in `Index::new` above.
        index.add_doc(&rec.url, &[&rec.title, &rec.body]);
        n_indexed += 1;
    }

    let output_file = home_dir.join(OUTPUT_PATH);
    let output = fs::File::create(&output_file).map_err(|e| {
        io::Error::new(
            e.kind(),
            format!("failed to create {}: {}", output_file.display(), e),
        )
    })?;
    let mut wtr = io::BufWriter::new(output);

    // hope you have plenty of ram! The whole index is serialized into a
    // single in-memory string before being written out.
    let index_json: String = index.to_json();

    wtr.write_all(index_json.as_bytes())?;
    // Flush explicitly: `BufWriter`'s drop ignores write errors, which would
    // let a truncated index file go unnoticed.
    wtr.flush()?;

    let took = start.elapsed();
    println!("indexed {} records in {:?}", n_indexed, took);
    Ok(())
}

Loading…
Cancel
Save