
bbb

Branch: develop
Paul Masurel, 8 years ago
Parent commit: fae1904aa3
6 changed files with 245 additions and 79 deletions:
  1. src/main.rs (+113, -79)
  2. static/index.html (+34, -0)
  3. static/less.min.js (+21, -0)
  4. static/main.coffee (+11, -0)
  5. static/main.js (+22, -0)
  6. static/style.less (+44, -0)

src/main.rs (+113, -79)

@@ -1,118 +1,152 @@
extern crate tantivy;
extern crate time;
extern crate urlencoded;
#[macro_use]
extern crate lazy_static;
extern crate rustc_serialize;
extern crate iron;
extern crate staticfile;
extern crate mount;

use tantivy::collector::{CountCollector, FirstNCollector, MultiCollector};
use tantivy::schema::*;
use tantivy::Searcher;
use tantivy::Directory;
use std::io;
use tantivy::schema::{TextField, Term};
use tantivy::Index;
use std::convert::From;
use std::path::PathBuf;
use std::io::BufRead;
use time::PreciseTime;
use urlencoded::UrlEncodedQuery;
use tantivy::analyzer::SimpleTokenizer;
use iron::status;

extern crate iron;
extern crate staticfile;
extern crate mount;

// This example serves the docs from target/doc/staticfile at /doc/
//
// Run `cargo doc && cargo test && ./target/doc_server`, then
// point your browser to http://127.0.0.1:3000/doc/

use tantivy::analyzer::StreamingIterator;
use rustc_serialize::json::as_pretty_json;
use std::path::Path;

use staticfile::Static;
use iron::mime::Mime;
use mount::Mount;
use iron::prelude::*;

fn handle_query(searcher: &Searcher, terms: &Vec<Term>, print_fields: &Vec<TextField>) -> usize {
    let mut count_collector = CountCollector::new();
    let mut first_3_collector = FirstNCollector::with_limit(3);
    {
        let mut multi_collector = MultiCollector::from(vec!(&mut count_collector, &mut first_3_collector));
        searcher.search(&terms, &mut multi_collector);
    }
    let mut num_docs = 0;
    for doc_address in first_3_collector.docs().iter() {
        let doc = searcher.doc(doc_address).unwrap();
        for print_field in print_fields.iter() {
            for txt in doc.get_texts(print_field) {
                println!(" - txt: {:?}", txt);
#[derive(RustcDecodable, RustcEncodable)]
struct Serp {
    query: String,
    num_hits: usize,
    hits: Vec<Hit>,
    timings: Vec<Timing>,
}

#[derive(RustcDecodable, RustcEncodable)]
struct Hit {
    title: String,
    body: String,
}

lazy_static! {
    static ref INDEX: Index = {
        Index::open(&Path::new("/Users/pmasurel/wiki-index/")).unwrap()
    };
}

fn parse_query(q: &String, field: &TextField) -> Vec<Term> {
    let tokenizer = SimpleTokenizer::new();
    let mut token_it = tokenizer.tokenize(&q);
    let mut terms = Vec::new();
    loop {
        match token_it.next() {
            Some(token) => {
                terms.push(Term::from_field_text(field, &token));
            }
            None => { break; }
        }
    }
    terms
}


struct TimingStarted {
    name: String,
    start: PreciseTime,
}

impl TimingStarted {
    fn new(name: &str) -> TimingStarted {
        TimingStarted {
            name: String::from(name),
            start: PreciseTime::now(),
        }
    }

    fn stop(self) -> Timing {
        let stop = PreciseTime::now();
        Timing {
            name: self.name,
            duration: self.start.to(stop).num_microseconds().unwrap(),
        }
    }
    count_collector.count()
}

// fn hello_world(_: &mut Request) -> IronResult<Response> {
// Ok(Response::with((iron::status::Ok, "Hello World")))
// }
#[derive(RustcDecodable, RustcEncodable)]
struct Timing {
    name: String,
    duration: i64,
}


fn search(req: &mut Request) -> IronResult<Response> {
    // Extract the decoded data as hashmap, using the UrlEncodedQuery plugin.
    let mut timings = Vec::new();
    match req.get_ref::<UrlEncodedQuery>() {
        Ok(ref qs_map) => {
            println!("Parsed GET request query string:\n {:?}", qs_map);
            println!("{:?}", qs_map.get("q"));
            match qs_map.get("q") {
                Some(qs) => {
                    Ok(Response::with((status::Ok, format!("Hello!, {:?}", qs)) ))
                    let query = qs[0].clone();
                    let search_timing = TimingStarted::new("search");
                    let searcher = INDEX.searcher().unwrap();
                    let schema = INDEX.schema();
                    let title_field = schema.text_field("title");
                    let body_field = schema.text_field("body");
                    let terms = parse_query(&query, &body_field);
                    let mut count_collector = CountCollector::new();
                    let mut first_collector = FirstNCollector::with_limit(10);
                    {
                        let mut multi_collector = MultiCollector::from(vec!(&mut count_collector, &mut first_collector));
                        let timings = searcher.search(&terms, &mut multi_collector).unwrap();
                        println!("{:?}", timings);
                    }
                    timings.push(search_timing.stop());
                    let storage_timing = TimingStarted::new("store");
                    let hits: Vec<Hit> = first_collector
                        .docs()
                        .iter()
                        .map(|doc_address| searcher.doc(doc_address).unwrap())
                        .map(|doc|
                            Hit {
                                title: doc.get_first_text(&title_field).unwrap().clone(),
                                body: doc.get_first_text(&body_field).unwrap().clone(),
                            })
                        .collect();
                    timings.push(storage_timing.stop());
                    let response = Serp {
                        query: query,
                        hits: hits,
                        num_hits: count_collector.count(),
                        timings: timings,
                    };
                    let resp_json = as_pretty_json(&response).indent(4);
                    let content_type = "application/json".parse::<Mime>().unwrap();
                    Ok(
                        Response::with((content_type, status::Ok, format!("{}", resp_json)))
                    )
                }
                None => {
                    Ok(Response::with((status::BadRequest, "Query not defined")))
                }
            }
        }
        Err(ref e) => Ok(Response::with((status::BadRequest, "Failed to parse query string")))
        Err(_) => Ok(Response::with((status::BadRequest, "Failed to parse query string")))
    }
}

fn main() {
    // let directory = Directory::open(&PathBuf::from("/data/wiki-index/")).unwrap();
    // let schema = directory.schema();
    // let url_field = schema.field("url").unwrap();
    // let title_field = schema.field("title").unwrap();
    // let body_field = schema.field("body").unwrap();
    // let print_fields = vec!(title_field, url_field);
    //
    // let mut directory = Directory::open(&PathBuf::from("/data/wiki-index/")).unwrap();
    // let searcher = Searcher::for_directory(directory);
    // let tokenizer = SimpleTokenizer::new();
    //
    // println!("Ready");
    // let stdin = io::stdin();
    // loop {
    //     let mut input = String::new();
    //     print!("> ");
    //     stdin.read_line(&mut input);
    //     if input == "exit\n" {
    //         break;
    //     }
    //     let mut terms: Vec<Term> = Vec::new();
    //     let mut token_it = tokenizer.tokenize(&input);
    //     loop {
    //         match token_it.next() {
    //             Some(token) => {
    //                 terms.push(Term::from_field_text(&body_field, &token));
    //             }
    //             None => { break; }
    //         }
    //     }
    //     println!("Input: {:?}", input);
    //     println!("Keywords {:?}", terms);
    //     let start = PreciseTime::now();
    //     let num_docs = handle_query(&searcher, &terms, &print_fields);
    //     let stop = PreciseTime::now();
    //     println!("Elasped time {:?} microseconds", start.to(stop).num_microseconds().unwrap());
    //     println!("Num_docs {:?}", num_docs);

    let mut mount = Mount::new();
    mount.mount("/", search);
    mount.mount("/static/", Static::new(Path::new("static/")));
    mount.mount("/api", search);
    mount.mount("/", Static::new(Path::new("static/")));
    println!("Running on 3000");
    Iron::new(mount).http("127.0.0.1:3000").unwrap();
}

static/index.html (+34, -0)

@@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<head>
    <link rel="stylesheet/less" type="text/css" href="style.less"/>
    <title>Wikipedia search (powered by tantivy)</title>
</head>
<body>
    <script id="template" type="x-tmpl-mustache">
        <div class='query'>{{ query }}</div>
        <div class='num_hits'>{{ num_hits }} articles</div>
        <ul class="timings">
            {{#timings}}
            <li>{{name}} - {{duration}} µs</li>
            {{/timings}}
        </ul>
        <ul class='hits'>
            {{#hits}}
            <li>{{title}}</li>
            {{/hits}}
        </ul>

    </script>
    <form onsubmit="event.preventDefault(); search();">
        <input type="text" id="q" autocomplete="off"></input>
        <!-- <input type="submit"></input> -->
    </form>
    <div id="serp"></div>
    <script src="less.min.js" type="text/javascript"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/mustache.js/2.2.1/mustache.min.js"></script>
    <script src="https://code.jquery.com/jquery-1.12.0.min.js"></script>
    <script src="main.js"></script>
</body>
</html>

static/less.min.js (+21, -0)
(File diff suppressed because it is too large.)


static/main.coffee (+11, -0)

@@ -0,0 +1,11 @@

handle = (serp, status, resp)->
  template = $('#template').html()
  Mustache.parse(template)
  rendered = Mustache.render(template, serp)
  $("#serp").html(rendered)

window.search = ->
  q = $('#q').val()
  $.getJSON('/api', {q:q}, handle)
  true

static/main.js (+22, -0)

@@ -0,0 +1,22 @@
// Generated by CoffeeScript 1.9.2
(function() {
  var handle;

  handle = function(serp, status, resp) {
    var rendered, template;
    template = $('#template').html();
    Mustache.parse(template);
    rendered = Mustache.render(template, serp);
    return $("#serp").html(rendered);
  };

  window.search = function() {
    var q;
    q = $('#q').val();
    $.getJSON('/api', {
      q: q
    }, handle);
    return true;
  };

}).call(this);
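
The handle callback above receives the JSON that the Rust /api handler serializes from the Serp, Hit, and Timing structs added in src/main.rs. A minimal sketch of how that response looks from the browser side (the query string and the example values in the comments are invented for illustration, not part of this commit):

// Illustrative only: shape of the /api response consumed by handle().
$.getJSON('/api', {q: 'diderot'}, function(serp) {
  console.log(serp.query);     // the query echoed back, e.g. "diderot"
  console.log(serp.num_hits);  // total match count from CountCollector
  console.log(serp.hits);      // up to 10 {title, body} objects from FirstNCollector
  console.log(serp.timings);   // [{name: "search", duration: ...}, {name: "store", duration: ...}] in microseconds
});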

static/style.less (+44, -0)

@@ -0,0 +1,44 @@
body {
    background-color: #efefef;
    padding: 30px;
}

* {
    font: 16px arial,sans-serif;
}

input, button {
    font-size: 20px;
    //height: 45px;
    padding: 5px;
}

ul.timings {
    margin-top: 30px;
    list-style: none outside none; margin: 0; padding: 0;
    color: #888;
}
li {
    float: left; margin: 0 7px;
}


input {
    color: #333;
}

div.num_hits {
    color: #09f;
}

div.query {
    display: none;
}

ul.hits {
    padding: 0;
    list-style-type: none;
    li {
        margin: 10px;
    }
}
