Browse Source

Merge pull request #47 from petr-tik/prepare_for_integration

Prepare for integration
develop
Paul Masurel GitHub 5 years ago
parent
commit
db69598680
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 97 additions and 20 deletions
  1. +1
    -0
      Cargo.toml
  2. +18
    -0
      integration_plan.org
  3. +1
    -1
      src/commands/bench.rs
  4. +1
    -1
      src/commands/index.rs
  5. +73
    -15
      src/commands/new.rs
  6. +3
    -3
      src/commands/serve.rs

+ 1
- 0
Cargo.toml View File

@@ -11,6 +11,7 @@ repository = "https://github.com/tantivy-search/tantivy-cli"
readme = "README.md"
keywords = ["search", "information", "retrieval"]
license = "MIT"
edition = "2018"

[dependencies]
time = "0.1"


+ 18
- 0
integration_plan.org View File

@@ -0,0 +1,18 @@
* Integrate cli as a binary built as part of tantivy-core
** Pros
*** Builds every commit
*** Allows prospective users to try tantivy
** Cons
*** Increases build time
*** Slows down core feature development
If every core feature requires changing the example CLI, reviewing and fixing core library functionality will slow down.

* Outline
** DONE Update to rust edition 2018
CLOSED: [2019-10-19 Sat 20:28]
** DONE Add all datatypes supported by the library to the dialog
CLOSED: [2019-10-19 Sat 22:33]
** TODO test locally with a dependency on master of local tantivy
* Further work
** TODO Skip fields found in the JSON but not in the schema, instead of erroring


+ 1
- 1
src/commands/bench.rs View File

@@ -9,7 +9,7 @@ use tantivy::collector::{Count, TopDocs};
use tantivy::query::QueryParser;
use tantivy::schema::{Field, Schema};
use tantivy::Index;
use timer::TimerTree;
use crate::timer::TimerTree;

pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> {
let index_path = PathBuf::from(matches.value_of("index").unwrap());


+ 1
- 1
src/commands/index.rs View File

@@ -154,7 +154,7 @@ enum DocumentSource {
}

impl DocumentSource {
fn read(&self) -> io::Result<BufReader<Box<Read>>> {
fn read(&self) -> io::Result<BufReader<Box<dyn Read>>> {
Ok(match self {
&DocumentSource::FromPipe => BufReader::new(Box::new(io::stdin())),
&DocumentSource::FromFile(ref filepath) => {


+ 73
- 15
src/commands/new.rs View File

@@ -3,6 +3,7 @@ use ansi_term::Style;
use clap::ArgMatches;
use serde_json;
use std::convert::From;
use std::fs;
use std::io;
use std::io::Write;
use std::path::PathBuf;
@@ -10,8 +11,6 @@ use tantivy;
use tantivy::schema::Cardinality;
use tantivy::schema::*;
use tantivy::Index;
use std::fs;


pub fn run_new_cli(matches: &ArgMatches) -> Result<(), String> {
let index_directory = PathBuf::from(matches.value_of("index").unwrap());
@@ -29,7 +28,6 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate:
let mut buffer = String::new();
io::stdin()
.read_line(&mut buffer)
.ok()
.expect("Failed to read line");
let answer = buffer.trim_end().to_string();
match predicate(&answer) {
@@ -43,6 +41,7 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate:
}
}

// TODO move into core tantivy
fn field_name_validate(field_name: &str) -> Result<(), String> {
if is_valid_field_name(field_name) {
Ok(())
@@ -72,6 +71,30 @@ fn prompt_options(msg: &str, codes: Vec<char>) -> char {
entry.chars().next().unwrap().to_ascii_uppercase()
}

fn prompt_field_type(msg: &str, codes: Vec<&str>) -> tantivy::schema::Type {
let options = codes.join("/");
let predicate = |entry: &str| {
// TODO make case-insensitive, currently has to match the options precisely
if codes.contains(&entry) {
return Ok(());
} else {
return Err(format!("Invalid input. Options are ({})", options));
}
};
let message = format!("{} ({})", msg, options);
let prompt_output = prompt_input(&message, predicate);
match prompt_output.to_ascii_uppercase().as_ref() {
"TEXT" => Type::Str,
"U64" => Type::U64,
"I64" => Type::I64,
// "F64" => Type::F64,
"DATE" => Type::Date,
"FACET" => Type::HierarchicalFacet,
"BYTES" => Type::Bytes,
&_ => Type::Str, // shouldn't be here, the `predicate` fails before here
}
}

/// Asks the user a yes/no question via `prompt_options`, offering the choices
/// 'Y' and 'N'. Returns `true` when the selected option is 'Y'.
fn prompt_yn(msg: &str) -> bool {
    let choice = prompt_options(msg, vec!['Y', 'N']);
    choice == 'Y'
}
@@ -107,28 +130,63 @@ fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) {
schema_builder.add_text_field(field_name, text_options);
}

fn ask_add_field_u64(field_name: &str, schema_builder: &mut SchemaBuilder) {
let mut u64_options = IntOptions::default();
fn ask_add_num_field_with_options(
field_name: &str,
field_type: Type,
schema_builder: &mut SchemaBuilder,
) {
let mut int_options = IntOptions::default();
if prompt_yn("Should the field be stored") {
u64_options = u64_options.set_stored();
int_options = int_options.set_stored();
}
if prompt_yn("Should the field be fast") {
u64_options = u64_options.set_fast(Cardinality::SingleValue);
int_options = int_options.set_fast(Cardinality::SingleValue);
}
if prompt_yn("Should the field be indexed") {
u64_options = u64_options.set_indexed();
int_options = int_options.set_indexed();
}
match field_type {
Type::U64 => {
schema_builder.add_u64_field(field_name, int_options);
}
// Type::F64 => {
// schema_builder.add_f64_field(field_name, int_options);
// }
Type::I64 => {
schema_builder.add_i64_field(field_name, int_options);
}
Type::Date => {
schema_builder.add_date_field(field_name, int_options);
}
_ => {
// We only pass to this function if the field type is numeric
unreachable!();
}
}
schema_builder.add_u64_field(field_name, u64_options);
}

fn ask_add_field(schema_builder: &mut SchemaBuilder) {
println!("\n\n");
let field_name = prompt_input("New field name ", field_name_validate);
let text_or_integer = prompt_options("Text or unsigned 32-bit integer", vec!['T', 'I']);
if text_or_integer == 'T' {
ask_add_field_text(&field_name, schema_builder);
} else {
ask_add_field_u64(&field_name, schema_builder);

// Manually iterate over tantivy::schema::Type and make strings out of them
// Can introduce a dependency to do it automatically, but this should be easier
let possible_field_types = vec!["Text", "u64", "i64", "f64", "Date", "Facet", "Bytes"];
let field_type = prompt_field_type("Choose Field Type", possible_field_types);
match field_type {
Type::Str => {
ask_add_field_text(&field_name, schema_builder);
}
Type::U64 | Type::Date | Type::I64 => {
// Type::U64 | Type::F64 | Type::Date | Type::I64 => {
ask_add_num_field_with_options(&field_name, field_type, schema_builder);
}
Type::HierarchicalFacet => {
schema_builder.add_facet_field(&field_name);
}
Type::Bytes => {
schema_builder.add_bytes_field(&field_name);
}
}
}

@@ -142,7 +200,7 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> {
Style::new()
.bold()
.fg(Green)
.paint("Let's define it's schema!")
.paint("First define its schema!")
);
let mut schema_builder = SchemaBuilder::default();
loop {


+ 3
- 3
src/commands/serve.rs View File

@@ -38,7 +38,7 @@ use tantivy::{DocAddress, Score};
use tantivy::Document;
use tantivy::Index;
use tantivy::IndexReader;
use timer::TimerTree;
use crate::timer::TimerTree;
use urlencoded::UrlEncodedQuery;

pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> {
@@ -104,7 +104,7 @@ impl IndexServer {
}
}

fn create_hit(&self, score: Score, doc: &Document, doc_address: &DocAddress) -> Hit {
fn create_hit(&self, score: Score, doc: &Document, doc_address: DocAddress) -> Hit {
Hit {
score,
doc: self.schema.to_named_doc(&doc),
@@ -129,7 +129,7 @@ impl IndexServer {
.iter()
.map(|(score, doc_address)| {
let doc: Document = searcher.doc(*doc_address).unwrap();
self.create_hit(*score, &doc, doc_address)
self.create_hit(*score, &doc, *doc_address)
})
.collect()
};


Loading…
Cancel
Save