From 9f527ea85fd00de31f1153be4d69494e858585d5 Mon Sep 17 00:00:00 2001 From: petr-tik Date: Sat, 19 Oct 2019 20:22:51 +0100 Subject: [PATCH 1/3] Cargo fix edition 2018 --- Cargo.toml | 1 + src/commands/bench.rs | 2 +- src/commands/index.rs | 2 +- src/commands/serve.rs | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3344f0c..39434ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ repository = "https://github.com/tantivy-search/tantivy-cli" readme = "README.md" keywords = ["search", "information", "retrieval"] license = "MIT" +edition = "2018" [dependencies] time = "0.1" diff --git a/src/commands/bench.rs b/src/commands/bench.rs index 808ca68..69f71f3 100644 --- a/src/commands/bench.rs +++ b/src/commands/bench.rs @@ -9,7 +9,7 @@ use tantivy::collector::{Count, TopDocs}; use tantivy::query::QueryParser; use tantivy::schema::{Field, Schema}; use tantivy::Index; -use timer::TimerTree; +use crate::timer::TimerTree; pub fn run_bench_cli(matches: &ArgMatches) -> Result<(), String> { let index_path = PathBuf::from(matches.value_of("index").unwrap()); diff --git a/src/commands/index.rs b/src/commands/index.rs index d822af7..b95f5d4 100644 --- a/src/commands/index.rs +++ b/src/commands/index.rs @@ -154,7 +154,7 @@ enum DocumentSource { } impl DocumentSource { - fn read(&self) -> io::Result<BufReader<Box<Read>>> { + fn read(&self) -> io::Result<BufReader<Box<dyn Read>>> { Ok(match self { &DocumentSource::FromPipe => BufReader::new(Box::new(io::stdin())), &DocumentSource::FromFile(ref filepath) => { diff --git a/src/commands/serve.rs b/src/commands/serve.rs index ba3b5d5..8413c13 100644 --- a/src/commands/serve.rs +++ b/src/commands/serve.rs @@ -38,7 +38,7 @@ use tantivy::{DocAddress, Score}; use tantivy::Document; use tantivy::Index; use tantivy::IndexReader; -use timer::TimerTree; +use crate::timer::TimerTree; use urlencoded::UrlEncodedQuery; pub fn run_serve_cli(matches: &ArgMatches) -> Result<(), String> { From 5cc23d153cdec39a3045589642bb88d740b9287d Mon 
Sep 17 00:00:00 2001 From: petr-tik Date: Sat, 19 Oct 2019 20:28:30 +0100 Subject: [PATCH 2/3] cargo clippy - only select warnings, left some in --- src/commands/new.rs | 1 - src/commands/serve.rs | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/commands/new.rs b/src/commands/new.rs index 674be3d..a43a246 100644 --- a/src/commands/new.rs +++ b/src/commands/new.rs @@ -29,7 +29,6 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: let mut buffer = String::new(); io::stdin() .read_line(&mut buffer) - .ok() .expect("Failed to read line"); let answer = buffer.trim_end().to_string(); match predicate(&answer) { diff --git a/src/commands/serve.rs b/src/commands/serve.rs index 8413c13..1e26bd3 100644 --- a/src/commands/serve.rs +++ b/src/commands/serve.rs @@ -104,7 +104,7 @@ impl IndexServer { } } - fn create_hit(&self, score: Score, doc: &Document, doc_address: &DocAddress) -> Hit { + fn create_hit(&self, score: Score, doc: &Document, doc_address: DocAddress) -> Hit { Hit { score, doc: self.schema.to_named_doc(&doc), @@ -129,7 +129,7 @@ impl IndexServer { .iter() .map(|(score, doc_address)| { let doc: Document = searcher.doc(*doc_address).unwrap(); - self.create_hit(*score, &doc, doc_address) + self.create_hit(*score, &doc, *doc_address) }) .collect() }; From 27e340557094d9bcdc8fbbec254e3149162f49fe Mon Sep 17 00:00:00 2001 From: petr-tik Date: Sat, 19 Oct 2019 22:34:34 +0100 Subject: [PATCH 3/3] Added all field types supported by core tantivy to cli dialog Left F64 in comments, as it's already in tantivy master branch, but hasn't been published yet Added a todo list on our way to integrating tantivy-cli as a subcrate in core tantivy --- integration_plan.org | 18 +++++++++ src/commands/new.rs | 87 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 integration_plan.org diff --git a/integration_plan.org b/integration_plan.org new file mode 100644 index 0000000..85ca6d1 
--- /dev/null +++ b/integration_plan.org @@ -0,0 +1,18 @@ +* Integrate cli as a binary built as part of tantivy-core +** Pros +*** Builds every commit +*** Allows prospective users to try tantivy +** Cons +*** Increases build time +*** Slows down core feature development + If every core feature requires changing the example cli, it will slow down review and fixing of core library functionality + +* Outline +** DONE Update to rust edition 2018 + CLOSED: [2019-10-19 Sat 20:28] +** DONE Add all datatypes supported by the library to the dialog + CLOSED: [2019-10-19 Sat 22:33] +** TODO test locally with a dependency on master of local tantivy +* Further work +** TODO Skip fields found in json, but not in the schema instead of erroring + diff --git a/src/commands/new.rs b/src/commands/new.rs index a43a246..da7ea47 100644 --- a/src/commands/new.rs +++ b/src/commands/new.rs @@ -3,6 +3,7 @@ use ansi_term::Style; use clap::ArgMatches; use serde_json; use std::convert::From; +use std::fs; use std::io; use std::io::Write; use std::path::PathBuf; @@ -10,8 +11,6 @@ use tantivy; use tantivy::schema::Cardinality; use tantivy::schema::*; use tantivy::Index; -use std::fs; - pub fn run_new_cli(matches: &ArgMatches) -> Result<(), String> { let index_directory = PathBuf::from(matches.value_of("index").unwrap()); @@ -42,6 +41,7 @@ fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: } } +// TODO move into core tantivy fn field_name_validate(field_name: &str) -> Result<(), String> { if is_valid_field_name(field_name) { Ok(()) @@ -71,6 +71,30 @@ fn prompt_options(msg: &str, codes: Vec<char>) -> char { entry.chars().next().unwrap().to_ascii_uppercase() } +fn prompt_field_type(msg: &str, codes: Vec<&str>) -> tantivy::schema::Type { + let options = codes.join("/"); + let predicate = |entry: &str| { + // TODO make case-insensitive, currently has to match the options precisely + if codes.contains(&entry) { + return Ok(()); + } else { + return Err(format!("Invalid input. 
Options are ({})", options)); + } + }; + let message = format!("{} ({})", msg, options); + let prompt_output = prompt_input(&message, predicate); + match prompt_output.to_ascii_uppercase().as_ref() { + "TEXT" => Type::Str, + "U64" => Type::U64, + "I64" => Type::I64, + // "F64" => Type::F64, + "DATE" => Type::Date, + "FACET" => Type::HierarchicalFacet, + "BYTES" => Type::Bytes, + &_ => Type::Str, // shouldn't be here, the `predicate` fails before here + } +} + fn prompt_yn(msg: &str) -> bool { prompt_options(msg, vec!['Y', 'N']) == 'Y' } @@ -106,28 +130,63 @@ fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) { schema_builder.add_text_field(field_name, text_options); } -fn ask_add_field_u64(field_name: &str, schema_builder: &mut SchemaBuilder) { - let mut u64_options = IntOptions::default(); +fn ask_add_num_field_with_options( + field_name: &str, + field_type: Type, + schema_builder: &mut SchemaBuilder, +) { + let mut int_options = IntOptions::default(); if prompt_yn("Should the field be stored") { - u64_options = u64_options.set_stored(); + int_options = int_options.set_stored(); } if prompt_yn("Should the field be fast") { - u64_options = u64_options.set_fast(Cardinality::SingleValue); + int_options = int_options.set_fast(Cardinality::SingleValue); } if prompt_yn("Should the field be indexed") { - u64_options = u64_options.set_indexed(); + int_options = int_options.set_indexed(); + } + match field_type { + Type::U64 => { + schema_builder.add_u64_field(field_name, int_options); + } + // Type::F64 => { + // schema_builder.add_f64_field(field_name, int_options); + // } + Type::I64 => { + schema_builder.add_i64_field(field_name, int_options); + } + Type::Date => { + schema_builder.add_date_field(field_name, int_options); + } + _ => { + // We only pass to this function if the field type is numeric + unreachable!(); + } } - schema_builder.add_u64_field(field_name, u64_options); } fn ask_add_field(schema_builder: &mut SchemaBuilder) { 
println!("\n\n"); let field_name = prompt_input("New field name ", field_name_validate); - let text_or_integer = prompt_options("Text or unsigned 32-bit integer", vec!['T', 'I']); - if text_or_integer == 'T' { - ask_add_field_text(&field_name, schema_builder); - } else { - ask_add_field_u64(&field_name, schema_builder); + + // Hand-written list of the tantivy::schema::Type names that prompt_field_type can map back to a Type + // "f64" is left out until tantivy publishes F64: with its match arm commented out it would silently become a Text field + let possible_field_types = vec!["Text", "u64", "i64", "Date", "Facet", "Bytes"]; + let field_type = prompt_field_type("Choose Field Type", possible_field_types); + match field_type { + Type::Str => { + ask_add_field_text(&field_name, schema_builder); + } + Type::U64 | Type::Date | Type::I64 => { + // Type::U64 | Type::F64 | Type::Date | Type::I64 => { + ask_add_num_field_with_options(&field_name, field_type, schema_builder); + } + Type::HierarchicalFacet => { + schema_builder.add_facet_field(&field_name); + } + Type::Bytes => { + schema_builder.add_bytes_field(&field_name); + } } } @@ -141,7 +200,7 @@ fn run_new(directory: PathBuf) -> tantivy::Result<()> { Style::new() .bold() .fg(Green) - .paint("Let's define it's schema!") + .paint("First define its schema!") ); let mut schema_builder = SchemaBuilder::default(); loop {