You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

224 lines
7.4KB

  1. use ansi_term::Colour::{Blue, Green, Red};
  2. use ansi_term::Style;
  3. use clap::ArgMatches;
  4. use serde_json;
  5. use std::convert::From;
  6. use std::fs;
  7. use std::io;
  8. use std::io::Write;
  9. use std::path::PathBuf;
  10. use tantivy;
  11. use tantivy::schema::Cardinality;
  12. use tantivy::schema::*;
  13. use tantivy::Index;
  14. pub fn run_new_cli(matches: &ArgMatches) -> Result<(), String> {
  15. let index_directory = PathBuf::from(matches.value_of("index").unwrap());
  16. run_new(index_directory).map_err(|e| format!("{:?}", e))
  17. }
  18. fn prompt_input<P: Fn(&str) -> Result<(), String>>(prompt_text: &str, predicate: P) -> String {
  19. loop {
  20. print!(
  21. "{prompt_text:<width$} ? ",
  22. prompt_text = Style::new().bold().fg(Blue).paint(prompt_text),
  23. width = 40
  24. );
  25. io::stdout().flush().unwrap();
  26. let mut buffer = String::new();
  27. io::stdin()
  28. .read_line(&mut buffer)
  29. .expect("Failed to read line");
  30. let answer = buffer.trim_end().to_string();
  31. match predicate(&answer) {
  32. Ok(()) => {
  33. return answer;
  34. }
  35. Err(msg) => {
  36. println!("Error: {}", Style::new().bold().fg(Red).paint(msg));
  37. }
  38. }
  39. }
  40. }
  41. // TODO move into core tantivy
  42. fn field_name_validate(field_name: &str) -> Result<(), String> {
  43. if is_valid_field_name(field_name) {
  44. Ok(())
  45. } else {
  46. Err(String::from(
  47. "Field name must match the pattern [_a-zA-Z0-9]+",
  48. ))
  49. }
  50. }
  51. fn prompt_options(msg: &str, codes: Vec<char>) -> char {
  52. let options_string: Vec<String> = codes.iter().map(|c| format!("{}", c)).collect();
  53. let options = options_string.join("/");
  54. let predicate = |entry: &str| {
  55. if entry.len() != 1 {
  56. return Err(format!("Invalid input. Options are ({})", options));
  57. }
  58. let c = entry.chars().next().unwrap().to_ascii_uppercase();
  59. if codes.contains(&c) {
  60. return Ok(());
  61. } else {
  62. return Err(format!("Invalid input. Options are ({})", options));
  63. }
  64. };
  65. let message = format!("{} ({})", msg, options);
  66. let entry = prompt_input(&message, predicate);
  67. entry.chars().next().unwrap().to_ascii_uppercase()
  68. }
  69. fn prompt_field_type(msg: &str, codes: Vec<&str>) -> tantivy::schema::Type {
  70. let options = codes.join("/");
  71. let predicate = |entry: &str| {
  72. // TODO make case-insensitive, currently has to match the options precisely
  73. if codes.contains(&entry) {
  74. return Ok(());
  75. } else {
  76. return Err(format!("Invalid input. Options are ({})", options));
  77. }
  78. };
  79. let message = format!("{} ({})", msg, options);
  80. let prompt_output = prompt_input(&message, predicate);
  81. match prompt_output.to_ascii_uppercase().as_ref() {
  82. "TEXT" => Type::Str,
  83. "U64" => Type::U64,
  84. "I64" => Type::I64,
  85. // "F64" => Type::F64,
  86. "DATE" => Type::Date,
  87. "FACET" => Type::HierarchicalFacet,
  88. "BYTES" => Type::Bytes,
  89. &_ => Type::Str, // shouldn't be here, the `predicate` fails before here
  90. }
  91. }
  92. fn prompt_yn(msg: &str) -> bool {
  93. prompt_options(msg, vec!['Y', 'N']) == 'Y'
  94. }
  95. fn ask_add_field_text(field_name: &str, schema_builder: &mut SchemaBuilder) {
  96. let mut text_options = TextOptions::default();
  97. if prompt_yn("Should the field be stored") {
  98. text_options = text_options.set_stored();
  99. }
  100. if prompt_yn("Should the field be indexed") {
  101. let mut text_indexing_options = TextFieldIndexing::default()
  102. .set_index_option(IndexRecordOption::Basic)
  103. .set_tokenizer("en_stem");
  104. if prompt_yn("Should the term be tokenized?") {
  105. if prompt_yn("Should the term frequencies (per doc) be in the index") {
  106. if prompt_yn("Should the term positions (per doc) be in the index") {
  107. text_indexing_options = text_indexing_options
  108. .set_index_option(IndexRecordOption::WithFreqsAndPositions);
  109. } else {
  110. text_indexing_options =
  111. text_indexing_options.set_index_option(IndexRecordOption::WithFreqs);
  112. }
  113. }
  114. } else {
  115. text_indexing_options = text_indexing_options.set_tokenizer("raw");
  116. }
  117. text_options = text_options.set_indexing_options(text_indexing_options);
  118. }
  119. schema_builder.add_text_field(field_name, text_options);
  120. }
  121. fn ask_add_num_field_with_options(
  122. field_name: &str,
  123. field_type: Type,
  124. schema_builder: &mut SchemaBuilder,
  125. ) {
  126. let mut int_options = IntOptions::default();
  127. if prompt_yn("Should the field be stored") {
  128. int_options = int_options.set_stored();
  129. }
  130. if prompt_yn("Should the field be fast") {
  131. int_options = int_options.set_fast(Cardinality::SingleValue);
  132. }
  133. if prompt_yn("Should the field be indexed") {
  134. int_options = int_options.set_indexed();
  135. }
  136. match field_type {
  137. Type::U64 => {
  138. schema_builder.add_u64_field(field_name, int_options);
  139. }
  140. // Type::F64 => {
  141. // schema_builder.add_f64_field(field_name, int_options);
  142. // }
  143. Type::I64 => {
  144. schema_builder.add_i64_field(field_name, int_options);
  145. }
  146. Type::Date => {
  147. schema_builder.add_date_field(field_name, int_options);
  148. }
  149. _ => {
  150. // We only pass to this function if the field type is numeric
  151. unreachable!();
  152. }
  153. }
  154. }
  155. fn ask_add_field(schema_builder: &mut SchemaBuilder) {
  156. println!("\n\n");
  157. let field_name = prompt_input("New field name ", field_name_validate);
  158. // Manually iterate over tantivy::schema::Type and make strings out of them
  159. // Can introduce a dependency to do it automatically, but this should be easier
  160. let possible_field_types = vec!["Text", "u64", "i64", "f64", "Date", "Facet", "Bytes"];
  161. let field_type = prompt_field_type("Choose Field Type", possible_field_types);
  162. match field_type {
  163. Type::Str => {
  164. ask_add_field_text(&field_name, schema_builder);
  165. }
  166. Type::U64 | Type::Date | Type::I64 => {
  167. // Type::U64 | Type::F64 | Type::Date | Type::I64 => {
  168. ask_add_num_field_with_options(&field_name, field_type, schema_builder);
  169. }
  170. Type::HierarchicalFacet => {
  171. schema_builder.add_facet_field(&field_name);
  172. }
  173. Type::Bytes => {
  174. schema_builder.add_bytes_field(&field_name);
  175. }
  176. }
  177. }
  178. fn run_new(directory: PathBuf) -> tantivy::Result<()> {
  179. println!(
  180. "\n{} ",
  181. Style::new().bold().fg(Green).paint("Creating new index")
  182. );
  183. println!(
  184. "{} ",
  185. Style::new()
  186. .bold()
  187. .fg(Green)
  188. .paint("First define its schema!")
  189. );
  190. let mut schema_builder = SchemaBuilder::default();
  191. loop {
  192. ask_add_field(&mut schema_builder);
  193. if !prompt_yn("Add another field") {
  194. break;
  195. }
  196. }
  197. let schema = schema_builder.build();
  198. let schema_json = format!("{}", serde_json::to_string_pretty(&schema).unwrap());
  199. println!("\n{}\n", Style::new().fg(Green).paint(schema_json));
  200. match fs::create_dir(&directory) {
  201. Ok(_) => (),
  202. // Proceed here; actual existence of index is checked in Index::create_in_dir
  203. Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => (),
  204. Err(e) => panic!("{:?}", e),
  205. };
  206. Index::create_in_dir(&directory, schema)?;
  207. Ok(())
  208. }