You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

416 lines
14KB

  1. extern crate toml;
  2. extern crate serde_json;
  3. use utils::fs::{read_file, is_path_in_directory, get_file_time};
  4. use std::hash::{Hasher, Hash};
  5. use std::str::FromStr;
  6. use std::fmt;
  7. use std::collections::hash_map::DefaultHasher;
  8. use reqwest::{Client, header};
  9. use url::Url;
  10. use std::path::PathBuf;
  11. use std::sync::{Arc, Mutex};
  12. use csv::Reader;
  13. use std::collections::HashMap;
  14. use tera::{GlobalFn, Value, from_value, to_value, Result, Map, Error};
  /// Error message used by `load_data` when the `path`/`url` arguments are unusable:
  /// `DataSource::from_args` returns it when both or neither are supplied, and it is
  /// also handed to `optional_arg!` as the message for the `path` and `url` lookups.
  static GET_DATA_ARGUMENT_ERROR_MESSAGE: &str = "`load_data`: requires EITHER a `path` or `url` argument";
  /// Where `load_data` reads its input from.
  enum DataSource {
      /// A remote resource, requested through the shared reqwest client.
      Url(Url),
      /// A file on disk; built by joining the `path` argument onto the content path.
      Path(PathBuf)
  }
  /// How the loaded text is interpreted before being handed back to the template.
  /// For URL sources it also selects the `Accept` header sent with the request.
  #[derive(Debug)]
  enum OutputFormat {
      /// Parse the data as TOML.
      Toml,
      /// Parse the data as JSON.
      Json,
      /// Parse the data as CSV (headers plus records).
      Csv,
      /// Return the data unparsed, as a single string value.
      Plain
  }
  27. impl fmt::Display for OutputFormat {
  28. fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  29. fmt::Debug::fmt(self, f)
  30. }
  31. }
  32. impl Hash for OutputFormat {
  33. fn hash<H: Hasher>(&self, state: &mut H) {
  34. self.to_string().hash(state);
  35. }
  36. }
  37. impl FromStr for OutputFormat {
  38. type Err = Error;
  39. fn from_str(output_format: &str) -> Result<Self> {
  40. return match output_format {
  41. "toml" => Ok(OutputFormat::Toml),
  42. "csv" => Ok(OutputFormat::Csv),
  43. "json" => Ok(OutputFormat::Json),
  44. "plain" => Ok(OutputFormat::Plain),
  45. format => Err(format!("Unknown output format {}", format).into())
  46. };
  47. }
  48. }
  49. impl OutputFormat {
  50. fn as_accept_header(&self) -> header::HeaderValue {
  51. return header::HeaderValue::from_static(match self {
  52. OutputFormat::Json => "application/json",
  53. OutputFormat::Csv => "text/csv",
  54. OutputFormat::Toml => "application/toml",
  55. OutputFormat::Plain => "text/plain",
  56. });
  57. }
  58. }
  59. impl DataSource {
  60. fn from_args(path_arg: Option<String>, url_arg: Option<String>, content_path: &PathBuf) -> Result<Self> {
  61. if path_arg.is_some() && url_arg.is_some() {
  62. return Err(GET_DATA_ARGUMENT_ERROR_MESSAGE.into());
  63. }
  64. if let Some(path) = path_arg {
  65. let full_path = content_path.join(path);
  66. if !full_path.exists() {
  67. return Err(format!("{} doesn't exist", full_path.display()).into());
  68. }
  69. return Ok(DataSource::Path(full_path));
  70. }
  71. if let Some(url) = url_arg {
  72. return Url::parse(&url).map(|parsed_url| DataSource::Url(parsed_url)).map_err(|e| format!("Failed to parse {} as url: {}", url, e).into());
  73. }
  74. return Err(GET_DATA_ARGUMENT_ERROR_MESSAGE.into());
  75. }
  76. fn get_cache_key(&self, format: &OutputFormat) -> u64 {
  77. let mut hasher = DefaultHasher::new();
  78. format.hash(&mut hasher);
  79. self.hash(&mut hasher);
  80. return hasher.finish();
  81. }
  82. }
  83. impl Hash for DataSource {
  84. fn hash<H: Hasher>(&self, state: &mut H) {
  85. match self {
  86. DataSource::Url(url) => url.hash(state),
  87. DataSource::Path(path) => {
  88. path.hash(state);
  89. get_file_time(&path).expect("get file time").hash(state);
  90. }
  91. };
  92. }
  93. }
  94. fn get_data_from_args(content_path: &PathBuf, args: &HashMap<String, Value>) -> Result<DataSource> {
  95. let path_arg = optional_arg!(
  96. String,
  97. args.get("path"),
  98. GET_DATA_ARGUMENT_ERROR_MESSAGE
  99. );
  100. let url_arg = optional_arg!(
  101. String,
  102. args.get("url"),
  103. GET_DATA_ARGUMENT_ERROR_MESSAGE
  104. );
  105. return DataSource::from_args(path_arg, url_arg, content_path);
  106. }
  107. fn read_data_file(base_path: &PathBuf, full_path: PathBuf) -> Result<String> {
  108. if !is_path_in_directory(&base_path, &full_path).map_err(|e| format!("Failed to read data file {}: {}", full_path.display(), e))? {
  109. return Err(format!("{} is not inside the base site directory {}", full_path.display(), base_path.display()).into());
  110. }
  111. return read_file(&full_path)
  112. .map_err(|e| format!("`load_data`: error {} loading file {}", full_path.to_str().unwrap(), e).into());
  113. }
  114. fn get_output_format_from_args(args: &HashMap<String, Value>, data_source: &DataSource) -> Result<OutputFormat> {
  115. let format_arg = optional_arg!(
  116. String,
  117. args.get("format"),
  118. "`load_data`: `format` needs to be an argument with a string value, being one of the supported `load_data` file types (csv, json, toml)"
  119. );
  120. if let Some(format) = format_arg {
  121. return OutputFormat::from_str(&format);
  122. }
  123. let from_extension = if let DataSource::Path(path) = data_source {
  124. let extension_result: Result<&str> = path.extension().map(|extension| extension.to_str().unwrap()).ok_or(format!("Could not determine format for {} from extension", path.display()).into());
  125. extension_result?
  126. } else {
  127. "plain"
  128. };
  129. return OutputFormat::from_str(from_extension);
  130. }
  131. /// A global function to load data from a data file.
  132. /// Currently the supported formats are json, toml and csv
  133. pub fn make_load_data(content_path: PathBuf, base_path: PathBuf) -> GlobalFn {
  134. let mut headers = header::HeaderMap::new();
  135. headers.insert(header::USER_AGENT, "zola".parse().unwrap());
  136. let client = Arc::new(Mutex::new(Client::builder().build().expect("reqwest client build")));
  137. let result_cache: Arc<Mutex<HashMap<u64, Value>>> = Arc::new(Mutex::new(HashMap::new()));
  138. Box::new(move |args| -> Result<Value> {
  139. let data_source = get_data_from_args(&content_path, &args)?;
  140. let file_format = get_output_format_from_args(&args, &data_source)?;
  141. let cache_key = data_source.get_cache_key(&file_format);
  142. let mut cache = result_cache.lock().expect("result cache lock");
  143. let response_client = client.lock().expect("response client lock");
  144. if let Some(cached_result) = cache.get(&cache_key) {
  145. return Ok(cached_result.clone());
  146. }
  147. let data = match data_source {
  148. DataSource::Path(path) => read_data_file(&base_path, path),
  149. DataSource::Url(url) => {
  150. let mut response = response_client.get(url.as_str()).header(header::ACCEPT, file_format.as_accept_header()).send().and_then(|res| res.error_for_status()).map_err(|e| format!("Failed to request {}: {}", url, e.status().expect("response status")))?;
  151. response.text().map_err(|e| format!("Failed to parse response from {}: {:?}", url, e).into())
  152. },
  153. }?;
  154. let result_value: Result<Value> = match file_format {
  155. OutputFormat::Toml => load_toml(data),
  156. OutputFormat::Csv => load_csv(data),
  157. OutputFormat::Json => load_json(data),
  158. OutputFormat::Plain => to_value(data).map_err(|e| e.into()),
  159. };
  160. if let Ok(data_result) = &result_value {
  161. cache.insert(cache_key, data_result.clone());
  162. }
  163. result_value
  164. })
  165. }
  166. /// load/parse a json file from the given path and place it into a
  167. /// tera value
  168. fn load_json(json_data: String) -> Result<Value> {
  169. let json_content: Value = serde_json::from_str(json_data.as_str()).map_err(|e| format!("{:?}", e))?;
  170. return Ok(json_content);
  171. }
  172. /// load/parse a toml file from the given path, and place it into a
  173. /// tera Value
  174. fn load_toml(toml_data: String) -> Result<Value> {
  175. let toml_content: toml::Value = toml::from_str(&toml_data).map_err(|e| format!("{:?}", e))?;
  176. to_value(toml_content).map_err(|e| e.into())
  177. }
  178. /// Load/parse a csv file from the given path, and place it into a
  179. /// tera Value.
  180. ///
  181. /// An example csv file `example.csv` could be:
  182. /// ```csv
  183. /// Number, Title
  184. /// 1,Gutenberg
  185. /// 2,Printing
  186. /// ```
  187. /// The json value output would be:
  188. /// ```json
  189. /// {
  190. /// "headers": ["Number", "Title"],
  191. /// "records": [
  192. /// ["1", "Gutenberg"],
  193. /// ["2", "Printing"]
  194. /// ],
  195. /// }
  196. /// ```
  197. fn load_csv(csv_data: String) -> Result<Value> {
  198. let mut reader = Reader::from_reader(csv_data.as_bytes());
  199. let mut csv_map = Map::new();
  200. {
  201. let hdrs = reader.headers()
  202. .map_err(|e| format!("'load_data': {} - unable to read CSV header line (line 1) for CSV file", e))?;
  203. let headers_array = hdrs.iter()
  204. .map(|v| Value::String(v.to_string()))
  205. .collect();
  206. csv_map.insert(String::from("headers"), Value::Array(headers_array));
  207. }
  208. {
  209. let records = reader.records();
  210. let mut records_array: Vec<Value> = Vec::new();
  211. for result in records {
  212. let record = result.unwrap();
  213. let mut elements_array: Vec<Value> = Vec::new();
  214. for e in record.into_iter() {
  215. elements_array.push(Value::String(String::from(e)));
  216. }
  217. records_array.push(Value::Array(elements_array));
  218. }
  219. csv_map.insert(String::from("records"), Value::Array(records_array));
  220. }
  221. let csv_value: Value = Value::Object(csv_map);
  222. to_value(csv_value).map_err(|err| err.into())
  223. }
  #[cfg(test)]
  mod tests {
      use super::{make_load_data, DataSource, OutputFormat};

      use std::collections::HashMap;
      use std::path::PathBuf;

      use tera::to_value;

      /// Absolute path of a fixture inside the shared `utils/test-files` directory.
      fn get_test_file(filename: &str) -> PathBuf {
          PathBuf::from("../utils/test-files").canonicalize().unwrap().join(filename)
      }

      #[test]
      fn fails_when_missing_file() {
          let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils"));
          let mut args = HashMap::new();
          args.insert(String::from("path"), to_value("../../../READMEE.md").unwrap());

          let result = load_data(args);

          assert!(result.is_err());
          let error = result.unwrap_err();
          assert!(error.description().contains("READMEE.md doesn't exist"));
      }

      #[test]
      fn cant_load_outside_content_dir() {
          let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils"));
          let mut args = HashMap::new();
          args.insert(String::from("path"), to_value("../../../README.md").unwrap());
          args.insert(String::from("format"), to_value("plain").unwrap());

          let result = load_data(args);

          assert!(result.is_err());
          let error = result.unwrap_err();
          assert!(error.description().contains("README.md is not inside the base site directory"));
      }

      #[test]
      fn calculates_cache_key_for_path() {
          // The key is derived from the absolute path, so it cannot be compared
          // against a fixed value; two computations must simply agree.
          let first = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);
          let second = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);
          assert_eq!(first, second);
      }

      #[test]
      fn calculates_cache_key_for_url() {
          let source = DataSource::Url("https://api.github.com/repos/getzola/zola".parse().unwrap());
          let cache_key = source.get_cache_key(&OutputFormat::Plain);
          assert_eq!(cache_key, 8916756616423791754);
      }

      #[test]
      fn different_cache_key_per_filename() {
          let toml_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);
          let json_cache_key = DataSource::Path(get_test_file("test.json")).get_cache_key(&OutputFormat::Toml);
          assert_ne!(toml_cache_key, json_cache_key);
      }

      #[test]
      fn different_cache_key_per_format() {
          let toml_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Toml);
          let json_cache_key = DataSource::Path(get_test_file("test.toml")).get_cache_key(&OutputFormat::Json);
          assert_ne!(toml_cache_key, json_cache_key);
      }

      #[test]
      fn can_load_remote_data() {
          let load_data = make_load_data(PathBuf::new(), PathBuf::new());
          let mut args = HashMap::new();
          args.insert(String::from("url"), to_value("https://httpbin.org/json").unwrap());
          args.insert(String::from("format"), to_value("json").unwrap());

          let result = load_data(args).unwrap();

          let title = result.get("slideshow").unwrap().get("title").unwrap();
          assert_eq!(title, &to_value("Sample Slide Show").unwrap());
      }

      #[test]
      fn fails_when_request_404s() {
          let load_data = make_load_data(PathBuf::new(), PathBuf::new());
          let mut args = HashMap::new();
          args.insert(String::from("url"), to_value("https://httpbin.org/status/404/").unwrap());
          args.insert(String::from("format"), to_value("json").unwrap());

          let result = load_data(args);

          assert!(result.is_err());
          let error = result.unwrap_err();
          assert_eq!(error.description(), "Failed to request https://httpbin.org/status/404/: 404 Not Found");
      }

      #[test]
      fn can_load_toml() {
          let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils/test-files"));
          let mut args = HashMap::new();
          args.insert(String::from("path"), to_value("test.toml").unwrap());

          let result = load_data(args).unwrap();

          // TOML tables are not loaded in file order, and a date comes back as a
          // nested object holding a single key/value pair rather than as a string.
          let expected = json!({
              "category": {
                  "date": {
                      "$__toml_private_datetime": "1979-05-27T07:32:00Z"
                  },
                  "key": "value"
              },
          });
          assert_eq!(result, expected);
      }

      #[test]
      fn can_load_csv() {
          let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils/test-files"));
          let mut args = HashMap::new();
          args.insert(String::from("path"), to_value("test.csv").unwrap());

          let result = load_data(args).unwrap();

          let expected = json!({
              "headers": ["Number", "Title"],
              "records": [
                  ["1", "Gutenberg"],
                  ["2", "Printing"]
              ],
          });
          assert_eq!(result, expected);
      }

      #[test]
      fn can_load_json() {
          let load_data = make_load_data(PathBuf::from("../utils/test-files"), PathBuf::from("../utils/test-files"));
          let mut args = HashMap::new();
          args.insert(String::from("path"), to_value("test.json").unwrap());

          let result = load_data(args).unwrap();

          let expected = json!({
              "key": "value",
              "array": [1, 2, 3],
              "subpackage": {
                  "subkey": 5
              }
          });
          assert_eq!(result, expected);
      }
  }