commit f981853f1add83c960628aea6831b62d2907792e Author: Jonathan Strong Date: Thu Mar 19 03:19:10 2020 -0400 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..992c4be --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +*.swp +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..bd36530 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "data-pipelines" +version = "0.1.0" +authors = ["Jonathan Strong "] +edition = "2018" + +[[bin]] +name = "baseline-csv" +path = "src/csv.rs" + +[dependencies] +csv = "1.1" +structopt = "0.3" +serde = { version = "1", features = ["derive"] } +markets = { version = "0.2.1", registry = "jstrong-dev" } +slog = "2" +slog-async = "2" +slog-term = "2" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d69ea9d --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Jonathan Strong + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e1c9600 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# data-pipelines + +A series of experiments in Rust about maximizing throughput for large-scale data analysis. diff --git a/src/csv.rs b/src/csv.rs new file mode 100644 index 0000000..9da7269 --- /dev/null +++ b/src/csv.rs @@ -0,0 +1,64 @@ +#![allow(unused_imports)] + +#[macro_use] +extern crate slog; + +use std::path::PathBuf; +use std::time::*; +use std::io::{self, prelude::*}; +use std::fs; +use structopt::StructOpt; +use serde::{Serialize, Deserialize}; +use slog::Drain; +use markets::crypto::{Exchange, Ticker, Side}; + + +// equivalent to panic! but without the ugly 'thread main panicked' yada yada +macro_rules! fatal { ($fmt:expr, $($args:tt)*) => {{ + eprintln!($fmt, $($args)*); + std::process::exit(1); +}}} + + +#[derive(Debug, StructOpt)] +struct Opt { + /// Path to CSV file with trades data + #[structopt(short = "f", long = "trades-csv")] + #[structopt(parse(from_os_str))] + trades_csv: PathBuf, +} + +#[derive(Deserialize)] +struct Trade { + /// Unix nanoseconds + pub time: u64, + pub exch: Exchange, + pub ticker: Ticker, + pub side: Option, + pub price: f64, + pub amount: f64, +} + +fn main() { + let start = Instant::now(); + + let decorator = slog_term::TermDecorator::new().stdout().force_color().build(); + let drain = slog_term::FullFormat::new(decorator).use_utc_timestamp().build().fuse(); + let drain = slog_async::Async::new(drain).chan_size(1024 * 64).thread_name("recv".into()).build().fuse(); + let logger = slog::Logger::root(drain, o!("version" => structopt::clap::crate_version!())); + + info!(logger, "initializing..."); + + + let opt = Opt::from_args(); + + if ! opt.trades_csv.exists() { + error!(logger, "path does not exist: {}", opt.trades_csv.display()); + fatal!("Error: path does not exist: {}", opt.trades_csv.display()); + } + + info!(logger, "verified csv path exists"; "trades_csv" => %opt.trades_csv.display()); + + let took = Instant::now() - start; + info!(logger, "finished in {:?}", took); +}