#![allow(unused_imports)] #[macro_use] extern crate slog; #[macro_use] extern crate markets; use std::path::PathBuf; use std::time::*; use std::io::{self, prelude::*}; use std::fs; use structopt::StructOpt; use serde::{Serialize, Deserialize}; use slog::Drain; use pretty_toa::ThousandsSep; use markets::crypto::{Exchange, Ticker, Side}; // equivalent to panic! but without the ugly 'thread main panicked' yada yada macro_rules! fatal { ($fmt:expr, $($args:tt)*) => {{ eprintln!($fmt, $($args)*); std::process::exit(1); }}} const PROGRESS_EVERY: usize = 1024 * 1024; const ONE_SECOND: u64 = 1_000_000_000; const ONE_HOUR: u64 = ONE_SECOND * 60 * 60; #[derive(Debug, StructOpt)] struct Opt { /// Path to CSV file with trades data #[structopt(short = "f", long = "trades-csv")] #[structopt(parse(from_os_str))] trades_csv: PathBuf, /// Where to save the query results (CSV output) #[structopt(short = "o", long = "output-path")] #[structopt(parse(from_os_str))] output_path: PathBuf, } #[derive(Deserialize)] struct Trade { /// Time of trade in unix nanoseconds pub time: u64, /// Exchange where trade executed pub exch: Exchange, /// Currency rate of trade (base/quote) pub ticker: Ticker, /// Price of trade, in quote denomination pub price: f64, /// Size/Volume of trade, in base denomination pub amount: f64, } fn per_sec(n: usize, span: Duration) -> f64 { if n == 0 || span < Duration::from_micros(1) { return 0.0 } let s: f64 = span.as_nanos() as f64 / 1e9f64; n as f64 / s } #[allow(dead_code)] #[inline(always)] fn manual_deserialize_bytes(row: &csv::ByteRecord) -> Result { let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?) .ok_or("parsing time failed")?; let amount: f64 = lexical::parse(row.get(1).ok_or("no amount")?) .map_err(|_| "parsing amount failed")?; let exch = match row.get(2).ok_or("no exch")? { b"bmex" => e!(bmex), b"bnce" => e!(bnce), b"btfx" => e!(btfx), b"gdax" => e!(gdax), b"okex" => e!(okex), b"bits" => e!(bits), b"plnx" => e!(plnx), b"krkn" => e!(krkn), _ => return Err("illegal exch"), }; let price: f64 = lexical::parse(row.get(3).ok_or("no price")?) .map_err(|_| "parsing price failed")?; let ticker = match row.get(6).ok_or("no ticker")? { b"btc_usd" => t!(btc-usd), b"eth_usd" => t!(eth-usd), b"ltc_usd" => t!(ltc-usd), b"etc_usd" => t!(etc-usd), b"bch_usd" => t!(bch-usd), b"xmr_usd" => t!(xmr-usd), b"usdt_usd" => t!(usdt-usd), _ => return Err("illegal ticker"), }; Ok(Trade { time, amount, exch, price, ticker }) } #[allow(dead_code)] #[inline(always)] fn manual_deserialize_str(row: &csv::StringRecord) -> Result { let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?.as_bytes()) .ok_or("parsing time failed")?; let amount: f64 = lexical::parse(row.get(1).ok_or("no amount")?) .map_err(|_| "parsing amount failed")?; let exch = match row.get(2).ok_or("no exch")? { "bmex" => e!(bmex), "bnce" => e!(bnce), "btfx" => e!(btfx), "gdax" => e!(gdax), "okex" => e!(okex), "bits" => e!(bits), "plnx" => e!(plnx), "krkn" => e!(krkn), _ => return Err("illegal exch"), }; let price: f64 = lexical::parse(row.get(3).ok_or("no price")?) .map_err(|_| "parsing price failed")?; let ticker = match row.get(6).ok_or("no ticker")? { "btc_usd" => t!(btc-usd), "eth_usd" => t!(eth-usd), "ltc_usd" => t!(ltc-usd), "etc_usd" => t!(etc-usd), "bch_usd" => t!(bch-usd), "xmr_usd" => t!(xmr-usd), "usdt_usd" => t!(usdt-usd), _ => return Err("illegal ticker"), }; Ok(Trade { time, amount, exch, price, ticker }) } /// Example of code used in discussion of increasing CSV parsing performance #[allow(dead_code)] fn fast_parse_bytes(mut rdr: csv::Reader) -> Result { // our data is ascii, so parsing with the slightly faster ByteRecord is fine let headers: csv::ByteRecord = rdr.byte_headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone(); let mut row = csv::ByteRecord::new(); // manual_deserialize_bytes assumes the column order of the CSV, // so here we verify that it actually matches that assumption assert_eq!(headers.get(0), Some(&b"time"[..])); assert_eq!(headers.get(1), Some(&b"amount"[..])); assert_eq!(headers.get(2), Some(&b"exch"[..])); assert_eq!(headers.get(3), Some(&b"price"[..])); assert_eq!(headers.get(6), Some(&b"ticker"[..])); let mut n = 0; let mut last_time = 0; while rdr.read_byte_record(&mut row) .map_err(|e| { format!("reading row {} failed: {}", (n+1).thousands_sep(), e) })? { let trade: Trade = manual_deserialize_bytes(&row) .map_err(|e| { format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row) })?; assert!(trade.time >= last_time); last_time = trade.time; n += 1; } Ok(n) } fn run(start: Instant, logger: &slog::Logger) -> Result { let opt = Opt::from_args(); info!(logger, "initializing..."; "trades-csv" => %opt.trades_csv.display(), "output-path" => %opt.output_path.display() ); if ! opt.trades_csv.exists() { error!(logger, "path does not exist: {}", opt.trades_csv.display()); fatal!("Error: path does not exist: {}", opt.trades_csv.display()); } debug!(logger, "verified csv path exists"; "trades_csv" => %opt.trades_csv.display()); let rdr = fs::File::open(&opt.trades_csv) .map_err(|e| format!("opening trades csv file failed: {} (tried to open {})", e, opt.trades_csv.display()))?; let rdr = io::BufReader::new(rdr); let mut rdr = csv::Reader::from_reader(rdr); // initializing --output-path CSV let wtr = fs::File::create(&opt.output_path) .map_err(|e| format!("creating output csv file failed: {} (tried to create {})", e, opt.output_path.display()))?; let wtr = io::BufWriter::new(wtr); let mut wtr = csv::Writer::from_writer(wtr); wtr.write_record(&["time","ratio","bmex","gdax","n_bmex","n_gdax","bmex_amt","gdax_amt"]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?; let headers: csv::StringRecord = rdr.headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone(); let mut row = csv::StringRecord::new(); // pull out first row to initialize query calculations rdr.read_record(&mut row).map_err(|e| format!("reading first row failed: {}", e))?; let trade: Trade = row.deserialize(Some(&headers)) .map_err(|e| { format!("deserializing first row failed: {}\n\nFailing row:\n{:?}", e, row) })?; let mut cur_hour = trade.time - trade.time % ONE_HOUR; let mut next_hour = cur_hour + ONE_HOUR; let mut bmex_total = if trade.exch == e!(bmex) { trade.price * trade.amount } else { 0.0 }; let mut bmex_amt = if trade.exch == e!(bmex) { trade.amount } else { 0.0 }; let mut n_bmex = 0; let mut gdax_total = if trade.exch == e!(gdax) { trade.price * trade.amount } else { 0.0 }; let mut gdax_amt = if trade.exch == e!(gdax) { trade.amount } else { 0.0 }; let mut n_gdax = 0; let mut n = 0; let mut n_written = 0; let mut last_time = 0; while rdr.read_record(&mut row) .map_err(|e| { format!("reading row {} failed: {}", (n+1).thousands_sep(), e) })? { let trade: Trade = row.deserialize(Some(&headers)) .map_err(|e| { format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row) })?; n += 1; if trade.ticker != t!(btc-usd) { continue } // verify data is sorted by time assert!(trade.time >= last_time); last_time = trade.time; if trade.time >= next_hour { // finalize last hour, and prepare for this hour if n_bmex == 0 || n_gdax == 0 { wtr.write_record(&[ &format!("{}", cur_hour), "NaN", "NaN", "NaN", &format!("{}", n_bmex), &format!("{}", n_gdax), &format!("{}", bmex_amt), &format!("{}", gdax_amt), ]).map_err(|e| format!("writing output row failed: {}", e))?; } else { let bmex_wt_avg = bmex_total / bmex_amt; let gdax_wt_avg = gdax_total / gdax_amt; let ratio = bmex_wt_avg / gdax_wt_avg; wtr.write_record(&[ &format!("{}", cur_hour), &format!("{}", ratio), &format!("{}", bmex_wt_avg), &format!("{}", gdax_wt_avg), &format!("{}", n_bmex), &format!("{}", n_gdax), &format!("{}", bmex_amt), &format!("{}", gdax_amt), ]).map_err(|e| format!("writing output row failed: {}", e))?; } n_written += 1; // reset state bmex_total = 0.0; bmex_amt = 0.0; gdax_total = 0.0; gdax_amt = 0.0; n_bmex = 0; n_gdax = 0; cur_hour = next_hour; next_hour += ONE_HOUR; // if we are skipping hours in between the last and current row, we // need to write a NaN row for the hours that had no data while next_hour <= trade.time { wtr.write_record(&[ &format!("{}", cur_hour), "NaN", "NaN", "NaN", "0", "0", "0.0", "0.0", ]).map_err(|e| format!("writing output row failed: {}", e))?; n_written += 1; cur_hour = next_hour; next_hour += ONE_HOUR; } } match trade.exch { e!(bmex) => { bmex_total += trade.price * trade.amount; bmex_amt += trade.amount; n_bmex += 1; } e!(gdax) => { gdax_total += trade.price * trade.amount; gdax_amt += trade.amount; n_gdax += 1; } _ => {} } if n % PROGRESS_EVERY == 0 || (cfg!(debug_assertions) && n % (1024 * 96) == 0) { info!(logger, "parsing csv file"; "n rows" => %n.thousands_sep(), "n written" => %n_written.thousands_sep(), "elapsed" => ?(Instant::now() - start), ); } if cfg!(debug_assertions) && n > PROGRESS_EVERY { warn!(logger, "debug mode: exiting early"; "n rows" => %n.thousands_sep(), "n written" => %n_written.thousands_sep(), "elapsed" => ?(Instant::now() - start), ); break } } // intentionally skipping the partial hour here info!(logger, "finished parsing CSV/calculating query. closing output file"); drop(wtr); Ok(n) } fn main() { let start = Instant::now(); let decorator = slog_term::TermDecorator::new().stdout().force_color().build(); let drain = slog_term::FullFormat::new(decorator).use_utc_timestamp().build().fuse(); let drain = slog_async::Async::new(drain).chan_size(1024 * 64).thread_name("recv".into()).build().fuse(); let logger = slog::Logger::root(drain, o!("version" => structopt::clap::crate_version!())); match run(start, &logger) { Ok(n) => { let took = Instant::now() - start; let took_secs = took.as_millis() as f64 / 1000.0; let took_str = format!("{}min, {:.1}sec", took.as_secs() / 60, took_secs % 60.0); info!(logger, "finished in {}", took_str; "n rows" => %n.thousands_sep(), "rows/sec" => &((per_sec(n, took) * 100.0).round() / 10.0).thousands_sep(), ); } Err(e) => { crit!(logger, "run failed: {:?}", e); eprintln!("\n\nError: {}", e); std::thread::sleep(Duration::from_millis(100)); std::process::exit(1); } } }