|
- #![allow(unused_imports)]
-
- #[macro_use]
- extern crate slog;
- #[macro_use]
- extern crate markets;
-
- use std::path::PathBuf;
- use std::time::*;
- use std::io::{self, prelude::*};
- use std::fs;
- use std::f64::NAN;
- use structopt::StructOpt;
- use serde::{Serialize, Deserialize};
- use slog::Drain;
- use pretty_toa::ThousandsSep;
- use markets::crypto::{Exchange, Ticker, Side};
- use pipelines::windows::WeightedMeanWindow;
-
-
- // equivalent to panic! but without the ugly 'thread main panicked' yada yada
// equivalent to panic! but without the ugly 'thread main panicked' yada yada
//
// accepts the same argument shapes as `eprintln!`. the original single-arm
// pattern `($fmt:expr, $($args:tt)*)` required a comma and at least one
// trailing token, so `fatal!("plain message")` failed to compile; a
// dedicated no-args arm fixes that while leaving existing call sites
// (format string + args) unchanged.
macro_rules! fatal {
    ($fmt:expr) => {{
        eprintln!($fmt);
        std::process::exit(1);
    }};
    ($fmt:expr, $($args:tt)*) => {{
        eprintln!($fmt, $($args)*);
        std::process::exit(1);
    }};
}
-
/// Emit a progress/status log line every this many parsed rows.
const PROGRESS_EVERY: usize = 1024 * 1024 * 2;
/// One second expressed in nanoseconds (trade timestamps are unix nanos).
const ONE_SECOND: u64 = 1_000_000_000;
/// One hour expressed in nanoseconds.
const ONE_HOUR: u64 = ONE_SECOND * 60 * 60;
-
-
/// Command-line options, parsed by structopt/clap.
#[derive(Debug, StructOpt)]
struct Opt {
    /// Path to CSV file with trades data
    #[structopt(short = "f", long = "trades-csv")]
    #[structopt(parse(from_os_str))]
    trades_csv: PathBuf,

    /// Where to save the query results (CSV output)
    #[structopt(short = "o", long = "output-path")]
    #[structopt(parse(from_os_str))]
    output_path: PathBuf,

    /// Run the alternate "hard mode" query (rolling-window ratios every 10s)
    /// instead of the default hourly-ratio query
    #[structopt(short = "z", long = "hard-mode")]
    hard_mode: bool,
}
-
/// A single executed trade, deserialized from one row of the input CSV.
#[derive(Deserialize, Debug)]
struct Trade {
    /// Time of trade in unix nanoseconds
    pub time: u64,
    /// Exchange where trade executed
    pub exch: Exchange,
    /// Currency rate of trade (base/quote)
    pub ticker: Ticker,
    /// Price of trade, in quote denomination
    pub price: f64,
    /// Size/Volume of trade, in base denomination
    pub amount: f64,

    /// Secondary timestamp in unix nanoseconds; 0 means "not present".
    /// NOTE(review): presumably the time the server received/recorded the
    /// trade -- confirm against the data source. `run` only sanity-checks
    /// that its millisecond offset from `time` fits in an i32.
    pub server_time: u64,
}
-
/// Convert a row count and elapsed `Duration` into a rows-per-second rate.
///
/// Returns 0.0 when the count is zero or the span is too short
/// (< 1 microsecond) to produce a meaningful rate.
fn per_sec(n: usize, span: Duration) -> f64 {
    let too_brief = span < Duration::from_micros(1);
    if n == 0 || too_brief {
        0.0
    } else {
        let elapsed_secs: f64 = span.as_nanos() as f64 / 1e9f64;
        n as f64 / elapsed_secs
    }
}
-
- #[allow(dead_code)]
- #[inline(always)]
- fn manual_deserialize_bytes(row: &csv::ByteRecord) -> Result<Trade, &'static str> {
- let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?)
- .ok_or("parsing time failed")?;
-
- let amount: f64 = lexical::parse(row.get(1).ok_or("no amount")?)
- .map_err(|_| "parsing amount failed")?;
-
- let exch = match row.get(2).ok_or("no exch")? {
- b"bmex" => e!(bmex),
- b"bnce" => e!(bnce),
- b"btfx" => e!(btfx),
- b"gdax" => e!(gdax),
- b"okex" => e!(okex),
- b"bits" => e!(bits),
- b"plnx" => e!(plnx),
- b"krkn" => e!(krkn),
- _ => return Err("illegal exch"),
- };
-
- let price: f64 = lexical::parse(row.get(3).ok_or("no price")?)
- .map_err(|_| "parsing price failed")?;
-
- let ticker = match row.get(6).ok_or("no ticker")? {
- b"btc_usd" => t!(btc-usd),
- b"eth_usd" => t!(eth-usd),
- b"ltc_usd" => t!(ltc-usd),
- b"etc_usd" => t!(etc-usd),
- b"bch_usd" => t!(bch-usd),
- b"xmr_usd" => t!(xmr-usd),
- b"usdt_usd" => t!(usdt-usd),
- _ => return Err("illegal ticker"),
- };
-
- Ok(Trade { time, amount, exch, price, ticker,
- server_time: 0,
- })
- }
-
- #[allow(dead_code)]
- #[inline(always)]
- fn manual_deserialize_str(row: &csv::StringRecord) -> Result<Trade, &'static str> {
- let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?.as_bytes())
- .ok_or("parsing time failed")?;
-
- let amount: f64 = lexical::parse(row.get(1).ok_or("no amount")?)
- .map_err(|_| "parsing amount failed")?;
-
- let exch = match row.get(2).ok_or("no exch")? {
- "bmex" => e!(bmex),
- "bnce" => e!(bnce),
- "btfx" => e!(btfx),
- "gdax" => e!(gdax),
- "okex" => e!(okex),
- "bits" => e!(bits),
- "plnx" => e!(plnx),
- "krkn" => e!(krkn),
- _ => return Err("illegal exch"),
- };
-
- let price: f64 = lexical::parse(row.get(3).ok_or("no price")?)
- .map_err(|_| "parsing price failed")?;
-
- let ticker = match row.get(6).ok_or("no ticker")? {
- "btc_usd" => t!(btc-usd),
- "eth_usd" => t!(eth-usd),
- "ltc_usd" => t!(ltc-usd),
- "etc_usd" => t!(etc-usd),
- "bch_usd" => t!(bch-usd),
- "xmr_usd" => t!(xmr-usd),
- "usdt_usd" => t!(usdt-usd),
- _ => return Err("illegal ticker"),
- };
-
- Ok(Trade { time, amount, exch, price, ticker,
- server_time: 0,
- })
- }
-
- /// Example of code used in discussion of increasing CSV parsing performance
- #[allow(dead_code)]
- fn fast_parse_bytes<R: Read>(mut rdr: csv::Reader<R>) -> Result<usize, String> {
- // our data is ascii, so parsing with the slightly faster ByteRecord is fine
- let headers: csv::ByteRecord = rdr.byte_headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone();
- let mut row = csv::ByteRecord::new();
-
- // manual_deserialize_bytes assumes the column order of the CSV,
- // so here we verify that it actually matches that assumption
- assert_eq!(headers.get(0), Some(&b"time"[..]));
- assert_eq!(headers.get(1), Some(&b"amount"[..]));
- assert_eq!(headers.get(2), Some(&b"exch"[..]));
- assert_eq!(headers.get(3), Some(&b"price"[..]));
- assert_eq!(headers.get(6), Some(&b"ticker"[..]));
-
- let mut n = 0;
- let mut last_time = 0;
-
- while rdr.read_byte_record(&mut row)
- .map_err(|e| {
- format!("reading row {} failed: {}", (n+1).thousands_sep(), e)
- })?
- {
- let trade: Trade = manual_deserialize_bytes(&row)
- .map_err(|e| {
- format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row)
- })?;
- assert!(trade.time >= last_time);
- last_time = trade.time;
-
- n += 1;
- }
-
- Ok(n)
- }
-
- fn hard_mode<R, W>(
- mut rdr: csv::Reader<R>,
- mut wtr: csv::Writer<W>,
- logger: &slog::Logger,
- ) -> Result<usize, String>
- where R: Read,
- W: Write
- {
- let logger = logger.new(o!("hard-mode" => "challenge accepted"));
- info!(logger, "beginning hard mode");
-
- let headers: csv::StringRecord = rdr.headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone();
- let mut row = csv::StringRecord::new();
-
- // pull out first row to initialize query calculations
- rdr.read_record(&mut row).map_err(|e| format!("reading first row failed: {}", e))?;
- let trade: Trade = row.deserialize(Some(&headers))
- .map_err(|e| {
- format!("deserializing first row failed: {}\n\nFailing row:\n{:?}", e, row)
- })?;
-
- let mut cur_bucket = trade.time - (trade.time % (ONE_SECOND * 10)) + ONE_SECOND * 10;
- // let mut next_bucket = cur_bucket + ONE_SECOND * 10;
- // let mut last_price: f64 = NAN;
-
- #[derive(Default, Clone)]
- struct Lookbacks<T> {
- pub p5: T,
- pub p15: T,
- pub p60: T,
- }
-
- let mut ratios: Lookbacks<f64> = Default::default();
-
- let mut bmex_windows: Lookbacks<WeightedMeanWindow> =
- Lookbacks {
- p5: WeightedMeanWindow::new(ONE_SECOND * 60 * 5 ),
- p15: WeightedMeanWindow::new(ONE_SECOND * 60 * 15),
- p60: WeightedMeanWindow::new(ONE_SECOND * 60 * 60),
- };
-
- let mut gdax_windows = bmex_windows.clone();
-
- //let mut row_buffers: [Vec<u8>; 4] = [Vec::with_capacity(32), Vec::with_capacity(32), Vec::with_capacity(32), Vec::with_capacity(32)];
-
- let mut row_buffer: Vec<u8> = Vec::with_capacity(512);
-
- macro_rules! update { // in macro to avoid repeating code once outside loop, and again in loop body
- ($trade:ident) => {{
- match $trade.exch {
- e!(bmex) => {
- bmex_windows.p5 .push($trade.time, $trade.price, $trade.amount);
- bmex_windows.p15.push($trade.time, $trade.price, $trade.amount);
- bmex_windows.p60.push($trade.time, $trade.price, $trade.amount);
-
- // last_price = $trade.price;
- }
-
- e!(gdax) => {
- gdax_windows.p5 .push($trade.time, $trade.price, $trade.amount);
- gdax_windows.p15.push($trade.time, $trade.price, $trade.amount);
- gdax_windows.p60.push($trade.time, $trade.price, $trade.amount);
-
- // last_price = $trade.price;
- }
-
- _ => {}
- }
- }}
- }
-
- wtr.write_record(&[
- "time",
- "r5",
- "r15",
- "r60",
- ]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?;
-
- let mut wtr = wtr.into_inner().map_err(|e| format!("csv::Writer::into_inner failed: {}", e))?;
-
- if trade.ticker == t!(btc-usd) { update!(trade); }
-
- let mut n = 0;
- let mut n_written = 0;
-
- while rdr.read_record(&mut row)
- .map_err(|e| {
- format!("reading row {} failed: {}", (n+1).thousands_sep(), e)
- })?
- {
- n += 1;
-
- let trade: Trade = row.deserialize(Some(&headers))
- .map_err(|e| {
- format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row)
- })?;
-
- if trade.time > cur_bucket {
- debug!(logger, "about to purge";
- "n" => n,
- "n written" => n_written,
- "trade.time" => trade.time,
- "cur_bucket" => cur_bucket,
- "gdax p5 len" => gdax_windows.p5.len(),
- "gdax p5 wt avg" => gdax_windows.p5.weighted_mean(),
- );
-
- bmex_windows.p5 .purge(trade.time);
- bmex_windows.p15.purge(trade.time);
- bmex_windows.p60.purge(trade.time);
-
- gdax_windows.p5 .purge(trade.time);
- gdax_windows.p15.purge(trade.time);
- gdax_windows.p60.purge(trade.time);
-
- debug!(logger, "finished purge";
- "n" => n,
- "n written" => n_written,
- "trade.time" => trade.time,
- "cur_bucket" => cur_bucket,
- "gdax p5 len" => gdax_windows.p5.len(),
- "gdax p5 wt avg" => gdax_windows.p5.weighted_mean(),
- );
-
- ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean();
- ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean();
- ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean();
-
- //row_buffers.iter_mut().for_each(|x| x.clear());
- row_buffer.clear();
-
- itoa::write(&mut row_buffer, cur_bucket).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
- row_buffer.push(b',');
- dtoa::write(&mut row_buffer, ratios.p5 ).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
- row_buffer.push(b',');
- dtoa::write(&mut row_buffer, ratios.p15).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
- row_buffer.push(b',');
- dtoa::write(&mut row_buffer, ratios.p60).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
- row_buffer.push(b'\n');
-
- wtr.write_all(&row_buffer[..]).map_err(|e| format!("writing row failed: {}", e))?;
-
- /*
- wtr.write_record(&row_buffers[..]).map_err(|e| {
- format!("writing csv row failed: {}", e)
- })?;
- */
-
- /*
- wtr.write_record(&[
- &format!("{}", cur_bucket),
- &format!("{}", ratios.p5),
- &format!("{}", ratios.p15),
- &format!("{}", ratios.p60),
- ]).map_err(|e| {
- format!("writing csv row failed: {}", e)
- })?;
- */
- n_written += 1;
- cur_bucket += ONE_SECOND * 10;
- }
-
- if trade.ticker == t!(btc-usd) { update!(trade); }
-
- if n % PROGRESS_EVERY == 0 {
- info!(logger, "calculating hard query";
- "n" => %n.thousands_sep(),
- "n_written" => %n_written.thousands_sep(),
- "ratios.p5" => ratios.p5,
- "ratios.p15" => ratios.p15,
- "ratios.p60" => ratios.p60,
- );
- }
- }
-
- info!(logger, "finished with hard query");
-
- Ok(n)
- }
-
- fn run(start: Instant, logger: &slog::Logger) -> Result<usize, String> {
- let opt = Opt::from_args();
-
- info!(logger, "initializing...";
- "trades-csv" => %opt.trades_csv.display(),
- "output-path" => %opt.output_path.display()
- );
-
- if ! opt.trades_csv.exists() {
- error!(logger, "path does not exist: {}", opt.trades_csv.display());
- fatal!("Error: path does not exist: {}", opt.trades_csv.display());
- }
-
- debug!(logger, "verified csv path exists"; "trades_csv" => %opt.trades_csv.display());
-
- let rdr = fs::File::open(&opt.trades_csv)
- .map_err(|e| format!("opening trades csv file failed: {} (tried to open {})", e, opt.trades_csv.display()))?;
-
- let rdr = io::BufReader::new(rdr);
-
- let mut rdr = csv::Reader::from_reader(rdr);
-
- // initializing --output-path CSV
- let wtr = fs::File::create(&opt.output_path)
- .map_err(|e| format!("creating output csv file failed: {} (tried to create {})", e, opt.output_path.display()))?;
-
- let wtr = io::BufWriter::new(wtr);
-
- let mut wtr = csv::Writer::from_writer(wtr);
-
- if opt.hard_mode { return hard_mode(rdr, wtr, &logger) }
-
- wtr.write_record(&[
- "time",
- "ratio",
- "bmex",
- "gdax",
- ]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?;
-
- //let headers: csv::StringRecord = rdr.headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone();
- //let mut row = csv::StringRecord::new();
-
- let headers: csv::ByteRecord = rdr.byte_headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone();
- let mut row = csv::ByteRecord::new();
-
- // pull out first row to initialize query calculations
- //rdr.read_record(&mut row).map_err(|e| format!("reading first row failed: {}", e))?;
- rdr.read_byte_record(&mut row).map_err(|e| format!("reading first row failed: {}", e))?;
- let trade: Trade = row.deserialize(Some(&headers))
- .map_err(|e| {
- format!("deserializing first row failed: {}\n\nFailing row:\n{:?}", e, row)
- })?;
-
- let mut cur_hour = trade.time - trade.time % ONE_HOUR;
- let mut next_hour = cur_hour + ONE_HOUR;
-
- let mut bmex_total = if trade.exch == e!(bmex) { trade.price * trade.amount } else { 0.0 };
- let mut bmex_amt = if trade.exch == e!(bmex) { trade.amount } else { 0.0 };
- let mut n_bmex = 0;
-
- let mut gdax_total = if trade.exch == e!(gdax) { trade.price * trade.amount } else { 0.0 };
- let mut gdax_amt = if trade.exch == e!(gdax) { trade.amount } else { 0.0 };
- let mut n_gdax = 0;
-
- let mut n = 0;
- let mut n_written = 0;
- let mut last_time = 0;
-
- // while rdr.read_record(&mut row)
- // .map_err(|e| {
- // format!("reading row {} failed: {}", (n+1).thousands_sep(), e)
- // })?
- // {
-
- while rdr.read_byte_record(&mut row)
- .map_err(|e| {
- format!("reading row {} failed: {}", (n+1).thousands_sep(), e)
- })?
- {
-
- let trade: Trade = row.deserialize(Some(&headers))
- .map_err(|e| {
- format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row)
- })?;
-
- n += 1;
-
- if n % PROGRESS_EVERY == 0 || (cfg!(debug_assertions) && n % (1024 * 96) == 0) {
- info!(logger, "parsing csv file";
- "n rows" => %n.thousands_sep(),
- "n written" => %n_written.thousands_sep(),
- "elapsed" => ?(Instant::now() - start),
- );
- }
-
- if trade.server_time != 0 {
- let diff: i64 = (trade.server_time as i64 - trade.time as i64) / 1000 / 1000;
- assert!(diff >= std::i32::MIN as i64, "diff = {}, trade = {:?}", diff, trade);
- assert!(diff <= std::i32::MAX as i64, "diff = {}, trade = {:?}", diff, trade);
- }
-
- // verify data is sorted by time
- assert!(trade.time >= last_time);
- last_time = trade.time;
-
- if trade.ticker != t!(btc-usd) { continue }
-
- if trade.time >= next_hour {
- // `trade` is past the last hour bucket, so finalize/write last
- // hour results, and reset state for this hour
-
- if n_bmex == 0 || n_gdax == 0 {
- wtr.write_record(&[
- &format!("{}", cur_hour),
- "NaN",
- "NaN",
- "NaN",
- ]).map_err(|e| format!("writing output row failed: {}", e))?;
- } else {
- let bmex_wt_avg = bmex_total / bmex_amt;
- let gdax_wt_avg = gdax_total / gdax_amt;
- let ratio = bmex_wt_avg / gdax_wt_avg;
- wtr.write_record(&[
- &format!("{}", cur_hour),
- &format!("{}", ratio),
- &format!("{}", bmex_wt_avg),
- &format!("{}", gdax_wt_avg),
- ]).map_err(|e| format!("writing output row failed: {}", e))?;
- }
- n_written += 1;
-
- // reset state
- bmex_total = 0.0;
- bmex_amt = 0.0;
- gdax_total = 0.0;
- gdax_amt = 0.0;
- n_bmex = 0;
- n_gdax = 0;
-
- cur_hour = next_hour;
- next_hour += ONE_HOUR;
-
- // if we are skipping hours in between the last and current row, we
- // need to write a NaN row for the hours that had no data
- while next_hour <= trade.time {
- wtr.write_record(&[
- &format!("{}", cur_hour),
- "NaN",
- "NaN",
- "NaN",
- ]).map_err(|e| format!("writing output row failed: {}", e))?;
-
- n_written += 1;
- cur_hour = next_hour;
- next_hour += ONE_HOUR;
- }
- }
-
- match trade.exch {
- e!(bmex) => {
- bmex_total += trade.price * trade.amount;
- bmex_amt += trade.amount;
- n_bmex += 1;
- }
-
- e!(gdax) => {
- gdax_total += trade.price * trade.amount;
- gdax_amt += trade.amount;
- n_gdax += 1;
- }
-
- _ => {}
- }
-
- if cfg!(debug_assertions) && n > PROGRESS_EVERY {
- warn!(logger, "debug mode: exiting early";
- "n rows" => %n.thousands_sep(),
- "n written" => %n_written.thousands_sep(),
- "elapsed" => ?(Instant::now() - start),
- );
- break
- }
- }
-
- // intentionally skipping handling the partial hour here
-
- info!(logger, "finished parsing CSV/calculating query. closing output file");
- drop(wtr);
-
- Ok(n)
- }
-
- fn main() {
- let start = Instant::now();
-
- let decorator = slog_term::TermDecorator::new().stdout().force_color().build();
- let drain = slog_term::FullFormat::new(decorator).use_utc_timestamp().build().fuse();
- let drain = slog_async::Async::new(drain).chan_size(1024 * 64).thread_name("recv".into()).build().fuse();
- let logger = slog::Logger::root(drain, o!("version" => structopt::clap::crate_version!()));
-
- match run(start, &logger) {
- Ok(n) => {
- let took = Instant::now() - start;
- let took_secs = took.as_millis() as f64 / 1000.0;
- let took_str = format!("{}min, {:.1}sec", took.as_secs() / 60, took_secs % 60.0);
-
- info!(logger, "finished in {}", took_str;
- "n rows" => %n.thousands_sep(),
- "rows/sec" => &((per_sec(n, took) * 100.0).round() / 100.0).thousands_sep(),
- );
- }
-
- Err(e) => {
- crit!(logger, "run failed: {:?}", e);
- eprintln!("\n\nError: {}", e);
- std::thread::sleep(Duration::from_millis(100));
- std::process::exit(1);
- }
- }
- }
|