|
@@ -23,6 +23,8 @@ macro_rules! fatal { ($fmt:expr, $($args:tt)*) => {{ |
|
|
}}} |
|
|
}}} |
|
|
|
|
|
|
|
|
const PROGRESS_EVERY: usize = 1024 * 1024; |
|
|
const PROGRESS_EVERY: usize = 1024 * 1024; |
|
|
|
|
|
const ONE_SECOND: u64 = 1_000_000_000; |
|
|
|
|
|
const ONE_HOUR: u64 = ONE_SECOND * 60 * 60; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, StructOpt)] |
|
|
#[derive(Debug, StructOpt)] |
|
@@ -40,27 +42,25 @@ struct Opt { |
|
|
|
|
|
|
|
|
#[derive(Deserialize)] |
|
|
#[derive(Deserialize)] |
|
|
struct Trade { |
|
|
struct Trade { |
|
|
/// Unix nanoseconds |
|
|
|
|
|
|
|
|
/// Time of trade in unix nanoseconds |
|
|
pub time: u64, |
|
|
pub time: u64, |
|
|
|
|
|
/// Exchange where trade executed |
|
|
pub exch: Exchange, |
|
|
pub exch: Exchange, |
|
|
|
|
|
/// Currency rate of trade (base/quote) |
|
|
pub ticker: Ticker, |
|
|
pub ticker: Ticker, |
|
|
//pub side: Option<Side>, |
|
|
|
|
|
|
|
|
/// Price of trade, in quote denomination |
|
|
pub price: f64, |
|
|
pub price: f64, |
|
|
|
|
|
/// Size/Volume of trade, in base denomination |
|
|
pub amount: f64, |
|
|
pub amount: f64, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
/* |
|
|
|
|
|
struct HourSummary { |
|
|
|
|
|
pub n_trades: usize, |
|
|
|
|
|
pub |
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
|
|
fn per_sec(n: usize, span: Duration) -> f64 { |
|
|
fn per_sec(n: usize, span: Duration) -> f64 { |
|
|
if n == 0 || span < Duration::from_micros(1) { return 0.0 } |
|
|
if n == 0 || span < Duration::from_micros(1) { return 0.0 } |
|
|
let s: f64 = span.as_nanos() as f64 / 1e9f64; |
|
|
let s: f64 = span.as_nanos() as f64 / 1e9f64; |
|
|
n as f64 / s |
|
|
n as f64 / s |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[allow(dead_code)] |
|
|
#[inline(always)] |
|
|
#[inline(always)] |
|
|
fn manual_deserialize_bytes(row: &csv::ByteRecord) -> Result<Trade, &'static str> { |
|
|
fn manual_deserialize_bytes(row: &csv::ByteRecord) -> Result<Trade, &'static str> { |
|
|
let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?) |
|
|
let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?) |
|
@@ -98,6 +98,7 @@ fn manual_deserialize_bytes(row: &csv::ByteRecord) -> Result<Trade, &'static str |
|
|
Ok(Trade { time, amount, exch, price, ticker }) |
|
|
Ok(Trade { time, amount, exch, price, ticker }) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[allow(dead_code)] |
|
|
#[inline(always)] |
|
|
#[inline(always)] |
|
|
fn manual_deserialize_str(row: &csv::StringRecord) -> Result<Trade, &'static str> { |
|
|
fn manual_deserialize_str(row: &csv::StringRecord) -> Result<Trade, &'static str> { |
|
|
let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?.as_bytes()) |
|
|
let time: u64 = atoi::atoi(row.get(0).ok_or("no time")?.as_bytes()) |
|
@@ -135,6 +136,42 @@ fn manual_deserialize_str(row: &csv::StringRecord) -> Result<Trade, &'static str |
|
|
Ok(Trade { time, amount, exch, price, ticker }) |
|
|
Ok(Trade { time, amount, exch, price, ticker }) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/// Example of code used in discussion of increasing CSV parsing performance |
|
|
|
|
|
#[allow(dead_code)] |
|
|
|
|
|
fn fast_parse_bytes<R: Read>(mut rdr: csv::Reader<R>) -> Result<usize, String> { |
|
|
|
|
|
// our data is ascii, so parsing with the slightly faster ByteRecord is fine |
|
|
|
|
|
let headers: csv::ByteRecord = rdr.byte_headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone(); |
|
|
|
|
|
let mut row = csv::ByteRecord::new(); |
|
|
|
|
|
|
|
|
|
|
|
// manual_deserialize_bytes assumes the column order of the CSV, |
|
|
|
|
|
// so here we verify that it actually matches that assumption |
|
|
|
|
|
assert_eq!(headers.get(0), Some(&b"time"[..])); |
|
|
|
|
|
assert_eq!(headers.get(1), Some(&b"amount"[..])); |
|
|
|
|
|
assert_eq!(headers.get(2), Some(&b"exch"[..])); |
|
|
|
|
|
assert_eq!(headers.get(3), Some(&b"price"[..])); |
|
|
|
|
|
assert_eq!(headers.get(6), Some(&b"ticker"[..])); |
|
|
|
|
|
|
|
|
|
|
|
let mut n = 0; |
|
|
|
|
|
let mut last_time = 0; |
|
|
|
|
|
|
|
|
|
|
|
while rdr.read_byte_record(&mut row) |
|
|
|
|
|
.map_err(|e| { |
|
|
|
|
|
format!("reading row {} failed: {}", (n+1).thousands_sep(), e) |
|
|
|
|
|
})? |
|
|
|
|
|
{ |
|
|
|
|
|
let trade: Trade = manual_deserialize_bytes(&row) |
|
|
|
|
|
.map_err(|e| { |
|
|
|
|
|
format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row) |
|
|
|
|
|
})?; |
|
|
|
|
|
assert!(trade.time >= last_time); |
|
|
|
|
|
last_time = trade.time; |
|
|
|
|
|
|
|
|
|
|
|
n += 1; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Ok(n) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
fn run(start: Instant, logger: &slog::Logger) -> Result<usize, String> { |
|
|
fn run(start: Instant, logger: &slog::Logger) -> Result<usize, String> { |
|
|
let opt = Opt::from_args(); |
|
|
let opt = Opt::from_args(); |
|
|
|
|
|
|
|
@@ -157,44 +194,139 @@ fn run(start: Instant, logger: &slog::Logger) -> Result<usize, String> { |
|
|
|
|
|
|
|
|
let mut rdr = csv::Reader::from_reader(rdr); |
|
|
let mut rdr = csv::Reader::from_reader(rdr); |
|
|
|
|
|
|
|
|
// our data is ascii, so parsing with the slightly faster ByteRecord is fine |
|
|
|
|
|
//let headers: csv::ByteRecord = rdr.byte_headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone(); |
|
|
|
|
|
//let mut row = csv::ByteRecord::new(); |
|
|
|
|
|
//assert_eq!(headers.get(0), Some(&b"time"[..])); |
|
|
|
|
|
//assert_eq!(headers.get(1), Some(&b"amount"[..])); |
|
|
|
|
|
//assert_eq!(headers.get(2), Some(&b"exch"[..])); |
|
|
|
|
|
//assert_eq!(headers.get(3), Some(&b"price"[..])); |
|
|
|
|
|
//assert_eq!(headers.get(6), Some(&b"ticker"[..])); |
|
|
|
|
|
|
|
|
|
|
|
//let headers: csv::StringRecord = rdr.headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone(); |
|
|
|
|
|
|
|
|
// initializing --output-path CSV |
|
|
|
|
|
let wtr = fs::File::create(&opt.output_path) |
|
|
|
|
|
.map_err(|e| format!("creating output csv file failed: {} (tried to create {})", e, opt.output_path.display()))?; |
|
|
|
|
|
|
|
|
|
|
|
let wtr = io::BufWriter::new(wtr); |
|
|
|
|
|
|
|
|
|
|
|
let mut wtr = csv::Writer::from_writer(wtr); |
|
|
|
|
|
|
|
|
|
|
|
wtr.write_record(&["time","ratio","bmex","gdax","n_bmex","n_gdax","bmex_amt","gdax_amt"]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?; |
|
|
|
|
|
|
|
|
|
|
|
let headers: csv::StringRecord = rdr.headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone(); |
|
|
let mut row = csv::StringRecord::new(); |
|
|
let mut row = csv::StringRecord::new(); |
|
|
|
|
|
|
|
|
|
|
|
// pull out first row to initialize query calculations |
|
|
|
|
|
rdr.read_record(&mut row).map_err(|e| format!("reading first row failed: {}", e))?; |
|
|
|
|
|
let trade: Trade = row.deserialize(Some(&headers)) |
|
|
|
|
|
.map_err(|e| { |
|
|
|
|
|
format!("deserializing first row failed: {}\n\nFailing row:\n{:?}", e, row) |
|
|
|
|
|
})?; |
|
|
|
|
|
|
|
|
|
|
|
let mut cur_hour = trade.time - trade.time % ONE_HOUR; |
|
|
|
|
|
let mut next_hour = cur_hour + ONE_HOUR; |
|
|
|
|
|
|
|
|
|
|
|
let mut bmex_total = if trade.exch == e!(bmex) { trade.price * trade.amount } else { 0.0 }; |
|
|
|
|
|
let mut bmex_amt = if trade.exch == e!(bmex) { trade.amount } else { 0.0 }; |
|
|
|
|
|
let mut n_bmex = 0; |
|
|
|
|
|
|
|
|
|
|
|
let mut gdax_total = if trade.exch == e!(gdax) { trade.price * trade.amount } else { 0.0 }; |
|
|
|
|
|
let mut gdax_amt = if trade.exch == e!(gdax) { trade.amount } else { 0.0 }; |
|
|
|
|
|
let mut n_gdax = 0; |
|
|
|
|
|
|
|
|
let mut n = 0; |
|
|
let mut n = 0; |
|
|
|
|
|
let mut n_written = 0; |
|
|
let mut last_time = 0; |
|
|
let mut last_time = 0; |
|
|
|
|
|
|
|
|
//while rdr.read_byte_record(&mut row) |
|
|
|
|
|
|
|
|
|
|
|
while rdr.read_record(&mut row) |
|
|
while rdr.read_record(&mut row) |
|
|
.map_err(|e| { |
|
|
.map_err(|e| { |
|
|
format!("reading row {} failed: {}", (n+1).thousands_sep(), e) |
|
|
format!("reading row {} failed: {}", (n+1).thousands_sep(), e) |
|
|
})? |
|
|
})? |
|
|
{ |
|
|
{ |
|
|
//let trade: Trade = row.deserialize(Some(&headers)) |
|
|
|
|
|
//let trade: Trade = manual_deserialize_bytes(&row) |
|
|
|
|
|
let trade: Trade = manual_deserialize_str(&row) |
|
|
|
|
|
|
|
|
let trade: Trade = row.deserialize(Some(&headers)) |
|
|
.map_err(|e| { |
|
|
.map_err(|e| { |
|
|
format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row) |
|
|
format!("deserializing row failed: {}\n\nFailing row:\n{:?}", e, row) |
|
|
})?; |
|
|
})?; |
|
|
|
|
|
|
|
|
n += 1; |
|
|
n += 1; |
|
|
|
|
|
|
|
|
|
|
|
if trade.ticker != t!(btc-usd) { continue } |
|
|
|
|
|
|
|
|
// verify data is sorted by time |
|
|
// verify data is sorted by time |
|
|
debug_assert!(trade.time >= last_time); |
|
|
|
|
|
|
|
|
assert!(trade.time >= last_time); |
|
|
last_time = trade.time; |
|
|
last_time = trade.time; |
|
|
|
|
|
|
|
|
|
|
|
if trade.time >= next_hour { // finalize last hour, and prepare for this hour |
|
|
|
|
|
if n_bmex == 0 || n_gdax == 0 { |
|
|
|
|
|
wtr.write_record(&[ |
|
|
|
|
|
&format!("{}", cur_hour), |
|
|
|
|
|
"NaN", |
|
|
|
|
|
"NaN", |
|
|
|
|
|
"NaN", |
|
|
|
|
|
&format!("{}", n_bmex), |
|
|
|
|
|
&format!("{}", n_gdax), |
|
|
|
|
|
&format!("{}", bmex_amt), |
|
|
|
|
|
&format!("{}", gdax_amt), |
|
|
|
|
|
]).map_err(|e| format!("writing output row failed: {}", e))?; |
|
|
|
|
|
} else { |
|
|
|
|
|
let bmex_wt_avg = bmex_total / bmex_amt; |
|
|
|
|
|
let gdax_wt_avg = gdax_total / gdax_amt; |
|
|
|
|
|
let ratio = bmex_wt_avg / gdax_wt_avg; |
|
|
|
|
|
wtr.write_record(&[ |
|
|
|
|
|
&format!("{}", cur_hour), |
|
|
|
|
|
&format!("{}", ratio), |
|
|
|
|
|
&format!("{}", bmex_wt_avg), |
|
|
|
|
|
&format!("{}", gdax_wt_avg), |
|
|
|
|
|
&format!("{}", n_bmex), |
|
|
|
|
|
&format!("{}", n_gdax), |
|
|
|
|
|
&format!("{}", bmex_amt), |
|
|
|
|
|
&format!("{}", gdax_amt), |
|
|
|
|
|
]).map_err(|e| format!("writing output row failed: {}", e))?; |
|
|
|
|
|
} |
|
|
|
|
|
n_written += 1; |
|
|
|
|
|
|
|
|
|
|
|
// reset state |
|
|
|
|
|
bmex_total = 0.0; |
|
|
|
|
|
bmex_amt = 0.0; |
|
|
|
|
|
gdax_total = 0.0; |
|
|
|
|
|
gdax_amt = 0.0; |
|
|
|
|
|
n_bmex = 0; |
|
|
|
|
|
n_gdax = 0; |
|
|
|
|
|
|
|
|
|
|
|
cur_hour = next_hour; |
|
|
|
|
|
next_hour += ONE_HOUR; |
|
|
|
|
|
|
|
|
|
|
|
// if we are skipping hours in between the last and current row, we |
|
|
|
|
|
// need to write a NaN row for the hours that had no data |
|
|
|
|
|
while next_hour <= trade.time { |
|
|
|
|
|
wtr.write_record(&[ |
|
|
|
|
|
&format!("{}", cur_hour), |
|
|
|
|
|
"NaN", |
|
|
|
|
|
"NaN", |
|
|
|
|
|
"NaN", |
|
|
|
|
|
"0", |
|
|
|
|
|
"0", |
|
|
|
|
|
"0.0", |
|
|
|
|
|
"0.0", |
|
|
|
|
|
]).map_err(|e| format!("writing output row failed: {}", e))?; |
|
|
|
|
|
|
|
|
|
|
|
n_written += 1; |
|
|
|
|
|
cur_hour = next_hour; |
|
|
|
|
|
next_hour += ONE_HOUR; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
match trade.exch { |
|
|
|
|
|
e!(bmex) => { |
|
|
|
|
|
bmex_total += trade.price * trade.amount; |
|
|
|
|
|
bmex_amt += trade.amount; |
|
|
|
|
|
n_bmex += 1; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
e!(gdax) => { |
|
|
|
|
|
gdax_total += trade.price * trade.amount; |
|
|
|
|
|
gdax_amt += trade.amount; |
|
|
|
|
|
n_gdax += 1; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
_ => {} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
if n % PROGRESS_EVERY == 0 || (cfg!(debug_assertions) && n % (1024 * 96) == 0) { |
|
|
if n % PROGRESS_EVERY == 0 || (cfg!(debug_assertions) && n % (1024 * 96) == 0) { |
|
|
info!(logger, "parsing csv file"; |
|
|
info!(logger, "parsing csv file"; |
|
|
"n rows" => %n.thousands_sep(), |
|
|
"n rows" => %n.thousands_sep(), |
|
|
|
|
|
"n written" => %n_written.thousands_sep(), |
|
|
"elapsed" => ?(Instant::now() - start), |
|
|
"elapsed" => ?(Instant::now() - start), |
|
|
); |
|
|
); |
|
|
} |
|
|
} |
|
@@ -202,12 +334,18 @@ fn run(start: Instant, logger: &slog::Logger) -> Result<usize, String> { |
|
|
if cfg!(debug_assertions) && n > PROGRESS_EVERY { |
|
|
if cfg!(debug_assertions) && n > PROGRESS_EVERY { |
|
|
warn!(logger, "debug mode: exiting early"; |
|
|
warn!(logger, "debug mode: exiting early"; |
|
|
"n rows" => %n.thousands_sep(), |
|
|
"n rows" => %n.thousands_sep(), |
|
|
|
|
|
"n written" => %n_written.thousands_sep(), |
|
|
"elapsed" => ?(Instant::now() - start), |
|
|
"elapsed" => ?(Instant::now() - start), |
|
|
); |
|
|
); |
|
|
break |
|
|
break |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// intentionally skipping the partial hour here |
|
|
|
|
|
|
|
|
|
|
|
info!(logger, "finished parsing CSV/calculating query. closing output file"); |
|
|
|
|
|
drop(wtr); |
|
|
|
|
|
|
|
|
Ok(n) |
|
|
Ok(n) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@@ -222,7 +360,10 @@ fn main() { |
|
|
match run(start, &logger) { |
|
|
match run(start, &logger) { |
|
|
Ok(n) => { |
|
|
Ok(n) => { |
|
|
let took = Instant::now() - start; |
|
|
let took = Instant::now() - start; |
|
|
info!(logger, "finished in {:?}", took; |
|
|
|
|
|
|
|
|
let took_secs = took.as_millis() as f64 / 1000.0; |
|
|
|
|
|
let took_str = format!("{}min, {:.1}sec", took.as_secs() / 60, took_secs % 60.0); |
|
|
|
|
|
|
|
|
|
|
|
info!(logger, "finished in {}", took_str; |
|
|
"n rows" => %n.thousands_sep(), |
|
|
"n rows" => %n.thousands_sep(), |
|
|
"rows/sec" => &((per_sec(n, took) * 100.0).round() / 10.0).thousands_sep(), |
|
|
"rows/sec" => &((per_sec(n, took) * 100.0).round() / 10.0).thousands_sep(), |
|
|
); |
|
|
); |
|
|