|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- #![allow(unused)]
-
- #[macro_use]
- extern crate slog;
- #[macro_use]
- extern crate markets;
-
- use std::io::{self, prelude::*};
- use std::fs;
- use std::path::{Path, PathBuf};
- use std::time::*;
- use pretty_toa::ThousandsSep;
- use structopt::StructOpt;
- use serde::{Serialize, Deserialize};
- use slog::Drain;
- use chrono::{DateTime, Utc, NaiveDateTime};
- use markets::crypto::{Exchange, Ticker, Side, Currency};
-
/// Prints a formatted message to stderr and terminates the process with
/// exit status 1.
///
/// Accepts exactly the arguments `eprintln!` accepts, so both
/// `fatal!("message")` and `fatal!("value: {}", x)` are valid. (The previous
/// pattern required a comma after the format string, which rejected the
/// bare-message form.)
macro_rules! fatal {
    ($($arg:tt)+) => {{
        eprintln!($($arg)+);
        std::process::exit(1);
    }};
}
-
/// Number of rows processed between progress log lines (4 * 2^20 rows).
const PROGRESS_EVERY: usize = 4 * 1024 * 1024;
-
-
- #[structopt(rename_all="kebab-case")]
- #[derive(Debug, StructOpt)]
- enum Opt {
-
- /// Filter trades-csv by start,end range and save subset to output-path
- ///
- /// Note: csv assumed to be pre-sorted by time (ascending)
- ///
- Range {
-
- /// Path to CSV file with trades data
- #[structopt(short = "f", long = "trades-csv")]
- #[structopt(parse(from_os_str))]
- trades_csv: PathBuf,
-
- /// Where to save the query results (CSV output)
- #[structopt(short = "o", long = "output-path")]
- #[structopt(parse(from_os_str))]
- output_path: PathBuf,
-
- /// rfc3339 format ("YYYY-MM-DDTHH:MM:SSZ")
- #[structopt(short = "s", long = "start")]
- start: DateTime<Utc>,
-
- /// rfc3339 format ("YYYY-MM-DDTHH:MM:SSZ")
- #[structopt(short = "e", long = "end")]
- end: DateTime<Utc>,
-
- },
-
- /// Convert the original csv info a format ready to be ingested via COPY
- ///
- /// 1. server_time of 0 -> NULL
- /// 2. side of "na" -> NULL
- PrepPostgres {
-
- /// Path to CSV file with trades data
- #[structopt(short = "f", long = "trades-csv")]
- #[structopt(parse(from_os_str))]
- trades_csv: PathBuf,
-
- /// Where to save the query results (CSV output)
- #[structopt(short = "o", long = "output-path")]
- #[structopt(parse(from_os_str))]
- output_path: PathBuf,
- },
-
- ListCodes,
-
-
- }
-
- #[derive(Deserialize)]
- struct Trade {
- /// Unix nanoseconds
- pub time: u64,
- pub exch: Exchange,
- pub ticker: Ticker,
- //pub side: Option<Side>,
- pub price: f64,
- pub amount: f64,
- }
-
- #[derive(Deserialize, Debug)]
- struct PgBuilder<'a> {
- pub time: u64,
- pub exch: Exchange,
- pub ticker: Ticker,
- pub side: Option<&'a str>,
- pub price: f64,
- pub amount: f64,
- pub server_time: u64,
- }
-
- #[derive(Serialize, Debug)]
- struct PgRow {
- pub time: DateTime<Utc>,
- pub exch: u8,
- pub base: u8,
- pub quote: u8,
- pub amount: f64,
- pub price: f64,
- pub side: Option<u8>,
- pub server_time: Option<DateTime<Utc>>,
- }
-
- fn nanos_to_utc(nanos: u64) -> DateTime<Utc> {
- const ONE_SECOND: u64 = 1_000_000_000;
- let sec: i64 = (nanos / ONE_SECOND) as i64;
- let nsec: u32 = (nanos % ONE_SECOND) as u32;
- let naive = NaiveDateTime::from_timestamp(sec, nsec);
- DateTime::from_utc(naive, Utc)
- }
-
/// Returns the average rate of `n` events over `span`, in events per second.
///
/// Yields `0.0` when there were no events or when `span` is shorter than one
/// microsecond.
fn per_sec(n: usize, span: Duration) -> f64 {
    let too_short = span < Duration::from_micros(1);
    if n == 0 || too_short {
        0.0
    } else {
        let elapsed_secs = span.as_nanos() as f64 / 1e9f64;
        n as f64 / elapsed_secs
    }
}
-
- fn nanos(utc: DateTime<Utc>) -> u64 {
- (utc.timestamp() as u64) * 1_000_000_000_u64 + (utc.timestamp_subsec_nanos() as u64)
- }
-
- fn run(start: Instant, logger: &slog::Logger) -> Result<usize, String> {
- let opt = Opt::from_args();
-
- let mut n = 0;
-
- match opt {
- Opt::PrepPostgres { trades_csv, output_path } => {
- let logger = logger.new(o!("cmd" => "prep-postgres"));
-
- info!(logger, "beginning prep-postgres cmd";
- "trades_csv" => %trades_csv.display(),
- "output_path" => %output_path.display(),
- );
-
- if ! trades_csv.exists() { return Err(format!("--trades-csv path does not exist: {}", trades_csv.display())) }
-
- info!(logger, "opening trades_csv file");
-
- let rdr = fs::File::open(&trades_csv)
- .map_err(|e| format!("opening trades csv file failed: {} (tried to open {})", e, trades_csv.display()))?;
-
- let rdr = io::BufReader::new(rdr);
-
- let mut rdr = csv::Reader::from_reader(rdr);
-
- info!(logger, "opening output file for writing");
-
- let wtr = fs::File::create(&output_path)
- .map_err(|e| format!("opening output file failed: {} (tried to open {} for writing)", e, output_path.display()))?;
-
- let wtr = io::BufWriter::new(wtr);
-
- let mut wtr = csv::Writer::from_writer(wtr);
-
- let headers: csv::StringRecord = rdr.headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone();
- let mut row = csv::StringRecord::new();
-
- //wtr.write_record(&headers).map_err(|e| format!("writing headers row failed: {}", e))?;
-
- while rdr.read_record(&mut row)
- .map_err(|e| {
- format!("reading row {} failed: {}", (n+1).thousands_sep(), e)
- })?
- {
- let bldr: PgBuilder = row.deserialize(Some(&headers)).map_err(|e| format!("deser failed: {}", e))?;
- let PgBuilder { time, exch, ticker, side, price, amount, server_time } = bldr;
- let time = nanos_to_utc(time);
- let exch = u8::from(exch);
- let base = u8::from(ticker.base);
- let quote = u8::from(ticker.quote);
- let side: Option<u8> = match side {
- Some("bid") => Some(1),
- Some("ask") => Some(2),
- _ => None,
- };
- let server_time = match server_time {
- 0 => None,
- x => Some(nanos_to_utc(x)),
- };
- let pg_row = PgRow { time, exch, base, quote, amount, price, side, server_time };
-
- wtr.serialize(&pg_row).map_err(|e| format!("serializing PgRow to csv failed: {}", e))?;
-
- n += 1;
- if n % PROGRESS_EVERY == 0 {
- info!(logger, "parsing/writing csv rows"; "n" => %n.thousands_sep());
- }
- }
- }
-
- Opt::ListCodes => {
- println!("side: {:?} {}", Side::Bid, u8::from(Side::Bid));
- println!("side: {:?} {}", Side::Ask, u8::from(Side::Ask));
- println!();
-
- for exch in Exchange::all() {
- println!("INSERT INTO exchanges (id, symbol) VALUES ({}, '{}');", u8::from(exch), exch.as_str());
- }
-
- for currency in Currency::all() {
- println!("INSERT INTO currencies (id, symbol) VALUES ({}, '{}');", u8::from(currency), currency.as_str());
- }
- }
-
- Opt::Range { trades_csv, output_path, start, end } => {
- let logger = logger.new(o!("cmd" => "range"));
-
- info!(logger, "beginning range cmd";
- "trades_csv" => %trades_csv.display(),
- "output_path" => %output_path.display(),
- "start" => %start,
- "end" => %end,
- );
-
- if ! trades_csv.exists() { return Err(format!("--trades-csv path does not exist: {}", trades_csv.display())) }
-
- info!(logger, "opening trades_csv file");
-
- let rdr = fs::File::open(&trades_csv)
- .map_err(|e| format!("opening trades csv file failed: {} (tried to open {})", e, trades_csv.display()))?;
-
- let rdr = io::BufReader::new(rdr);
-
- let mut rdr = csv::Reader::from_reader(rdr);
-
- info!(logger, "opening output file for writing");
-
- let wtr = fs::File::create(&output_path)
- .map_err(|e| format!("opening output file failed: {} (tried to open {} for writing)", e, output_path.display()))?;
-
- let wtr = io::BufWriter::new(wtr);
-
- let mut wtr = csv::Writer::from_writer(wtr);
-
- let headers: csv::ByteRecord = rdr.byte_headers().map_err(|e| format!("failed to parse CSV headers: {}", e))?.clone();
-
- let time_col: usize = headers.iter().position(|x| x == b"time").ok_or_else(|| {
- String::from("no column in headers named 'time'")
- })?;
-
- let mut row = csv::ByteRecord::new();
-
- let start_nanos = nanos(start);
- let end_nanos = nanos(end);
-
- let mut n_written = 0;
- let mut time: u64 = 0;
-
- info!(logger, "writing headers row to output file");
- wtr.write_byte_record(&headers).map_err(|e| format!("writing csv headers row failed: {}", e))?;
-
- info!(logger, "entering csv parsing loop");
-
- 'a: while rdr.read_byte_record(&mut row)
- .map_err(|e| {
- format!("reading row {} failed: {}", (n+1).thousands_sep(), e)
- })?
- {
- let time_bytes = row.get(time_col).ok_or_else(|| "time column not present for row")?;
-
- time = atoi::atoi(time_bytes).ok_or_else(|| {
- format!("failed to parse 'time' col value '{}' as integer", std::str::from_utf8(time_bytes).unwrap_or("utf8err"))
- })?;
-
- n += 1;
-
- if n % PROGRESS_EVERY == 0 {
- info!(logger, "parsing csv rows"; "n" => %n.thousands_sep(), "n_written" => %n_written.thousands_sep());
- }
-
- if time < start_nanos { continue 'a }
-
- if time > end_nanos { break 'a }
-
- wtr.write_byte_record(&row).map_err(|e| format!("writing parsed csv row to output file failed: {}", e))?;
-
- n_written += 1;
- }
-
- info!(logger, "broke out of read csv loop"; "time" => time, "end_nanos" => end_nanos, "n" => %n.thousands_sep(), "n_written" => %n_written.thousands_sep());
-
- info!(logger, "dropping wtr");
-
- drop(wtr);
- }
- }
-
- Ok(n)
- }
-
- fn main() {
- let start = Instant::now();
-
- let decorator = slog_term::TermDecorator::new().stdout().force_color().build();
- let drain = slog_term::FullFormat::new(decorator).use_utc_timestamp().build().fuse();
- let drain = slog_async::Async::new(drain).chan_size(1024 * 64).thread_name("recv".into()).build().fuse();
- let logger = slog::Logger::root(drain, o!("version" => structopt::clap::crate_version!()));
-
- match run(start, &logger) {
- Ok(n) => {
- let took = Instant::now() - start;
- info!(logger, "finished in {:?}", took;
- "n rows" => %n.thousands_sep(),
- "rows/sec" => &((per_sec(n, took) * 100.0).round() / 10.0).thousands_sep(),
- );
- }
-
- Err(e) => {
- crit!(logger, "run failed: {:?}", e);
- eprintln!("\n\nError: {}", e);
- std::thread::sleep(Duration::from_millis(100));
- std::process::exit(1);
- }
- }
-
- }
|