You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

403 lines
13KB

  1. #![allow(unused)]
  2. #[macro_use]
  3. extern crate slog;
  4. #[macro_use]
  5. extern crate markets;
  6. use std::io::{self, prelude::*};
  7. use std::fs;
  8. use std::path::{Path, PathBuf};
  9. use std::time::*;
  10. use pretty_toa::ThousandsSep;
  11. use structopt::StructOpt;
  12. use serde::{Serialize, Deserialize};
  13. use slog::Drain;
  14. use chrono::{DateTime, Utc, NaiveDateTime};
  15. use markets::crypto::{Exchange, Ticker, Side, Currency};
  16. use pipelines::encoding;
  17. use pipelines::windows::WeightedMeanWindow;
// Print a formatted message to stderr, then terminate the process with exit
// code 1. A macro (not a fn) so the format string and arguments are checked
// at compile time by `eprintln!`.
macro_rules! fatal { ($fmt:expr, $($args:tt)*) => {{
    eprintln!($fmt, $($args)*);
    std::process::exit(1);
}}}
// Emit a progress log line once every this-many records (16 * 2^20).
const PROGRESS_EVERY: usize = 1024 * 1024 * 16;
// Timestamps in this file are u64 nanoseconds since the unix epoch.
const ONE_SECOND: u64 = 1_000_000_000;
const ONE_HOUR: u64 = ONE_SECOND * 60 * 60;
  25. #[structopt(rename_all="kebab-case")]
  26. #[derive(Debug, StructOpt)]
  27. struct Opt {
  28. /// Path to file with binary trades data
  29. #[structopt(short = "f", long = "input-file")]
  30. #[structopt(parse(from_os_str))]
  31. input_path: PathBuf,
  32. /// Where to save the query results (CSV output)
  33. #[structopt(short = "o", long = "output-path")]
  34. #[structopt(parse(from_os_str))]
  35. output_path: PathBuf,
  36. #[structopt(short = "z", long = "hard-mode")]
  37. hard_mode: bool,
  38. }
  39. fn nanos_to_utc(nanos: u64) -> DateTime<Utc> {
  40. const ONE_SECOND: u64 = 1_000_000_000;
  41. let sec: i64 = (nanos / ONE_SECOND) as i64;
  42. let nsec: u32 = (nanos % ONE_SECOND) as u32;
  43. let naive = NaiveDateTime::from_timestamp(sec, nsec);
  44. DateTime::from_utc(naive, Utc)
  45. }
  46. fn per_sec(n: usize, span: Duration) -> f64 {
  47. if n == 0 || span < Duration::from_micros(1) { return 0.0 }
  48. let s: f64 = span.as_nanos() as f64 / 1e9f64;
  49. n as f64 / s
  50. }
  51. fn nanos(utc: DateTime<Utc>) -> u64 {
  52. (utc.timestamp() as u64) * 1_000_000_000_u64 + (utc.timestamp_subsec_nanos() as u64)
  53. }
/// "Easy mode" query: a single pass over the memory-mapped trades file that
/// writes one CSV row per hour containing the amount-weighted mean BTC/USD
/// price on BitMEX and on GDAX, plus the bmex/gdax ratio.
///
/// CSV columns: `time` (hour-bucket start, ns since epoch), `ratio`, `bmex`,
/// `gdax`. An hour with trades on only one (or neither) exchange produces a
/// NaN row, as does every empty hour between two trades.
///
/// Returns the number of records scanned (not the number of rows written).
///
/// NOTE(review): the accumulators for the final, partial hour are never
/// flushed when the loop ends — the last bucket is silently dropped. Confirm
/// this is intended.
fn easy_query<W>(
    data: &memmap::Mmap,
    mut wtr: W,
    logger: &slog::Logger,
) -> Result<usize, String>
    where W: Write
{
    let logger = logger.new(o!("easy-mode" => "whatever, man"));
    info!(logger, "beginning easy mode");
    // `run` has already validated that the file length is a non-zero whole
    // multiple of the record size, so this division is exact
    let n_records = data.len() / encoding::SERIALIZED_SIZE;
    let mut n = 0;          // records consumed
    let mut n_written = 0;  // CSV rows emitted
    let mut records = data.chunks_exact(encoding::SERIALIZED_SIZE);
    // scratch buffer reused for every row to avoid per-row allocation
    let mut row_buffer: Vec<u8> = Vec::with_capacity(512);
    writeln!(&mut wtr, "time,ratio,bmex,gdax")
        .map_err(|e| format!("writing CSV headers to output file failed: {}", e))?;
    assert!(n_records > 0);
    // pull the first record out of the iterator to seed the hour bucket
    let first = encoding::PackedTradeData::new(records.next().unwrap());
    n += 1;
    // align the bucket to the start of the hour containing the first trade
    let mut cur_hour = first.time() - first.time() % ONE_HOUR;
    let mut next_hour = cur_hour + ONE_HOUR;
    // per-hour weighted-mean accumulators for each exchange:
    // *_total = sum(price * amount), *_amount = sum(amount)
    let mut bmex_total = 0.0;
    let mut bmex_amount = 0.0;
    let mut n_bmex = 0;
    let mut gdax_total = 0.0;
    let mut gdax_amount = 0.0;
    let mut n_gdax = 0;
    macro_rules! update { // in macro to avoid repeating code once outside loop, and again in loop body
        ($trade:ident) => {{
            // only BitMEX and GDAX BTC/USD trades participate; all other
            // venues/pairs (or records whose fields fail to parse) are skipped
            match ($trade.exch(), $trade.base(), $trade.quote()) {
                (Ok(e!(bmex)), Ok(c!(btc)), Ok(c!(usd))) => {
                    bmex_total += $trade.price() * $trade.amount();
                    bmex_amount += $trade.amount();
                    n_bmex += 1;
                }
                (Ok(e!(gdax)), Ok(c!(btc)), Ok(c!(usd))) => {
                    gdax_total += $trade.price() * $trade.amount();
                    gdax_amount += $trade.amount();
                    n_gdax += 1;
                }
                _ => {}
            }
        }}
    }
    update!(first);
    for record in records {
        n += 1;
        let trade = encoding::PackedTradeData::new(record);
        // a trade past the bucket boundary closes out the current hour
        // (a trade exactly at `next_hour` still lands in the current bucket)
        if trade.time() > next_hour {
            row_buffer.clear();
            itoa::write(&mut row_buffer, cur_hour).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
            if n_bmex == 0 || n_gdax == 0 {
                // can't form a ratio with data from only one side
                row_buffer.write(",NaN,NaN,NaN\n".as_bytes()).unwrap();
            } else {
                let bmex_wt_avg = bmex_total / bmex_amount;
                let gdax_wt_avg = gdax_total / gdax_amount;
                let ratio = bmex_wt_avg / gdax_wt_avg;
                row_buffer.push(b',');
                dtoa::write(&mut row_buffer, ratio).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
                row_buffer.push(b',');
                dtoa::write(&mut row_buffer, bmex_wt_avg).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
                row_buffer.push(b',');
                dtoa::write(&mut row_buffer, gdax_wt_avg).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
                row_buffer.push(b'\n');
            }
            wtr.write_all(&row_buffer[..]).map_err(|e| format!("writing row failed: {}", e))?;
            n_written += 1;
            // reset accumulators and advance to the next hour bucket
            bmex_total = 0.0;
            bmex_amount = 0.0;
            gdax_total = 0.0;
            gdax_amount = 0.0;
            n_bmex = 0;
            n_gdax = 0;
            cur_hour = next_hour;
            next_hour += ONE_HOUR;
            // if we are skipping hours in between the last and current row, we
            // need to write a NaN row for the hours that had no data
            while next_hour <= trade.time() {
                writeln!(&mut wtr, "{},NaN,NaN,NaN", cur_hour)
                    .map_err(|e| format!("writing output row failed: {}", e))?;
                n_written += 1;
                cur_hour = next_hour;
                next_hour += ONE_HOUR;
            }
        }
        update!(trade);
        if n % PROGRESS_EVERY == 0 {
            info!(logger, "calculating query";
                "n" => %n.thousands_sep(),
                "n_written" => %n_written.thousands_sep(),
            );
        }
    }
    info!(logger, "finished with easy query");
    Ok(n)
}
/// "Hard mode" query: the same single pass over the trades file, but instead
/// of fixed hourly buckets it maintains three sliding lookback windows (5, 15
/// and 60 minutes) of amount-weighted mean price per exchange, and emits the
/// bmex/gdax ratio for each horizon once per 10-second bucket boundary.
///
/// CSV columns: `time` (bucket boundary, ns since epoch), `r5`, `r15`, `r60`.
///
/// Returns the number of records scanned (not the number of rows written).
///
/// NOTE(review): unlike `easy_query`, there is no catch-up loop for gaps —
/// when trades skip several 10s buckets, `cur_bucket` advances only one
/// interval per incoming record, so bucket timestamps can lag trade times
/// after sparse stretches. Confirm this is intended.
fn hard_query<W>(
    data: &memmap::Mmap,
    mut wtr: W,
    logger: &slog::Logger,
) -> Result<usize, String>
    where W: Write
{
    let logger = logger.new(o!("hard-mode" => "challenge accepted"));
    info!(logger, "beginning hard mode");
    // `run` has already validated the file length, so this division is exact
    let n_records = data.len() / encoding::SERIALIZED_SIZE;
    let mut n = 0;          // records consumed
    let mut n_written = 0;  // CSV rows emitted
    let mut records = data.chunks_exact(encoding::SERIALIZED_SIZE);
    // pull out first row to initialize query calculations
    assert!(n_records > 0);
    let first = encoding::PackedTradeData::new(records.next().unwrap());
    n += 1;
    // end of the 10-second interval containing the first trade; a row is
    // emitted (and the windows purged) each time a trade crosses this mark
    let mut cur_bucket = first.time() - (first.time() % (ONE_SECOND * 10)) + ONE_SECOND * 10;
    // one value per lookback horizon (5/15/60 minutes)
    #[derive(Default, Clone)]
    struct Lookbacks<T> {
        pub p5: T,
        pub p15: T,
        pub p60: T,
    }
    // most recently computed bmex/gdax ratio for each horizon
    let mut ratios: Lookbacks<f64> = Default::default();
    let mut bmex_windows: Lookbacks<WeightedMeanWindow> =
        Lookbacks {
            p5: WeightedMeanWindow::new(ONE_SECOND * 60 * 5 ),
            p15: WeightedMeanWindow::new(ONE_SECOND * 60 * 15),
            p60: WeightedMeanWindow::new(ONE_SECOND * 60 * 60),
        };
    let mut gdax_windows = bmex_windows.clone();
    // scratch buffer reused for every row to avoid per-row allocation
    let mut row_buffer: Vec<u8> = Vec::with_capacity(512);
    macro_rules! update { // in macro to avoid repeating code once outside loop, and again in loop body
        ($trade:ident) => {{
            // only BitMEX and GDAX BTC/USD trades feed the windows; all other
            // venues/pairs (or unparseable fields) are skipped
            match ($trade.exch(), $trade.base(), $trade.quote()) {
                (Ok(e!(bmex)), Ok(c!(btc)), Ok(c!(usd))) => {
                    bmex_windows.p5 .push($trade.time(), $trade.price(), $trade.amount());
                    bmex_windows.p15.push($trade.time(), $trade.price(), $trade.amount());
                    bmex_windows.p60.push($trade.time(), $trade.price(), $trade.amount());
                }
                (Ok(e!(gdax)), Ok(c!(btc)), Ok(c!(usd))) => {
                    gdax_windows.p5 .push($trade.time(), $trade.price(), $trade.amount());
                    gdax_windows.p15.push($trade.time(), $trade.price(), $trade.amount());
                    gdax_windows.p60.push($trade.time(), $trade.price(), $trade.amount());
                }
                _ => {}
            }
        }}
    }
    writeln!(&mut wtr, "time,r5,r15,r60")
        .map_err(|e| format!("writing CSV headers to output file failed: {}", e))?;
    update!(first);
    for record in records {
        n += 1;
        let trade = encoding::PackedTradeData::new(record);
        if trade.time() > cur_bucket {
            debug!(logger, "about to purge";
                "n" => n,
                "n written" => n_written,
                "trade.time" => trade.time(),
                "cur_bucket" => cur_bucket,
                "gdax p5 len" => gdax_windows.p5.len(),
                "gdax p5 wt avg" => gdax_windows.p5.weighted_mean(),
            );
            // evict entries that have fallen out of each window's lookback,
            // measured from the bucket boundary
            bmex_windows.p5 .purge(cur_bucket);
            bmex_windows.p15.purge(cur_bucket);
            bmex_windows.p60.purge(cur_bucket);
            gdax_windows.p5 .purge(cur_bucket);
            gdax_windows.p15.purge(cur_bucket);
            gdax_windows.p60.purge(cur_bucket);
            debug!(logger, "finished purge";
                "n" => n,
                "n written" => n_written,
                "trade.time" => trade.time(),
                "cur_bucket" => cur_bucket,
                "gdax p5 len" => gdax_windows.p5.len(),
                "gdax p5 wt avg" => gdax_windows.p5.weighted_mean(),
            );
            ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean();
            ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean();
            ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean();
            //row_buffers.iter_mut().for_each(|x| x.clear());
            row_buffer.clear();
            itoa::write(&mut row_buffer, cur_bucket).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
            row_buffer.push(b',');
            dtoa::write(&mut row_buffer, ratios.p5 ).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
            row_buffer.push(b',');
            dtoa::write(&mut row_buffer, ratios.p15).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
            row_buffer.push(b',');
            dtoa::write(&mut row_buffer, ratios.p60).map_err(|e| format!("serializing number to buffer failed: {}", e))?;
            row_buffer.push(b'\n');
            wtr.write_all(&row_buffer[..]).map_err(|e| format!("writing row failed: {}", e))?;
            n_written += 1;
            cur_bucket += ONE_SECOND * 10;
        }
        update!(trade);
        if n % PROGRESS_EVERY == 0 {
            info!(logger, "calculating hard query";
                "n" => %n.thousands_sep(),
                "n_written" => %n_written.thousands_sep(),
                "ratios.p5" => ratios.p5,
                "ratios.p15" => ratios.p15,
                "ratios.p60" => ratios.p60,
            );
        }
    }
    info!(logger, "finished with hard query");
    Ok(n)
}
/// Parse CLI options, validate and memory-map the input file, then dispatch
/// to the selected query, streaming CSV rows to the output path.
///
/// Returns the total number of records scanned, or a human-readable error
/// string (surfaced by `main`).
///
/// NOTE(review): `start` is accepted but never used in this function —
/// presumably kept for parity with `main`'s timing; confirm before removing.
fn run(start: Instant, logger: &slog::Logger) -> Result<usize, String> {
    let Opt { input_path, output_path, hard_mode } = Opt::from_args();
    info!(logger, "beginning to count";
        "input_path" => %input_path.display(),
    );
    if ! input_path.exists() { return Err(format!("--input-file path does not exist: {}", input_path.display())) }
    let input_file =
        fs::OpenOptions::new()
        .read(true)
        .open(input_path)
        .map_err(|e| e.to_string())?;
    let file_length = input_file.metadata().unwrap().len();
    // a valid input is a non-empty, whole number of fixed-size records; both
    // queries rely on this invariant when chunking the mmap
    if file_length % encoding::SERIALIZED_SIZE as u64 != 0 || file_length == 0 {
        return Err(format!("file length is not a multiple of record size: {}", file_length))
    }
    let n_records: usize = file_length as usize / encoding::SERIALIZED_SIZE;
    info!(logger, "parsing file"; "n_records" => %n_records.thousands_sep());
    // SAFETY: mapping a file is unsafe because another process could mutate
    // it while mapped; this tool assumes the input file is static for the
    // duration of the run.
    let data: memmap::Mmap = unsafe {
        memmap::Mmap::map(&input_file)
            .map_err(|e| {
                format!("creating Mmap failed: {}", e)
            })?
    };
    info!(logger, "opening output file for writing");
    let wtr = fs::File::create(&output_path)
        .map_err(|e| format!("opening output file failed: {} (tried to open {} for writing)", e, output_path.display()))?;
    // buffer writes — both queries emit many small rows
    let wtr = io::BufWriter::new(wtr);
    if hard_mode {
        hard_query(&data, wtr, &logger)
    } else {
        easy_query(&data, wtr, &logger)
    }
}
/// Entry point: build the (async, terminal) logger, run the selected query,
/// and report timing/throughput, exiting nonzero on failure.
fn main() {
    let start = Instant::now();
    // colored terminal output on stdout, timestamps rendered in UTC
    let decorator = slog_term::TermDecorator::new().stdout().force_color().build();
    let drain = slog_term::FullFormat::new(decorator).use_utc_timestamp().build().fuse();
    // async drain with a large channel so logging doesn't stall the scan loop
    let drain = slog_async::Async::new(drain).chan_size(1024 * 64).thread_name("recv".into()).build().fuse();
    let logger = slog::Logger::root(drain, o!("version" => structopt::clap::crate_version!()));
    match run(start, &logger) {
        Ok(n) => {
            let took = Instant::now() - start;
            info!(logger, "finished in {:?}", took;
                "n rows" => %n.thousands_sep(),
                // rows/sec rounded to two decimal places for display
                "rows/sec" => &((per_sec(n, took) * 100.0).round() / 100.0).thousands_sep(),
            );
        }
        Err(e) => {
            crit!(logger, "run failed: {:?}", e);
            eprintln!("\n\nError: {}", e);
            // brief pause gives the async logger a chance to flush the crit!
            // line before the process exits
            std::thread::sleep(Duration::from_millis(100));
            std::process::exit(1);
        }
    }
}