You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

423 lines
15KB

  1. #![allow(unused_imports)]
  2. #[macro_use] extern crate serde_derive;
  3. //extern crate decimal;
  4. extern crate clap;
  5. extern crate serde;
  6. extern crate csv;
  7. extern crate itertools_num;
  8. extern crate map;
  9. use std::str::FromStr;
  10. use std::time::{Instant, Duration};
  11. use std::{fs, io};
  12. use std::str::from_utf8;
  13. use std::error::Error;
  14. use itertools_num::linspace;
  15. use map::Map;
  16. const N: usize = 128;
  17. const LOGSPACE: [i64; 128] =
  18. [-2134300000000, -1854700000000, -1611800000000, -1400600000000,
  19. -1217200000000, -1057700000000, -919200000000, -798800000000,
  20. -694100000000, -603200000000, -524200000000, -455500000000,
  21. -395800000000, -344000000000, -298900000000, -259700000000,
  22. -225700000000, -196100000000, -170400000000, -148100000000,
  23. -128700000000, -111800000000, -97200000000, -84400000000,
  24. -73400000000, -63800000000, -55400000000, -48100000000,
  25. -41800000000, -36300000000, -31600000000, -27400000000,
  26. -23800000000, -20700000000, -18000000000, -15600000000,
  27. -13600000000, -11800000000, -10200000000, -8900000000,
  28. -7700000000, -6700000000, -5800000000, -5000000000,
  29. -4400000000, -3800000000, -3300000000, -2900000000,
  30. -2500000000, -2100000000, -1900000000, -1600000000,
  31. -1400000000, -1200000000, -1000000000, -900000000,
  32. -800000000, -700000000, -600000000, -500000000,
  33. -400000000, -300000000, -200000000, -100000000,
  34. 100000000, 200000000, 300000000, 400000000,
  35. 500000000, 600000000, 700000000, 800000000,
  36. 900000000, 1000000000, 1200000000, 1400000000,
  37. 1600000000, 1900000000, 2100000000, 2500000000,
  38. 2900000000, 3300000000, 3800000000, 4400000000,
  39. 5000000000, 5800000000, 6700000000, 7700000000,
  40. 8900000000, 10200000000, 11800000000, 13600000000,
  41. 15600000000, 18000000000, 20700000000, 23800000000,
  42. 27400000000, 31600000000, 36300000000, 41800000000,
  43. 48100000000, 55400000000, 63800000000, 73400000000,
  44. 84400000000, 97200000000, 111800000000, 128700000000,
  45. 148100000000, 170400000000, 196100000000, 225700000000,
  46. 259700000000, 298900000000, 344000000000, 395800000000,
  47. 455500000000, 524200000000, 603200000000, 694100000000,
  48. 798800000000, 919200000000, 1057700000000, 1217200000000,
  49. 1400600000000, 1611800000000, 1854700000000, 2134300000000];
  50. #[derive(Deserialize)]
  51. struct Trade { //<'a> {
  52. pub time: i64,
  53. //pub exch: &'a str,
  54. pub price: f64,
  55. pub amount: f64,
  56. }
  57. /// Use this to deserialize just the time column on the first pass through
  58. /// the events file.
  59. #[derive(Deserialize)]
  60. struct EventTime {
  61. pub time: i64,
  62. }
  63. struct Event {
  64. pub time: i64,
  65. //pub meta: Vec<&'a str>,
  66. //pub meta: &'a csv::StringRecord,
  67. pub data: Vec<f64>,
  68. }
  69. pub fn seconds(d: Duration) -> f64 {
  70. d.as_secs() as f64 + (d.subsec_nanos() as f64 / 1_000_000_000_f64)
  71. }
  72. fn main() -> Result<(), Box<Error>> {
  73. let start = Instant::now();
  74. let args: clap::ArgMatches = clap::App::new("time-explorer")
  75. .version("0.1")
  76. .arg(clap::Arg::with_name("trades")
  77. .long("trades-csv")
  78. .short("t")
  79. .help("Path of csv with time (integer nanoseconds timestamp), \
  80. price (f64), and amount (f64) columns.")
  81. .takes_value(true)
  82. .required(true))
  83. .arg(clap::Arg::with_name("events")
  84. .long("events-csv")
  85. .short("e")
  86. .help("Path of csv file with a time (integer nanoseconds timestamp) as column 0, \
  87. along with any other metadata columns that will be included in results")
  88. .takes_value(true)
  89. .required(true))
  90. .arg(clap::Arg::with_name("output")
  91. .long("output-file")
  92. .short("o")
  93. .help("Path to save results csv to")
  94. .takes_value(true)
  95. .required(true))
  96. .arg(clap::Arg::with_name("verbose")
  97. .long("verbose")
  98. .short("v"))
  99. .arg(clap::Arg::with_name("n-periods")
  100. .long("n-periods")
  101. .short("n")
  102. .help("Controls how many time buckets are evaluated")
  103. .takes_value(true)
  104. .default_value("50"))
  105. .get_matches();
  106. //assert_eq!(N, LOGSPACE.len());
  107. let verbose = args.is_present("verbose");
  108. if verbose { println!("{:>8.2}s reading...", seconds(Instant::now() - start)); }
  109. let trades_csv = args.value_of("trades").unwrap();
  110. let events_csv = args.value_of("events").unwrap();
  111. let output = args.value_of("output").unwrap();
  112. let n: &str = args.value_of("n-periods").unwrap();
  113. let n: usize = usize::from_str(n)?;
  114. //println!("{:>8.2s} opening files...", seconds(Instant::now() - start));
  115. let trades_csv =
  116. fs::OpenOptions::new()
  117. .read(true)
  118. .open(trades_csv)?;
  119. let mut trades: Vec<Trade> =
  120. csv::Reader::from_reader(trades_csv)
  121. .deserialize()
  122. .map(|x| x.unwrap())
  123. .collect();
  124. //println!("{:>8.2} sorting trades...", seconds(Instant::now() - start));
  125. trades.sort_by_key(|k| k.time);
  126. // for i in 1..(trades.len() - 1) {
  127. // assert!(trades[i].time >= trades[i-1].time);
  128. // }
  129. //println!("{:>8.2}s arranging in columnar format...", seconds(Instant::now() - start));
  130. let mut times: Vec<i64> = Vec::with_capacity(trades.len());
  131. //let mut prices: Vec<f64> = Vec::with_capacity(trades.len());
  132. let mut amounts: Vec<f64> = Vec::with_capacity(trades.len());
  133. let mut totals: Vec<f64> = Vec::with_capacity(trades.len());
  134. for Trade { time, price, amount } in trades {
  135. times.push(time);
  136. totals.push(price * amount);
  137. amounts.push(amount);
  138. }
  139. // let mut headers = csv::ByteRecord::new();
  140. // rdr.read_byte_record(&mut headers)?;
  141. // assert!(&headers.as_slice()[headers.range(0).unwrap()] == b"time");
  142. // let mut record = csv::ByteRecord::new();
  143. //let headers = rdr.headers()?;
  144. //assert_eq!(headers.get(0), Some("time"));
  145. let mut events: Vec<Event> = {
  146. let events_csv =
  147. fs::OpenOptions::new()
  148. .read(true)
  149. .open(events_csv)?;
  150. csv::Reader::from_reader(events_csv)
  151. .deserialize()
  152. .map(|t: Result<EventTime, _>| {
  153. let EventTime { time } = t.unwrap();
  154. //let data = [0.0; N - 1];
  155. let data = vec![0.0; n - 1];
  156. Event { time, data }
  157. }).collect()
  158. };
  159. //while rdr.read_byte_record(&mut record)? {
  160. /*
  161. for record in rdr.records() {
  162. let meta = record?;
  163. //let time = i64::from_str(from_utf8(&record[0])?)?;
  164. let time = i64::from_str(&meta[0])?;
  165. // let meta =
  166. // record.iter()
  167. // .skip(1)
  168. // //.map(|x| x.deserialize().unwrap())
  169. // .collect();
  170. let data = [0.0; N - 1];
  171. events.push(Event { time, data });
  172. }
  173. */
  174. assert!(!events.is_empty());
  175. // for i in 1..(events.len() - 1) {
  176. // assert!(events[i].time >= events[i-1].time);
  177. // }
  178. events.sort_by_key(|k| k.time);
  179. //println!("events[0].time={}, LOGSPACE[0]={}, events[0].time + LOGSPACE[0]={}",
  180. // events[0].time, LOGSPACE[0], events[0].time + LOGSPACE[0]);
  181. let mut cursor: usize = 0;
  182. let mut truncate_events = None;
  183. let buckets: Vec<i64> =
  184. linspace(LOGSPACE[0] as f64, LOGSPACE[N - 1] as f64, n)
  185. .map(|x| x as i64)
  186. .collect();
  187. if verbose { println!("{:>8.2}s calculating...", seconds(Instant::now() - start)); }
  188. 'a: for (i, event) in events.iter_mut().enumerate() {
  189. //let mut min_time: i64 = event.time + LOGSPACE[0];
  190. //let mut max_time: i64 = event.time + LOGSPACE[1];
  191. let mut min_time: i64 = event.time + buckets[0];
  192. let mut max_time: i64 = event.time + buckets[1];
  193. /*
  194. let n_nan = event.data.iter().filter(|x| !x.is_finite()).count();
  195. println!("{:>8.2} event {:>5} {:>8.2}, {:>8.2}, {:>8.2}, ... min_time={}, max_time={}, \
  196. cursor={}, j={}, times[cursor]={}, times[j]={}, n_nan={}, max_time-min_time={}", seconds(Instant::now() - start), i,
  197. event.data[0], event.data[10], event.data[100], min_time, max_time, cursor,
  198. "_", times[cursor], "_", n_nan, max_time-min_time);
  199. println!();
  200. */
  201. // find the beginning if there are gaps
  202. 'b: while times[cursor] < min_time {
  203. if cursor >= times.len() - 1 {
  204. truncate_events = Some(i);
  205. break 'a
  206. } else {
  207. cursor += 1
  208. }
  209. }
  210. let mut j: usize = cursor;
  211. 'c: for k in 0..(n - 2) {
  212. let mut wsum: f64 = 0.0;
  213. let mut w: f64 = 0.0;
  214. 'd: while j < times.len() - 1 && times[j] < max_time {
  215. wsum += totals[j];
  216. w += amounts[j];
  217. j += 1;
  218. }
  219. if w > 0.0 {
  220. event.data[k] = wsum / w;
  221. } else {
  222. event.data[k] = ::std::f64::NAN;
  223. }
  224. /*
  225. let n_nan = event.data.iter().filter(|x| !x.is_finite()).count();
  226. println!("{:>8.2} event {:>5} {:>8.2}, {:>8.2}, {:>8.2}, ... min_time={}, max_time={}, \
  227. cursor={}, j={}, times[cursor]={}, times[j]={}, n_nan={}, max_time-min_time={} \
  228. wsum={:.3}, w={:.3}", seconds(Instant::now() - start), i,
  229. event.data[0], event.data[10], event.data[100], min_time, max_time, cursor,
  230. j, times[cursor], times[j], n_nan, max_time-min_time, wsum, w);
  231. */
  232. min_time = max_time;
  233. max_time = event.time + buckets[k + 2];
  234. //println!("{:>8.2} event {:>5} {:>8.2}, {:>8.2}, {:>8.2}, ... min_time={}, max_time={}, \
  235. // cursor={}, j={}, times[cursor]={}, n_nan={}, max_time-min_time={}", seconds(Instant::now() - start), i,
  236. // event.data[0], event.data[10], event.data[100], min_time, max_time, cursor,
  237. // j, times[cursor], n_nan, max_time-min_time);
  238. }
  239. /*
  240. let n_nan = event.data.iter().filter(|x| !x.is_finite()).count();
  241. println!("{:>8.2} event {:>5} {:>8.2}, {:>8.2}, {:>8.2}, ... min_time={}, max_time={}, \
  242. cursor={}, j={}, times[cursor]={}, times[j]={}, n_nan={}, max_time-min_time={}", seconds(Instant::now() - start), i,
  243. event.data[0], event.data[10], event.data[100], min_time, max_time, cursor,
  244. j, times[cursor], times[j], n_nan, max_time-min_time);
  245. println!();
  246. */
  247. if i % 100 == 0 {
  248. assert!(max_time > min_time);
  249. if verbose {
  250. //let n_nan = event.data.iter().filter(|x| !x.is_finite()).count();
  251. println!("{:>8.2}s No. {:>5} {:>12.2}, {:>12.2}, {:>12.2}, {:>12.2}, {:>12.2}, {:>12.2} ...",
  252. //cursor={}, j={}, times[cursor]={}, n_nan={}, max_time-min_time={}",
  253. seconds(Instant::now() - start), i,
  254. event.data[0], event.data[20], event.data[40], event.data[60], event.data[80], event.data[100]);
  255. //min_time, max_time, cursor,
  256. //j, times[cursor], n_nan, max_time-min_time);
  257. }
  258. }
  259. }
  260. assert!(truncate_events.is_none()); // for now
  261. if verbose { println!("{:>8.2} writing...", seconds(Instant::now() - start)); }
  262. // we have to read this again because I could not figure out ownership problems
  263. let events_csv =
  264. fs::OpenOptions::new()
  265. .read(true)
  266. .open(events_csv)?;
  267. let mut events_csv = csv::Reader::from_reader(events_csv);
  268. let output_csv =
  269. fs::OpenOptions::new()
  270. .write(true)
  271. .create(true)
  272. .truncate(true)
  273. .open(output)?;
  274. let mut wtr = csv::Writer::from_writer(output_csv);
  275. let data_cols: Vec<i64> = {
  276. let mut xs = vec![0; n - 1];
  277. for i in 0..(n - 1) {
  278. //xs[i] = ((LOGSPACE[i] + LOGSPACE[i + 1]) as f64 / 2.0) as i64;
  279. xs[i] = (buckets[i] + buckets[i + 1]) / 2;
  280. }
  281. xs
  282. };
  283. {
  284. let headers = events_csv.byte_headers()?;
  285. //println!("{:?}", headers);
  286. for col in headers.iter() {
  287. wtr.write_field(col)?;
  288. }
  289. for col in data_cols.iter() {
  290. wtr.write_field(&format!("{}", col))?;
  291. }
  292. wtr.write_record(None::<&[u8]>)?;
  293. }
  294. let mut record = csv::ByteRecord::new();
  295. for event in events {
  296. if !events_csv.read_byte_record(&mut record)? { panic!("failed to read from events csv") }
  297. for meta in record.iter() {
  298. wtr.write_field(meta)?;
  299. }
  300. for val in event.data.iter() {
  301. wtr.write_field(&format!("{}", val))?;
  302. }
  303. wtr.write_record(None::<&[u8]>)?;
  304. }
  305. if verbose { println!("{:>8.2} finished.", seconds(Instant::now() - start)); }
  306. Ok(())
  307. }
  308. /*
  309. def to_tframe(version, df, trades, start):
  310. d = {'bid': {}, 'ask': {}}
  311. cursor = 0
  312. n = 0
  313. n_periods = 40
  314. xs = np.concatenate([periods(n_periods)[:0:-1] * -1, periods(n_periods)]) * 1000000 # mult to convert to nanos
  315. mask = df['version'] == version
  316. #my_trades = sorted(list(zip(df.loc[mask].index, df.loc[mask, 'side'], df.loc[mask, 'gid'])))
  317. my_trades = sorted(list(zip(df.loc[mask].index.values.astype(np.int64), df.loc[mask, 'side'], df.loc[mask, 'gid'])))
  318. #idx = trades.index
  319. idx = trades.index.values.astype(np.int64)
  320. amts = trades['amount']
  321. totals = trades['total']
  322. assert len(idx) == len(amts)
  323. assert len(idx) == len(totals)
  324. for tm, side, gid in my_trades:
  325. print '{} to_tfame {} {} (cursor = {})'.format(time.time() - start, version, n, cursor)
  326. #min_time = tm + timedelta(milliseconds=xs[0])
  327. #max_time = tm + timedelta(milliseconds=xs[1])
  328. min_time = tm + xs[0]
  329. max_time = tm + xs[1]
  330. if idx[cursor] > min_time:
  331. print 'warning: idx[cursor] ({}) > min_time ({})'.format(idx[cursor], min_time)
  332. while idx[cursor] > min_time and cursor > 0:
  333. cursor -= 1
  334. else:
  335. while idx[cursor] < min_time and cursor < len(idx) - 1:
  336. cursor += 1
  337. i = 1
  338. j = cursor
  339. d[side][gid] = {}
  340. while i < len(xs) - 1:
  341. wsum = 0.0
  342. w = 0.0
  343. while idx[j] < max_time:
  344. wsum += totals[j]
  345. w += amts[j]
  346. j += 1
  347. if w > 0.0:
  348. d[side][gid][xs[i]] = wsum / w
  349. else:
  350. d[side][gid][xs[i]] = np.nan
  351. i += 1
  352. min_time = max_time
  353. #max_time = tm + timedelta(milliseconds=xs[i])
  354. max_time = tm + xs[i]
  355. n += 1
  356. d['bid'] = sort_cols(pd.DataFrame.from_dict(d['bid'], orient='index'))
  357. d['ask'] = sort_cols(pd.DataFrame.from_dict(d['ask'], orient='index'))
  358. #yield (version, d)
  359. return d
  360. */