|
-
def to_tframe(version, df, trades, start):
    """Build per-event tables of amount-weighted average trade prices.

    For every row of ``df`` whose ``'version'`` equals ``version``, the
    trades in ``trades`` are grouped into consecutive time intervals laid out
    around that row's timestamp, and ``sum(total) / sum(amount)`` is computed
    for each interval (``nan`` when the interval holds no amount).

    Args:
        version: Value compared against ``df['version']`` to select rows.
        df: DataFrame with ``'version'``, ``'side'`` and ``'gid'`` columns.
            Its index is cast to ``int64`` and used as the event timestamp
            (presumably a DatetimeIndex, i.e. nanoseconds -- confirm).
            ``'side'`` must be ``'bid'`` or ``'ask'``.
        trades: DataFrame with ``'amount'`` and ``'total'`` columns whose
            index is cast to ``int64`` the same way.  The cursor walk below
            assumes this index is sorted ascending -- confirm with caller.
        start: ``time.time()`` reference used only for the progress output.

    Returns:
        dict with keys ``'bid'`` and ``'ask'``; each value is the result of
        ``sort_cols`` applied to a DataFrame that has one row per ``gid`` and
        one column per interval end offset.

    Raises:
        KeyError: if a selected row has a ``'side'`` other than
            ``'bid'``/``'ask'``.
    """
    n_periods = 40
    # Offsets relative to each event: the mirrored negative half followed by
    # the positive half.  `periods` is defined elsewhere in the project.
    offsets = periods(n_periods)
    xs = np.concatenate([offsets[:0:-1] * -1, offsets]) * 1000000  # mult to convert to nanos

    selected = df.loc[df['version'] == version]
    my_trades = sorted(zip(selected.index.values.astype(np.int64),
                           selected['side'],
                           selected['gid']))

    # Plain NumPy arrays: positional indexing inside the loops must not
    # depend on (or pay for) pandas label lookup.
    idx = trades.index.values.astype(np.int64)
    amts = trades['amount'].values
    totals = trades['total'].values
    n_trades = len(idx)

    d = {'bid': {}, 'ask': {}}
    cursor = 0
    for n, (tm, side, gid) in enumerate(my_trades):
        print('{} to_tfame {} {} (cursor = {})'.format(time.time() - start, version, n, cursor))
        min_time = tm + xs[0]
        if cursor < n_trades and idx[cursor] > min_time:
            print('warning: idx[cursor] ({}) > min_time ({})'.format(idx[cursor], min_time))

        # Park the cursor on the first trade at or after min_time.  Looking
        # at idx[cursor - 1] when stepping back avoids landing on a trade
        # that precedes the window; cursor == n_trades means "none left".
        while cursor > 0 and idx[cursor - 1] >= min_time:
            cursor -= 1
        while cursor < n_trades and idx[cursor] < min_time:
            cursor += 1

        row = d[side][gid] = {}
        j = cursor
        # Interval i spans [tm + xs[i - 1], tm + xs[i]) and is stored under
        # xs[i]; j carries over so each trade is visited once per event.
        # NOTE(review): as in the original loop bounds, the final interval
        # ending at xs[-1] is never evaluated -- confirm this is intended.
        for i in range(1, len(xs) - 1):
            max_time = tm + xs[i]
            wsum = 0.0
            w = 0.0
            # The bound on j stops the scan at the end of the trade data
            # instead of raising IndexError.
            while j < n_trades and idx[j] < max_time:
                wsum += totals[j]
                w += amts[j]
                j += 1
            row[xs[i]] = wsum / w if w > 0.0 else np.nan

    d['bid'] = sort_cols(pd.DataFrame.from_dict(d['bid'], orient='index'))
    d['ask'] = sort_cols(pd.DataFrame.from_dict(d['ask'], orient='index'))
    return d
-
-
# Code used to perform `to_tframe` on all versions.
# `version_counts`, `df` and `trades` are expected to exist already at module
# level; they are not defined in this fragment.

start = time.time()  # shared reference so every progress line shows total elapsed time
tresults = {}  # version -> {'bid': ..., 'ask': ...} as returned by to_tframe
for version in version_counts.index:
    tresults[version] = to_tframe(version, df, trades, start)
# NOTE(review): indentation was lost in the source; this summary line is
# placed after the loop (printed once) -- confirm.
print('{} finished'.format(time.time() - start))
|