def to_tframe(version, df, trades, start):
    """Bucket market trades into time windows around each of our own trades.

    For every row of ``df`` whose ``version`` column equals *version*, the
    rows of ``trades`` are grouped into consecutive half-open windows
    ``[tm + xs[i - 1], tm + xs[i])`` around that row's timestamp ``tm``.
    Each window stores ``sum(total) / sum(amount)`` (presumably a
    volume-weighted average price -- confirm against the data source), or
    ``np.nan`` when the summed amount is not positive.

    Args:
        version: Value compared against ``df['version']`` to select rows.
        df: DataFrame with ``version``, ``side`` and ``gid`` columns. Its
            index is cast to ``np.int64`` (assumes a nanosecond
            DatetimeIndex -- TODO confirm).
        trades: DataFrame with ``amount`` and ``total`` columns. Its index
            is cast to ``np.int64`` and must be sorted ascending, because
            the window scan walks it once from left to right.
        start: ``time.time()`` reference; only used for progress output.

    Returns:
        dict with keys ``'bid'`` and ``'ask'``. Each value is the result of
        ``sort_cols`` applied to a DataFrame holding one row per ``gid``
        and one column per window offset ``xs[1] .. xs[-2]``.

    Raises:
        KeyError: If a selected row's ``side`` is neither ``'bid'`` nor
            ``'ask'``.
    """
    d = {'bid': {}, 'ask': {}}
    cursor = 0
    n = 0
    n_periods = 40

    # Window edges relative to each own trade: the offsets mirrored onto the
    # negative side (element 0 dropped so it is not duplicated), then the
    # offsets themselves.
    offsets = periods(n_periods)
    xs = np.concatenate([offsets[:0:-1] * -1, offsets]) * 1000000  # mult to convert to nanos

    mine = df.loc[df['version'] == version]
    my_trades = sorted(zip(mine.index.values.astype(np.int64),
                           mine['side'],
                           mine['gid']))

    # Plain arrays: ``j`` below is a *position*, and integer indexing on a
    # Series is label-based (or a deprecated positional fallback) and slow.
    idx = trades.index.values.astype(np.int64)
    amts = trades['amount'].values
    totals = trades['total'].values
    n_trades = len(idx)
    assert n_trades == len(amts)
    assert n_trades == len(totals)

    for tm, side, gid in my_trades:
        print('{} to_tfame {} {} (cursor = {})'.format(time.time() - start, version, n, cursor))
        min_time = tm + xs[0]

        # Move the shared cursor close to the start of the first window.
        # Bounds are tested first so an empty ``trades`` frame is harmless.
        if n_trades and idx[cursor] > min_time:
            print('warning: idx[cursor] ({}) > min_time ({})'.format(idx[cursor], min_time))
            while cursor > 0 and idx[cursor] > min_time:
                cursor -= 1
        else:
            while cursor < n_trades - 1 and idx[cursor] < min_time:
                cursor += 1

        # The cursor may rest on a trade strictly before the window start
        # (after a rewind, or when every trade precedes the window). Skip
        # such trades so they are not counted in the first bucket.
        j = cursor
        while j < n_trades and idx[j] < min_time:
            j += 1

        row = {}
        # Bucket i covers [tm + xs[i - 1], tm + xs[i]) and is keyed by xs[i];
        # the final edge xs[-1] only ever served as a sentinel in the
        # original loop, so it gets no bucket here either.
        for i in range(1, len(xs) - 1):
            max_time = tm + xs[i]
            wsum = 0.0
            w = 0.0
            # ``j < n_trades`` stops the scan at the end of the trade data
            # instead of raising IndexError.
            while j < n_trades and idx[j] < max_time:
                wsum += totals[j]
                w += amts[j]
                j += 1
            row[xs[i]] = wsum / w if w > 0.0 else np.nan
        d[side][gid] = row
        n += 1

    d['bid'] = sort_cols(pd.DataFrame.from_dict(d['bid'], orient='index'))
    d['ask'] = sort_cols(pd.DataFrame.from_dict(d['ask'], orient='index'))
    return d


# code used to perform `to_tframe` on all versions:
start = time.time()
tresults = {}
for version in version_counts.index:
    tresults[version] = to_tframe(version, df, trades, start)
print('{} finished'.format(time.time() - start))