You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

python-68min-analysis-the-function.py 1.9KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. def to_tframe(version, df, trades, start):
  2. d = {'bid': {}, 'ask': {}}
  3. cursor = 0
  4. n = 0
  5. n_periods = 40
  6. xs = np.concatenate([periods(n_periods)[:0:-1] * -1, periods(n_periods)]) * 1000000 # mult to convert to nanos
  7. mask = df['version'] == version
  8. my_trades = sorted(list(zip(df.loc[mask].index.values.astype(np.int64), df.loc[mask, 'side'], df.loc[mask, 'gid'])))
  9. idx = trades.index.values.astype(np.int64)
  10. amts = trades['amount']
  11. totals = trades['total']
  12. assert len(idx) == len(amts)
  13. assert len(idx) == len(totals)
  14. for tm, side, gid in my_trades:
  15. print '{} to_tfame {} {} (cursor = {})'.format(time.time() - start, version, n, cursor)
  16. min_time = tm + xs[0]
  17. max_time = tm + xs[1]
  18. if idx[cursor] > min_time:
  19. print 'warning: idx[cursor] ({}) > min_time ({})'.format(idx[cursor], min_time)
  20. while idx[cursor] > min_time and cursor > 0:
  21. cursor -= 1
  22. else:
  23. while idx[cursor] < min_time and cursor < len(idx) - 1:
  24. cursor += 1
  25. i = 1
  26. j = cursor
  27. d[side][gid] = {}
  28. while i < len(xs) - 1:
  29. wsum = 0.0
  30. w = 0.0
  31. while idx[j] < max_time:
  32. wsum += totals[j]
  33. w += amts[j]
  34. j += 1
  35. if w > 0.0:
  36. d[side][gid][xs[i]] = wsum / w
  37. else:
  38. d[side][gid][xs[i]] = np.nan
  39. i += 1
  40. min_time = max_time
  41. max_time = tm + xs[i]
  42. n += 1
  43. d['bid'] = sort_cols(pd.DataFrame.from_dict(d['bid'], orient='index'))
  44. d['ask'] = sort_cols(pd.DataFrame.from_dict(d['ask'], orient='index'))
  45. return d
  46. # code used to perform `to_tframe` on all versions:
  47. start = time.time()
  48. tresults = {}
  49. for version in version_counts.index:
  50. tresults[version] = to_tframe(version, df, trades, start)
  51. print '{} finished'.format(time.time() - start)