@@ -36,6 +36,9 @@ chrono = { version = "0.4", features = ["serde"] } | |||
clap = "2" | |||
itertools-num = "0.1" | |||
[dev-dependencies] | |||
approx = "0.3" | |||
[profile.release] | |||
lto = "fat" | |||
panic = "abort" | |||
@@ -0,0 +1,514 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 1, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"CPU times: user 1min 8s, sys: 7.72 s, total: 1min 16s\n", | |||
"Wall time: 1min 16s\n", | |||
"<class 'pandas.core.frame.DataFrame'>\n", | |||
"RangeIndex: 92331988 entries, 0 to 92331987\n", | |||
"Data columns (total 7 columns):\n", | |||
"time int64\n", | |||
"amount float64\n", | |||
"exch object\n", | |||
"price float64\n", | |||
"server_time int64\n", | |||
"side object\n", | |||
"ticker object\n", | |||
"dtypes: float64(2), int64(2), object(3)\n", | |||
"memory usage: 4.8+ GB\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"%time df = pd.read_csv('/xfs/sample.csv')\n", | |||
"df.info()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 2, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/html": [ | |||
"<div>\n", | |||
"<style scoped>\n", | |||
" .dataframe tbody tr th:only-of-type {\n", | |||
" vertical-align: middle;\n", | |||
" }\n", | |||
"\n", | |||
" .dataframe tbody tr th {\n", | |||
" vertical-align: top;\n", | |||
" }\n", | |||
"\n", | |||
" .dataframe thead th {\n", | |||
" text-align: right;\n", | |||
" }\n", | |||
"</style>\n", | |||
"<table border=\"1\" class=\"dataframe\">\n", | |||
" <thead>\n", | |||
" <tr style=\"text-align: right;\">\n", | |||
" <th></th>\n", | |||
" <th>time</th>\n", | |||
" <th>amount</th>\n", | |||
" <th>exch</th>\n", | |||
" <th>price</th>\n", | |||
" <th>server_time</th>\n", | |||
" <th>side</th>\n", | |||
" <th>ticker</th>\n", | |||
" </tr>\n", | |||
" </thead>\n", | |||
" <tbody>\n", | |||
" <tr>\n", | |||
" <td>0</td>\n", | |||
" <td>1561939200002479372</td>\n", | |||
" <td>1.4894</td>\n", | |||
" <td>bnce</td>\n", | |||
" <td>292.7000</td>\n", | |||
" <td>1561939199919000064</td>\n", | |||
" <td>NaN</td>\n", | |||
" <td>eth_usd</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>1</td>\n", | |||
" <td>1561939200011035644</td>\n", | |||
" <td>0.0833</td>\n", | |||
" <td>btfx</td>\n", | |||
" <td>10809.0000</td>\n", | |||
" <td>1561939199927000064</td>\n", | |||
" <td>bid</td>\n", | |||
" <td>btc_usd</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>2</td>\n", | |||
" <td>1561939200011055712</td>\n", | |||
" <td>0.8333</td>\n", | |||
" <td>btfx</td>\n", | |||
" <td>10809.0000</td>\n", | |||
" <td>1561939199927000064</td>\n", | |||
" <td>bid</td>\n", | |||
" <td>btc_usd</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>3</td>\n", | |||
" <td>1561939200019037617</td>\n", | |||
" <td>0.0831</td>\n", | |||
" <td>bnce</td>\n", | |||
" <td>10854.1000</td>\n", | |||
" <td>1561939199935000064</td>\n", | |||
" <td>NaN</td>\n", | |||
" <td>btc_usd</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>4</td>\n", | |||
" <td>1561939200026450471</td>\n", | |||
" <td>0.1250</td>\n", | |||
" <td>okex</td>\n", | |||
" <td>123.2100</td>\n", | |||
" <td>1561939200026450432</td>\n", | |||
" <td>ask</td>\n", | |||
" <td>ltc_usd</td>\n", | |||
" </tr>\n", | |||
" </tbody>\n", | |||
"</table>\n", | |||
"</div>" | |||
], | |||
"text/plain": [ | |||
" time amount exch price server_time side \\\n", | |||
"0 1561939200002479372 1.4894 bnce 292.7000 1561939199919000064 NaN \n", | |||
"1 1561939200011035644 0.0833 btfx 10809.0000 1561939199927000064 bid \n", | |||
"2 1561939200011055712 0.8333 btfx 10809.0000 1561939199927000064 bid \n", | |||
"3 1561939200019037617 0.0831 bnce 10854.1000 1561939199935000064 NaN \n", | |||
"4 1561939200026450471 0.1250 okex 123.2100 1561939200026450432 ask \n", | |||
"\n", | |||
" ticker \n", | |||
"0 eth_usd \n", | |||
"1 btc_usd \n", | |||
"2 btc_usd \n", | |||
"3 btc_usd \n", | |||
"4 ltc_usd " | |||
] | |||
}, | |||
"execution_count": 2, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"df.head()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 7, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"(True, True, True)" | |||
] | |||
}, | |||
"execution_count": 7, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"SECOND = int(1e9)\n", | |||
"\n", | |||
"example_time = 1567295920000000000\n", | |||
"\n", | |||
"last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n", | |||
"last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n", | |||
"last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n", | |||
"of_btc_usd = df['ticker'] == 'btc_usd'\n", | |||
"of_gdax = df['exch'] == 'gdax'\n", | |||
"of_bmex = df['exch'] == 'bmex'\n", | |||
"\n", | |||
"g5 = last_5min & of_btc_usd & of_gdax\n", | |||
"b5 = last_5min & of_btc_usd & of_bmex\n", | |||
"g15 = last_15min & of_btc_usd & of_gdax\n", | |||
"b15 = last_15min & of_btc_usd & of_bmex\n", | |||
"g60 = last_60min & of_btc_usd & of_gdax\n", | |||
"b60 = last_60min & of_btc_usd & of_bmex\n", | |||
"\n", | |||
"ratio_5min = ((df.loc[b5, 'price'] * df.loc[b5, 'amount']).sum() / df.loc[b5, 'amount'].sum()) / ((df.loc[g5, 'price'] * df.loc[g5, 'amount']).sum() / df.loc[g5, 'amount'].sum())\n", | |||
"ratio_15min = ((df.loc[b15, 'price'] * df.loc[b15, 'amount']).sum() / df.loc[b15, 'amount'].sum()) / ((df.loc[g15, 'price'] * df.loc[g15, 'amount']).sum() / df.loc[g15, 'amount'].sum())\n", | |||
"ratio_60min = ((df.loc[b60, 'price'] * df.loc[b60, 'amount']).sum() / df.loc[b60, 'amount'].sum()) / ((df.loc[g60, 'price'] * df.loc[g60, 'amount']).sum() / df.loc[g60, 'amount'].sum())\n", | |||
"\n", | |||
"abs(ratio_5min - 1.000474060563638) < 1e-6, abs(ratio_15min - 1.0005019306061411) < 1e-6, abs(ratio_60min - 1.0002338013889658) < 1e-6" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 8, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/html": [ | |||
"<div>\n", | |||
"<style scoped>\n", | |||
" .dataframe tbody tr th:only-of-type {\n", | |||
" vertical-align: middle;\n", | |||
" }\n", | |||
"\n", | |||
" .dataframe tbody tr th {\n", | |||
" vertical-align: top;\n", | |||
" }\n", | |||
"\n", | |||
" .dataframe thead th {\n", | |||
" text-align: right;\n", | |||
" }\n", | |||
"</style>\n", | |||
"<table border=\"1\" class=\"dataframe\">\n", | |||
" <thead>\n", | |||
" <tr style=\"text-align: right;\">\n", | |||
" <th></th>\n", | |||
" <th>time</th>\n", | |||
" <th>last</th>\n", | |||
" <th>bmex_5min</th>\n", | |||
" <th>gdax_5min</th>\n", | |||
" <th>n_bmex_p5</th>\n", | |||
" <th>n_gdax_p5</th>\n", | |||
" <th>r5</th>\n", | |||
" <th>r15</th>\n", | |||
" <th>r60</th>\n", | |||
" </tr>\n", | |||
" </thead>\n", | |||
" <tbody>\n", | |||
" <tr>\n", | |||
" <td>0</td>\n", | |||
" <td>1561939210000000000</td>\n", | |||
" <td>10758.5800</td>\n", | |||
" <td>10760.7205</td>\n", | |||
" <td>10760.4593</td>\n", | |||
" <td>22</td>\n", | |||
" <td>28</td>\n", | |||
" <td>1.0000</td>\n", | |||
" <td>1.0000</td>\n", | |||
" <td>1.0000</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>1</td>\n", | |||
" <td>1561939220000000000</td>\n", | |||
" <td>10770.0000</td>\n", | |||
" <td>10763.6811</td>\n", | |||
" <td>10761.2528</td>\n", | |||
" <td>230</td>\n", | |||
" <td>75</td>\n", | |||
" <td>1.0002</td>\n", | |||
" <td>1.0002</td>\n", | |||
" <td>1.0002</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>2</td>\n", | |||
" <td>1561939230000000000</td>\n", | |||
" <td>10758.0100</td>\n", | |||
" <td>10761.8843</td>\n", | |||
" <td>10760.1596</td>\n", | |||
" <td>418</td>\n", | |||
" <td>120</td>\n", | |||
" <td>1.0002</td>\n", | |||
" <td>1.0002</td>\n", | |||
" <td>1.0002</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>3</td>\n", | |||
" <td>1561939240000000000</td>\n", | |||
" <td>10752.5000</td>\n", | |||
" <td>10757.6829</td>\n", | |||
" <td>10760.3630</td>\n", | |||
" <td>507</td>\n", | |||
" <td>147</td>\n", | |||
" <td>0.9998</td>\n", | |||
" <td>0.9998</td>\n", | |||
" <td>0.9998</td>\n", | |||
" </tr>\n", | |||
" <tr>\n", | |||
" <td>4</td>\n", | |||
" <td>1561939250000000000</td>\n", | |||
" <td>10772.6900</td>\n", | |||
" <td>10757.5702</td>\n", | |||
" <td>10763.0840</td>\n", | |||
" <td>537</td>\n", | |||
" <td>191</td>\n", | |||
" <td>0.9995</td>\n", | |||
" <td>0.9995</td>\n", | |||
" <td>0.9995</td>\n", | |||
" </tr>\n", | |||
" </tbody>\n", | |||
"</table>\n", | |||
"</div>" | |||
], | |||
"text/plain": [ | |||
" time last bmex_5min gdax_5min n_bmex_p5 n_gdax_p5 \\\n", | |||
"0 1561939210000000000 10758.5800 10760.7205 10760.4593 22 28 \n", | |||
"1 1561939220000000000 10770.0000 10763.6811 10761.2528 230 75 \n", | |||
"2 1561939230000000000 10758.0100 10761.8843 10760.1596 418 120 \n", | |||
"3 1561939240000000000 10752.5000 10757.6829 10760.3630 507 147 \n", | |||
"4 1561939250000000000 10772.6900 10757.5702 10763.0840 537 191 \n", | |||
"\n", | |||
" r5 r15 r60 \n", | |||
"0 1.0000 1.0000 1.0000 \n", | |||
"1 1.0002 1.0002 1.0002 \n", | |||
"2 1.0002 1.0002 1.0002 \n", | |||
"3 0.9998 0.9998 0.9998 \n", | |||
"4 0.9995 0.9995 0.9995 " | |||
] | |||
}, | |||
"execution_count": 8, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"ref = pd.read_csv('../var/hard.csv')\n", | |||
"ref.head()" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 12, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"array([1567295750000000000, 1567295760000000000, 1567295770000000000, 1567295780000000000, 1567295790000000000, 1567295800000000000, 1567295810000000000, 1567295820000000000, 1567295830000000000,\n", | |||
" 1567295840000000000, 1567295850000000000, 1567295860000000000, 1567295870000000000, 1567295880000000000, 1567295890000000000, 1567295900000000000, 1567295910000000000, 1567295920000000000,\n", | |||
" 1567295930000000000, 1567295940000000000, 1567295950000000000, 1567295960000000000, 1567295970000000000, 1567295980000000000, 1567295990000000000])" | |||
] | |||
}, | |||
"execution_count": 12, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"ref['time'].tail(25).values" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 13, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"finished in 487.8sec\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"import time\n", | |||
"\n", | |||
"start = time.time()\n", | |||
"rows = []\n", | |||
"\n", | |||
"for example_time in ref['time'].tail(25).values:\n", | |||
" last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n", | |||
" last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n", | |||
" last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n", | |||
" of_btc_usd = df['ticker'] == 'btc_usd'\n", | |||
" of_gdax = df['exch'] == 'gdax'\n", | |||
" of_bmex = df['exch'] == 'bmex'\n", | |||
"\n", | |||
" g5 = last_5min & of_btc_usd & of_gdax\n", | |||
" b5 = last_5min & of_btc_usd & of_bmex\n", | |||
" g15 = last_15min & of_btc_usd & of_gdax\n", | |||
" b15 = last_15min & of_btc_usd & of_bmex\n", | |||
" g60 = last_60min & of_btc_usd & of_gdax\n", | |||
" b60 = last_60min & of_btc_usd & of_bmex\n", | |||
"\n", | |||
" ratio_5min = ((df.loc[b5, 'price'] * df.loc[b5, 'amount']).sum() / df.loc[b5, 'amount'].sum()) / ((df.loc[g5, 'price'] * df.loc[g5, 'amount']).sum() / df.loc[g5, 'amount'].sum())\n", | |||
" ratio_15min = ((df.loc[b15, 'price'] * df.loc[b15, 'amount']).sum() / df.loc[b15, 'amount'].sum()) / ((df.loc[g15, 'price'] * df.loc[g15, 'amount']).sum() / df.loc[g15, 'amount'].sum())\n", | |||
" ratio_60min = ((df.loc[b60, 'price'] * df.loc[b60, 'amount']).sum() / df.loc[b60, 'amount'].sum()) / ((df.loc[g60, 'price'] * df.loc[g60, 'amount']).sum() / df.loc[g60, 'amount'].sum())\n", | |||
" rows.append(dict(example_time=example_time, r5=ratio_5min, r15=ratio_15min, r60=ratio_60min))\n", | |||
" \n", | |||
"took = time.time() - start\n", | |||
"print('finished in {:.1f}sec'.format(took))" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 19, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"3.342554545733013" | |||
] | |||
}, | |||
"execution_count": 19, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"hypothetical_full_took = (took / 25) * 5401808\n", | |||
"hypothetical_full_took / 60 / 60 / 24 / 365" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 20, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"0.1016643329480867" | |||
] | |||
}, | |||
"execution_count": 20, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"92331988 / 908204336" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 21, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"19.513984975814818" | |||
] | |||
}, | |||
"execution_count": 21, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"took / 25" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 24, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"('105,410,800.2', 105410800.15423629)" | |||
] | |||
}, | |||
"execution_count": 24, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"'{:,.1f}'.format(hypothetical_full_took), hypothetical_full_took" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 32, | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"data": { | |||
"text/plain": [ | |||
"r5_delta 0.000000000002368\n", | |||
"r15_delta 0.000000000010704\n", | |||
"r60_delta 0.000000000005513\n", | |||
"dtype: object" | |||
] | |||
}, | |||
"execution_count": 32, | |||
"metadata": {}, | |||
"output_type": "execute_result" | |||
} | |||
], | |||
"source": [ | |||
"(pd.DataFrame(rows).join(ref.set_index('time'), on='example_time', rsuffix='_rust')\n", | |||
" .assign(r5_delta=lambda df: abs(df['r5'] - df['r5_rust']))\n", | |||
" .assign(r15_delta=lambda df: abs(df['r15'] - df['r15_rust']))\n", | |||
" .assign(r60_delta=lambda df: abs(df['r60'] - df['r60_rust']))\n", | |||
")[['r5_delta','r15_delta','r60_delta']].max(axis=0).map(lambda x: '{:.15f}'.format(x))" | |||
] | |||
} | |||
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.7.5" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 2 | |||
} |
@@ -15,7 +15,7 @@ use serde::{Serialize, Deserialize}; | |||
use slog::Drain; | |||
use pretty_toa::ThousandsSep; | |||
use markets::crypto::{Exchange, Ticker, Side}; | |||
use pipelines::windows::WeightedAvgWindow; | |||
use pipelines::windows::WeightedMeanWindow; | |||
// equivalent to panic! but without the ugly 'thread main panicked' yada yada | |||
@@ -214,24 +214,28 @@ fn hard_mode<R, W>( | |||
let mut ratios: Lookbacks<f64> = Default::default(); | |||
let mut bwindows: Lookbacks<WeightedAvgWindow> = | |||
let mut bmex_windows: Lookbacks<WeightedMeanWindow> = | |||
Lookbacks { | |||
p5: WeightedAvgWindow::new(ONE_SECOND * 60 * 5 ), | |||
p15: WeightedAvgWindow::new(ONE_SECOND * 60 * 15), | |||
p60: WeightedAvgWindow::new(ONE_SECOND * 60 * 60), | |||
p5: WeightedMeanWindow::new(ONE_SECOND * 60 * 5 ), | |||
p15: WeightedMeanWindow::new(ONE_SECOND * 60 * 15), | |||
p60: WeightedMeanWindow::new(ONE_SECOND * 60 * 60), | |||
}; | |||
let mut gwindows = bwindows.clone(); | |||
let mut gdax_windows = bmex_windows.clone(); | |||
#[inline(always)] | |||
fn do_purge(windows: &mut Lookbacks<WeightedAvgWindow>, prices: &mut Lookbacks<f64>, time: u64) { | |||
if windows.p5.purge(time) { prices.p5 = windows.p5 .checked_wt_mean().unwrap_or(NAN); } | |||
if windows.p15.purge(time) { prices.p15 = windows.p15.checked_wt_mean().unwrap_or(NAN); } | |||
if windows.p60.purge(time) { prices.p60 = windows.p60.checked_wt_mean().unwrap_or(NAN); } | |||
fn do_purge(windows: &mut Lookbacks<WeightedMeanWindow>, prices: &mut Lookbacks<f64>, time: u64) { | |||
//if windows.p5.purge(time) { prices.p5 = windows.p5 .checked_weighted_mean().unwrap_or(NAN); } | |||
//if windows.p15.purge(time) { prices.p15 = windows.p15.checked_weighted_mean().unwrap_or(NAN); } | |||
//if windows.p60.purge(time) { prices.p60 = windows.p60.checked_weighted_mean().unwrap_or(NAN); } | |||
windows.p5 .purge(time); | |||
windows.p15.purge(time); | |||
windows.p60.purge(time); | |||
} | |||
#[allow(unused)] | |||
#[inline(always)] | |||
fn do_update(windows: &mut Lookbacks<WeightedAvgWindow>, prices: &mut Lookbacks<f64>, time: u64, price: f64, amount: f64) { | |||
fn do_update(windows: &mut Lookbacks<WeightedMeanWindow>, prices: &mut Lookbacks<f64>, time: u64, price: f64, amount: f64) { | |||
//prices.p5 = windows.p5 .update(time, price, amount).unwrap_or(NAN); | |||
//prices.p15 = windows.p15.update(time, price, amount).unwrap_or(NAN); | |||
//prices.p60 = windows.p60.update(time, price, amount).unwrap_or(NAN); | |||
@@ -239,21 +243,20 @@ fn hard_mode<R, W>( | |||
windows.p5 .push(time, price, amount); | |||
windows.p15.push(time, price, amount); | |||
windows.p60.push(time, price, amount); | |||
} | |||
macro_rules! update { // in macro to avoid repeating code once outside loop, and again in loop body | |||
($trade:ident) => {{ | |||
match $trade.exch { | |||
e!(bmex) => { | |||
do_update(&mut bwindows, &mut bprices, $trade.time, $trade.price, $trade.amount); | |||
//do_purge(&mut gwindows, &mut gprices, $trade.time); | |||
do_update(&mut bmex_windows, &mut bprices, $trade.time, $trade.price, $trade.amount); | |||
//do_purge(&mut gdax_windows, &mut gprices, $trade.time); | |||
last_price = $trade.price; | |||
} | |||
e!(gdax) => { | |||
do_update(&mut gwindows, &mut gprices, $trade.time, $trade.price, $trade.amount); | |||
//do_purge(&mut bwindows, &mut bprices, $trade.time); | |||
do_update(&mut gdax_windows, &mut gprices, $trade.time, $trade.price, $trade.amount); | |||
//do_purge(&mut bmex_windows, &mut bprices, $trade.time); | |||
last_price = $trade.price; | |||
} | |||
@@ -264,11 +267,11 @@ fn hard_mode<R, W>( | |||
wtr.write_record(&[ | |||
"time", | |||
"last", | |||
"bmex_5min", | |||
"gdax_5min", | |||
"n_bmex_p5", | |||
"n_gdax_p5", | |||
//"last", | |||
//"bmex_5min", | |||
//"gdax_5min", | |||
//"n_bmex_p5", | |||
//"n_gdax_p5", | |||
"r5", | |||
"r15", | |||
"r60", | |||
@@ -279,7 +282,7 @@ fn hard_mode<R, W>( | |||
//"n_gdax_p15", | |||
//"n_gdax_p60", | |||
//"gdax_p5_is_empty", | |||
//"gdax_p5_checked_wt_mean", | |||
//"gdax_p5_checked_weighted_mean", | |||
//"tradetime_minus_cur_bucket", | |||
]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?; | |||
@@ -306,48 +309,48 @@ fn hard_mode<R, W>( | |||
"n written" => n_written, | |||
"trade.time" => trade.time, | |||
"cur_bucket" => cur_bucket, | |||
"gdax p5 len" => gwindows.p5.len(), | |||
"gdax p5 wt avg" => gwindows.p5.wt_mean(), | |||
"gdax p5 len" => gdax_windows.p5.len(), | |||
"gdax p5 wt avg" => gdax_windows.p5.weighted_mean(), | |||
); | |||
do_purge(&mut gwindows, &mut gprices, cur_bucket); | |||
do_purge(&mut bwindows, &mut bprices, cur_bucket); | |||
do_purge(&mut gdax_windows, &mut gprices, cur_bucket); | |||
do_purge(&mut bmex_windows, &mut bprices, cur_bucket); | |||
debug!(logger, "finished purge"; | |||
"n" => n, | |||
"n written" => n_written, | |||
"trade.time" => trade.time, | |||
"cur_bucket" => cur_bucket, | |||
"gdax p5 len" => gwindows.p5.len(), | |||
"gdax p5 wt avg" => gwindows.p5.wt_mean(), | |||
"gdax p5 len" => gdax_windows.p5.len(), | |||
"gdax p5 wt avg" => gdax_windows.p5.weighted_mean(), | |||
); | |||
ratios.p5 = bwindows.p5 .checked_wt_mean().unwrap_or(NAN) / gwindows.p5 .checked_wt_mean().unwrap_or(NAN); | |||
ratios.p15 = bwindows.p15.checked_wt_mean().unwrap_or(NAN) / gwindows.p15.checked_wt_mean().unwrap_or(NAN); | |||
ratios.p60 = bwindows.p60.checked_wt_mean().unwrap_or(NAN) / gwindows.p60.checked_wt_mean().unwrap_or(NAN); | |||
ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean(); | |||
ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean(); | |||
ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean(); | |||
//ratios.p5 = bwindows.p5 .wt_mean() / gwindows.p5 .wt_mean(); | |||
//ratios.p15 = bwindows.p15.wt_mean() / gwindows.p15.wt_mean(); | |||
//ratios.p60 = bwindows.p60.wt_mean() / gwindows.p60.wt_mean(); | |||
//ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean(); | |||
//ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean(); | |||
//ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean(); | |||
wtr.write_record(&[ | |||
&format!("{}", cur_bucket), | |||
&format!("{}", last_price), | |||
&format!("{}", bwindows.p5.checked_wt_mean().unwrap_or(NAN)), | |||
&format!("{}", gwindows.p5.checked_wt_mean().unwrap_or(NAN)), | |||
&format!("{}", bwindows.p5.len()), | |||
&format!("{}", gwindows.p5.len()), | |||
//&format!("{}", last_price), | |||
//&format!("{}", bmex_windows.p5.checked_weighted_mean().unwrap_or(NAN)), | |||
//&format!("{}", gdax_windows.p5.checked_weighted_mean().unwrap_or(NAN)), | |||
//&format!("{}", bmex_windows.p5.len()), | |||
//&format!("{}", gdax_windows.p5.len()), | |||
&format!("{}", ratios.p5), | |||
&format!("{}", ratios.p15), | |||
&format!("{}", ratios.p60), | |||
//&format!("{}", bwindows.p15.len()), | |||
//&format!("{}", gwindows.p60.len()), | |||
//&format!("{}", gwindows.p15.len()), | |||
//&format!("{}", gwindows.p15.len()), | |||
//&format!("{}", bwindows.p60.len()), | |||
//&format!("{}", bwindows.p5.is_empty()), | |||
//&format!("{:?}", bwindows.p5.checked_wt_mean()), | |||
//&format!("{}", bmex_windows.p15.len()), | |||
//&format!("{}", gdax_windows.p60.len()), | |||
//&format!("{}", gdax_windows.p15.len()), | |||
//&format!("{}", gdax_windows.p15.len()), | |||
//&format!("{}", bmex_windows.p60.len()), | |||
//&format!("{}", bmex_windows.p5.is_empty()), | |||
//&format!("{:?}", bmex_windows.p5.checked_weighted_mean()), | |||
//&format!("{}", trade.time - cur_bucket), | |||
]).map_err(|e| { | |||
@@ -1,5 +1,28 @@ | |||
use std::collections::VecDeque; | |||
/// Calculates an online (incrementally updated) weighted mean for a rolling, time-based window. | |||
#[derive(Clone)] | |||
pub struct WeightedMeanWindow { | |||
/// The size of the window. On `purge`, a `WeightedPoint` item is considered | |||
/// expired if the supplied `time` parameter is more than `size` past the | |||
/// `time` attribute of that `WeightedPoint` item. | |||
size: u64, | |||
/// The weights and values, with their times, that are "currently" in the aggregation | |||
/// window. On `push`, items are added to the "back" of the `VecDeque`. On `purge`, | |||
/// items with a `time` that is > `size` difference relative to the `time` passed | |||
/// to `purge` are considered expired and removed from the front. In both cases, | |||
/// adding and removing, the running sums in `w_sum` and `sum_w` are updated incrementally. | |||
items: VecDeque<WeightedPoint>, | |||
/// The sum of value * weight over each of the `WeightedPoint`s in `items`. | |||
w_sum: f64, | |||
/// The sum of the weights of each of the `WeightedPoint`s in `items`. | |||
sum_w: f64, | |||
} | |||
/// Stores the time, value and weight for an item "currently" inside the | |||
/// aggregation window of a `WeightedMeanWindow`, allowing its value and | |||
/// weight to be subtracted from the accumulated sums of the window when | |||
/// the item becomes expired. | |||
#[derive(Debug, Clone)] | |||
pub struct WeightedPoint { | |||
pub time: u64, | |||
@@ -12,16 +35,7 @@ pub struct WeightedPoint { | |||
pub wt: f64, | |||
} | |||
#[derive(Clone)] | |||
pub struct WeightedAvgWindow { | |||
size: u64, | |||
items: VecDeque<WeightedPoint>, | |||
w_sum: f64, | |||
sum_w: f64, | |||
//w_mean: f64, | |||
} | |||
impl WeightedAvgWindow { | |||
impl WeightedMeanWindow { | |||
pub fn new(size: u64) -> Self { | |||
Self { | |||
size, | |||
@@ -35,9 +49,19 @@ impl WeightedAvgWindow { | |||
/// | |||
/// Returns `true` if any items were removed. | |||
pub fn purge(&mut self, time: u64) -> bool { | |||
// this is somewhat awkwardly implemented, but there is not anything like | |||
// `drain_while` on `VecDeque` (or `Vec`) that would work like `take_while`, | |||
// except also removing the items. Since we need the data in the items we | |||
// are removing to update `sum_w` and `w_sum`, we loop over the expired | |||
// items first, counting them in `n_remove`, then actually remove them | |||
// in a second pass. | |||
let mut n_remove = 0; | |||
{ | |||
// extra scope needed to shush the borrow checker | |||
let items = &self.items; | |||
let w_sum = &mut self.w_sum; | |||
let sum_w = &mut self.sum_w; | |||
@@ -52,7 +76,10 @@ impl WeightedAvgWindow { | |||
for _ in 0..n_remove { self.items.pop_front(); } | |||
// when items is empty, set w_sum, sum_w to 0.0 | |||
// when items is empty, set w_sum, sum_w to 0.0. the motive | |||
// of this approach, versus an if block with assignment, is | |||
// for the code to be "branchless" and do the same work each | |||
// time, in a cache- and branch predictor-friendly manner. | |||
let zeroer: f64 = ( ! self.items.is_empty()) as u8 as f64; | |||
self.w_sum *= zeroer; | |||
self.sum_w *= zeroer; | |||
@@ -61,6 +88,10 @@ impl WeightedAvgWindow { | |||
} | |||
/// Add a new item, updating incremental calculations in the process. | |||
/// | |||
/// Note: it is assumed that `time` is >= the highest `time` value of any previously | |||
/// pushed item. The expiration logic in `purge` relies on items being added to a | |||
/// `WeightedMeanWindow` in chronological order. | |||
pub fn push(&mut self, time: u64, val: f64, wt: f64) { | |||
let wt_val: f64 = val * wt; | |||
self.w_sum += wt_val; | |||
@@ -72,24 +103,26 @@ impl WeightedAvgWindow { | |||
/// accumulators. | |||
/// | |||
/// Note: this value is not cached. | |||
pub fn wt_mean(&self) -> f64 { | |||
pub fn weighted_mean(&self) -> f64 { | |||
self.w_sum / self.sum_w | |||
} | |||
/// Checks whether items `is_empty` before trying to calculate. | |||
/// Returns None if items is empty. | |||
pub fn checked_wt_mean(&self) -> Option<f64> { | |||
/// | |||
/// Note: this value is not cached. | |||
pub fn checked_weighted_mean(&self) -> Option<f64> { | |||
match self.is_empty() { | |||
true => None, | |||
false => Some(self.w_sum / self.sum_w), | |||
} | |||
} | |||
/// Purge, push and get `checked_wt_mean`, all in one convenient step. | |||
/// Purge, push and get `checked_weighted_mean`, all in one convenient step. | |||
pub fn update(&mut self, time: u64, val: f64, wt: f64) -> Option<f64> { | |||
self.purge(time); | |||
self.push(time, val, wt); | |||
self.checked_wt_mean() | |||
self.checked_weighted_mean() | |||
} | |||
/// Returns the number of points currently held in `items`. | |||
pub fn len(&self) -> usize { self.items.len() } | |||
@@ -97,3 +130,80 @@ impl WeightedAvgWindow { | |||
/// Returns `true` when `items` holds no points. | |||
pub fn is_empty(&self) -> bool { self.items.is_empty() } | |||
} | |||
// Unit tests for `WeightedMeanWindow`: the weighted mean itself, the empty-window | |||
// behaviour, and expiry of items on `purge`. | |||
// NOTE(review): it is not evident from this module what `allow(unused)` suppresses; confirm it is still needed. | |||
#[allow(unused)] | |||
#[cfg(test)] | |||
mod tests { | |||
use super::*; | |||
use approx::assert_relative_eq; | |||
/// Pushes ten (value, weight) pairs and checks the resulting weighted mean against a | |||
/// precomputed reference value (per the test name, presumably from `numpy.average`; confirm). | |||
#[test] | |||
fn weighted_mean_output_matches_numpy_average() { | |||
let xs: Vec<f64> = vec![ 0.41305045, 0.93555897, 0.77885094, 0.9896831 , 0.79720248, | |||
0.69497414, 0.34953127, 0.02331158, 0.89858514, 0.38312421 ]; | |||
let ws: Vec<f64> = vec![ 0.01256151, 0.58996267, 0.6474601 , 0.33013727, 0.92964117, | |||
0.21427296, 0.42990663, 0.81912449, 0.99428442, 0.71875903 ]; | |||
let mut w = WeightedMeanWindow::new(1_000_000_000); | |||
for (i, (val, weight)) in xs.iter().cloned().zip(ws.iter().cloned()).enumerate() { | |||
// the index doubles as the timestamp, so items are pushed in chronological order | |||
w.push(i as u64, val, weight); | |||
} | |||
// the window size (1_000_000_000) dwarfs every timestamp used here, so this purge must not expire anything | |||
w.purge(11); | |||
assert_eq!(w.items.len(), 10); | |||
assert_relative_eq!(w.weighted_mean(), 0.63599718086101786, epsilon = 0.0001); | |||
} | |||
/// A freshly constructed window is expected to be empty with both sums at zero: the | |||
/// checked mean is `None`, while the unchecked mean (`w_sum / sum_w`) is NaN. | |||
#[test] | |||
fn checked_weighted_mean_returns_none_when_items_is_empty_and_unchecked_is_nan() { | |||
let w = WeightedMeanWindow::new(1_000_000_000); | |||
assert!(w.is_empty()); | |||
assert_relative_eq!(w.sum_w, 0.0f64); | |||
assert_relative_eq!(w.w_sum, 0.0f64); | |||
assert!(w.checked_weighted_mean().is_none()); | |||
assert!(w.weighted_mean().is_nan()); | |||
} | |||
/// With a window size of 10 and items at times 0 through 9, an item is expected to expire | |||
/// only once the purge time is more than 10 past that item's own time, and the | |||
/// running sums must then drop the expired item's contribution. | |||
#[test] | |||
fn purge_expires_items() { | |||
let xs: Vec<f64> = vec![ 0.41305045, 0.93555897, 0.77885094, 0.9896831 , 0.79720248, | |||
0.69497414, 0.34953127, 0.02331158, 0.89858514, 0.38312421 ]; | |||
let ws: Vec<f64> = vec![ 0.01256151, 0.58996267, 0.6474601 , 0.33013727, 0.92964117, | |||
0.21427296, 0.42990663, 0.81912449, 0.99428442, 0.71875903 ]; | |||
// per-item value * weight products, used to rebuild the expected `w_sum` after each expiry | |||
let xs_times_ws: Vec<f64> = xs.iter().zip(ws.iter()).map(|(&x,&w)| x * w).collect(); | |||
let mut w = WeightedMeanWindow::new(10); | |||
for (i, (val, weight)) in xs.iter().cloned().zip(ws.iter().cloned()).enumerate() { | |||
w.push(i as u64, val, weight); | |||
} | |||
// the oldest item (time 0) is exactly 10 behind, which is not more than the window size: nothing expires | |||
w.purge(10); | |||
assert_eq!(w.items.len(), 10); | |||
// now the oldest item is 11 behind, so it expires and both sums lose its contribution | |||
w.purge(11); | |||
assert_eq!(w.items.len(), 9); | |||
assert_relative_eq!(w.sum_w, (&ws[1..]).iter().sum::<f64>(), epsilon = 1e-5); | |||
assert_relative_eq!(w.w_sum, (&xs_times_ws[1..]).iter().sum::<f64>(), epsilon = 1e-5); | |||
// purging again at the same time is expected to remove nothing further | |||
w.purge(11); | |||
assert_eq!(w.items.len(), 9); | |||
// the item at time 1 is now 11 behind and expires as well | |||
w.purge(12); | |||
assert_eq!(w.items.len(), 8); | |||
assert_relative_eq!(w.sum_w, (&ws[2..]).iter().sum::<f64>(), epsilon = 1e-5); | |||
assert_relative_eq!(w.w_sum, (&xs_times_ws[2..]).iter().sum::<f64>(), epsilon = 1e-5); | |||
} | |||
} |