@@ -36,6 +36,9 @@ chrono = { version = "0.4", features = ["serde"] } | |||||
clap = "2" | clap = "2" | ||||
itertools-num = "0.1" | itertools-num = "0.1" | ||||
[dev-dependencies] | |||||
approx = "0.3" | |||||
[profile.release] | [profile.release] | ||||
lto = "fat" | lto = "fat" | ||||
panic = "abort" | panic = "abort" | ||||
@@ -0,0 +1,514 @@ | |||||
{ | |||||
"cells": [ | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 1, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"CPU times: user 1min 8s, sys: 7.72 s, total: 1min 16s\n", | |||||
"Wall time: 1min 16s\n", | |||||
"<class 'pandas.core.frame.DataFrame'>\n", | |||||
"RangeIndex: 92331988 entries, 0 to 92331987\n", | |||||
"Data columns (total 7 columns):\n", | |||||
"time int64\n", | |||||
"amount float64\n", | |||||
"exch object\n", | |||||
"price float64\n", | |||||
"server_time int64\n", | |||||
"side object\n", | |||||
"ticker object\n", | |||||
"dtypes: float64(2), int64(2), object(3)\n", | |||||
"memory usage: 4.8+ GB\n" | |||||
] | |||||
} | |||||
], | |||||
"source": [ | |||||
"%time df = pd.read_csv('/xfs/sample.csv')\n", | |||||
"df.info()" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 2, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/html": [ | |||||
"<div>\n", | |||||
"<style scoped>\n", | |||||
" .dataframe tbody tr th:only-of-type {\n", | |||||
" vertical-align: middle;\n", | |||||
" }\n", | |||||
"\n", | |||||
" .dataframe tbody tr th {\n", | |||||
" vertical-align: top;\n", | |||||
" }\n", | |||||
"\n", | |||||
" .dataframe thead th {\n", | |||||
" text-align: right;\n", | |||||
" }\n", | |||||
"</style>\n", | |||||
"<table border=\"1\" class=\"dataframe\">\n", | |||||
" <thead>\n", | |||||
" <tr style=\"text-align: right;\">\n", | |||||
" <th></th>\n", | |||||
" <th>time</th>\n", | |||||
" <th>amount</th>\n", | |||||
" <th>exch</th>\n", | |||||
" <th>price</th>\n", | |||||
" <th>server_time</th>\n", | |||||
" <th>side</th>\n", | |||||
" <th>ticker</th>\n", | |||||
" </tr>\n", | |||||
" </thead>\n", | |||||
" <tbody>\n", | |||||
" <tr>\n", | |||||
" <td>0</td>\n", | |||||
" <td>1561939200002479372</td>\n", | |||||
" <td>1.4894</td>\n", | |||||
" <td>bnce</td>\n", | |||||
" <td>292.7000</td>\n", | |||||
" <td>1561939199919000064</td>\n", | |||||
" <td>NaN</td>\n", | |||||
" <td>eth_usd</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>1</td>\n", | |||||
" <td>1561939200011035644</td>\n", | |||||
" <td>0.0833</td>\n", | |||||
" <td>btfx</td>\n", | |||||
" <td>10809.0000</td>\n", | |||||
" <td>1561939199927000064</td>\n", | |||||
" <td>bid</td>\n", | |||||
" <td>btc_usd</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>2</td>\n", | |||||
" <td>1561939200011055712</td>\n", | |||||
" <td>0.8333</td>\n", | |||||
" <td>btfx</td>\n", | |||||
" <td>10809.0000</td>\n", | |||||
" <td>1561939199927000064</td>\n", | |||||
" <td>bid</td>\n", | |||||
" <td>btc_usd</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>3</td>\n", | |||||
" <td>1561939200019037617</td>\n", | |||||
" <td>0.0831</td>\n", | |||||
" <td>bnce</td>\n", | |||||
" <td>10854.1000</td>\n", | |||||
" <td>1561939199935000064</td>\n", | |||||
" <td>NaN</td>\n", | |||||
" <td>btc_usd</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>4</td>\n", | |||||
" <td>1561939200026450471</td>\n", | |||||
" <td>0.1250</td>\n", | |||||
" <td>okex</td>\n", | |||||
" <td>123.2100</td>\n", | |||||
" <td>1561939200026450432</td>\n", | |||||
" <td>ask</td>\n", | |||||
" <td>ltc_usd</td>\n", | |||||
" </tr>\n", | |||||
" </tbody>\n", | |||||
"</table>\n", | |||||
"</div>" | |||||
], | |||||
"text/plain": [ | |||||
" time amount exch price server_time side \\\n", | |||||
"0 1561939200002479372 1.4894 bnce 292.7000 1561939199919000064 NaN \n", | |||||
"1 1561939200011035644 0.0833 btfx 10809.0000 1561939199927000064 bid \n", | |||||
"2 1561939200011055712 0.8333 btfx 10809.0000 1561939199927000064 bid \n", | |||||
"3 1561939200019037617 0.0831 bnce 10854.1000 1561939199935000064 NaN \n", | |||||
"4 1561939200026450471 0.1250 okex 123.2100 1561939200026450432 ask \n", | |||||
"\n", | |||||
" ticker \n", | |||||
"0 eth_usd \n", | |||||
"1 btc_usd \n", | |||||
"2 btc_usd \n", | |||||
"3 btc_usd \n", | |||||
"4 ltc_usd " | |||||
] | |||||
}, | |||||
"execution_count": 2, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"df.head()" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 7, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/plain": [ | |||||
"(True, True, True)" | |||||
] | |||||
}, | |||||
"execution_count": 7, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"SECOND = int(1e9)\n", | |||||
"\n", | |||||
"example_time = 1567295920000000000\n", | |||||
"\n", | |||||
"last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n", | |||||
"last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n", | |||||
"last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n", | |||||
"of_btc_usd = df['ticker'] == 'btc_usd'\n", | |||||
"of_gdax = df['exch'] == 'gdax'\n", | |||||
"of_bmex = df['exch'] == 'bmex'\n", | |||||
"\n", | |||||
"g5 = last_5min & of_btc_usd & of_gdax\n", | |||||
"b5 = last_5min & of_btc_usd & of_bmex\n", | |||||
"g15 = last_15min & of_btc_usd & of_gdax\n", | |||||
"b15 = last_15min & of_btc_usd & of_bmex\n", | |||||
"g60 = last_60min & of_btc_usd & of_gdax\n", | |||||
"b60 = last_60min & of_btc_usd & of_bmex\n", | |||||
"\n", | |||||
"ratio_5min = ((df.loc[b5, 'price'] * df.loc[b5, 'amount']).sum() / df.loc[b5, 'amount'].sum()) / ((df.loc[g5, 'price'] * df.loc[g5, 'amount']).sum() / df.loc[g5, 'amount'].sum())\n", | |||||
"ratio_15min = ((df.loc[b15, 'price'] * df.loc[b15, 'amount']).sum() / df.loc[b15, 'amount'].sum()) / ((df.loc[g15, 'price'] * df.loc[g15, 'amount']).sum() / df.loc[g15, 'amount'].sum())\n", | |||||
"ratio_60min = ((df.loc[b60, 'price'] * df.loc[b60, 'amount']).sum() / df.loc[b60, 'amount'].sum()) / ((df.loc[g60, 'price'] * df.loc[g60, 'amount']).sum() / df.loc[g60, 'amount'].sum())\n", | |||||
"\n", | |||||
"abs(ratio_5min - 1.000474060563638) < 1e-6, abs(ratio_15min - 1.0005019306061411) < 1e-6, abs(ratio_60min - 1.0002338013889658) < 1e-6" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 8, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/html": [ | |||||
"<div>\n", | |||||
"<style scoped>\n", | |||||
" .dataframe tbody tr th:only-of-type {\n", | |||||
" vertical-align: middle;\n", | |||||
" }\n", | |||||
"\n", | |||||
" .dataframe tbody tr th {\n", | |||||
" vertical-align: top;\n", | |||||
" }\n", | |||||
"\n", | |||||
" .dataframe thead th {\n", | |||||
" text-align: right;\n", | |||||
" }\n", | |||||
"</style>\n", | |||||
"<table border=\"1\" class=\"dataframe\">\n", | |||||
" <thead>\n", | |||||
" <tr style=\"text-align: right;\">\n", | |||||
" <th></th>\n", | |||||
" <th>time</th>\n", | |||||
" <th>last</th>\n", | |||||
" <th>bmex_5min</th>\n", | |||||
" <th>gdax_5min</th>\n", | |||||
" <th>n_bmex_p5</th>\n", | |||||
" <th>n_gdax_p5</th>\n", | |||||
" <th>r5</th>\n", | |||||
" <th>r15</th>\n", | |||||
" <th>r60</th>\n", | |||||
" </tr>\n", | |||||
" </thead>\n", | |||||
" <tbody>\n", | |||||
" <tr>\n", | |||||
" <td>0</td>\n", | |||||
" <td>1561939210000000000</td>\n", | |||||
" <td>10758.5800</td>\n", | |||||
" <td>10760.7205</td>\n", | |||||
" <td>10760.4593</td>\n", | |||||
" <td>22</td>\n", | |||||
" <td>28</td>\n", | |||||
" <td>1.0000</td>\n", | |||||
" <td>1.0000</td>\n", | |||||
" <td>1.0000</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>1</td>\n", | |||||
" <td>1561939220000000000</td>\n", | |||||
" <td>10770.0000</td>\n", | |||||
" <td>10763.6811</td>\n", | |||||
" <td>10761.2528</td>\n", | |||||
" <td>230</td>\n", | |||||
" <td>75</td>\n", | |||||
" <td>1.0002</td>\n", | |||||
" <td>1.0002</td>\n", | |||||
" <td>1.0002</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>2</td>\n", | |||||
" <td>1561939230000000000</td>\n", | |||||
" <td>10758.0100</td>\n", | |||||
" <td>10761.8843</td>\n", | |||||
" <td>10760.1596</td>\n", | |||||
" <td>418</td>\n", | |||||
" <td>120</td>\n", | |||||
" <td>1.0002</td>\n", | |||||
" <td>1.0002</td>\n", | |||||
" <td>1.0002</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>3</td>\n", | |||||
" <td>1561939240000000000</td>\n", | |||||
" <td>10752.5000</td>\n", | |||||
" <td>10757.6829</td>\n", | |||||
" <td>10760.3630</td>\n", | |||||
" <td>507</td>\n", | |||||
" <td>147</td>\n", | |||||
" <td>0.9998</td>\n", | |||||
" <td>0.9998</td>\n", | |||||
" <td>0.9998</td>\n", | |||||
" </tr>\n", | |||||
" <tr>\n", | |||||
" <td>4</td>\n", | |||||
" <td>1561939250000000000</td>\n", | |||||
" <td>10772.6900</td>\n", | |||||
" <td>10757.5702</td>\n", | |||||
" <td>10763.0840</td>\n", | |||||
" <td>537</td>\n", | |||||
" <td>191</td>\n", | |||||
" <td>0.9995</td>\n", | |||||
" <td>0.9995</td>\n", | |||||
" <td>0.9995</td>\n", | |||||
" </tr>\n", | |||||
" </tbody>\n", | |||||
"</table>\n", | |||||
"</div>" | |||||
], | |||||
"text/plain": [ | |||||
" time last bmex_5min gdax_5min n_bmex_p5 n_gdax_p5 \\\n", | |||||
"0 1561939210000000000 10758.5800 10760.7205 10760.4593 22 28 \n", | |||||
"1 1561939220000000000 10770.0000 10763.6811 10761.2528 230 75 \n", | |||||
"2 1561939230000000000 10758.0100 10761.8843 10760.1596 418 120 \n", | |||||
"3 1561939240000000000 10752.5000 10757.6829 10760.3630 507 147 \n", | |||||
"4 1561939250000000000 10772.6900 10757.5702 10763.0840 537 191 \n", | |||||
"\n", | |||||
" r5 r15 r60 \n", | |||||
"0 1.0000 1.0000 1.0000 \n", | |||||
"1 1.0002 1.0002 1.0002 \n", | |||||
"2 1.0002 1.0002 1.0002 \n", | |||||
"3 0.9998 0.9998 0.9998 \n", | |||||
"4 0.9995 0.9995 0.9995 " | |||||
] | |||||
}, | |||||
"execution_count": 8, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"ref = pd.read_csv('../var/hard.csv')\n", | |||||
"ref.head()" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 12, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/plain": [ | |||||
"array([1567295750000000000, 1567295760000000000, 1567295770000000000, 1567295780000000000, 1567295790000000000, 1567295800000000000, 1567295810000000000, 1567295820000000000, 1567295830000000000,\n", | |||||
" 1567295840000000000, 1567295850000000000, 1567295860000000000, 1567295870000000000, 1567295880000000000, 1567295890000000000, 1567295900000000000, 1567295910000000000, 1567295920000000000,\n", | |||||
" 1567295930000000000, 1567295940000000000, 1567295950000000000, 1567295960000000000, 1567295970000000000, 1567295980000000000, 1567295990000000000])" | |||||
] | |||||
}, | |||||
"execution_count": 12, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"ref['time'].tail(25).values" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 13, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"name": "stdout", | |||||
"output_type": "stream", | |||||
"text": [ | |||||
"finished in 487.8sec\n" | |||||
] | |||||
} | |||||
], | |||||
"source": [ | |||||
"import time\n", | |||||
"\n", | |||||
"start = time.time()\n", | |||||
"rows = []\n", | |||||
"\n", | |||||
"for example_time in ref['time'].tail(25).values:\n", | |||||
" last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n", | |||||
" last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n", | |||||
" last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n", | |||||
" of_btc_usd = df['ticker'] == 'btc_usd'\n", | |||||
" of_gdax = df['exch'] == 'gdax'\n", | |||||
" of_bmex = df['exch'] == 'bmex'\n", | |||||
"\n", | |||||
" g5 = last_5min & of_btc_usd & of_gdax\n", | |||||
" b5 = last_5min & of_btc_usd & of_bmex\n", | |||||
" g15 = last_15min & of_btc_usd & of_gdax\n", | |||||
" b15 = last_15min & of_btc_usd & of_bmex\n", | |||||
" g60 = last_60min & of_btc_usd & of_gdax\n", | |||||
" b60 = last_60min & of_btc_usd & of_bmex\n", | |||||
"\n", | |||||
" ratio_5min = ((df.loc[b5, 'price'] * df.loc[b5, 'amount']).sum() / df.loc[b5, 'amount'].sum()) / ((df.loc[g5, 'price'] * df.loc[g5, 'amount']).sum() / df.loc[g5, 'amount'].sum())\n", | |||||
" ratio_15min = ((df.loc[b15, 'price'] * df.loc[b15, 'amount']).sum() / df.loc[b15, 'amount'].sum()) / ((df.loc[g15, 'price'] * df.loc[g15, 'amount']).sum() / df.loc[g15, 'amount'].sum())\n", | |||||
" ratio_60min = ((df.loc[b60, 'price'] * df.loc[b60, 'amount']).sum() / df.loc[b60, 'amount'].sum()) / ((df.loc[g60, 'price'] * df.loc[g60, 'amount']).sum() / df.loc[g60, 'amount'].sum())\n", | |||||
" rows.append(dict(example_time=example_time, r5=ratio_5min, r15=ratio_15min, r60=ratio_60min))\n", | |||||
" \n", | |||||
"took = time.time() - start\n", | |||||
"print('finished in {:.1f}sec'.format(took))" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 19, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/plain": [ | |||||
"3.342554545733013" | |||||
] | |||||
}, | |||||
"execution_count": 19, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"hypothetical_full_took = (took / 25) * 5401808\n", | |||||
"hypothetical_full_took / 60 / 60 / 24 / 365" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 20, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/plain": [ | |||||
"0.1016643329480867" | |||||
] | |||||
}, | |||||
"execution_count": 20, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"92331988 / 908204336" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 21, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/plain": [ | |||||
"19.513984975814818" | |||||
] | |||||
}, | |||||
"execution_count": 21, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"took / 25" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 24, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/plain": [ | |||||
"('105,410,800.2', 105410800.15423629)" | |||||
] | |||||
}, | |||||
"execution_count": 24, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"'{:,.1f}'.format(hypothetical_full_took), hypothetical_full_took" | |||||
] | |||||
}, | |||||
{ | |||||
"cell_type": "code", | |||||
"execution_count": 32, | |||||
"metadata": {}, | |||||
"outputs": [ | |||||
{ | |||||
"data": { | |||||
"text/plain": [ | |||||
"r5_delta 0.000000000002368\n", | |||||
"r15_delta 0.000000000010704\n", | |||||
"r60_delta 0.000000000005513\n", | |||||
"dtype: object" | |||||
] | |||||
}, | |||||
"execution_count": 32, | |||||
"metadata": {}, | |||||
"output_type": "execute_result" | |||||
} | |||||
], | |||||
"source": [ | |||||
"(pd.DataFrame(rows).join(ref.set_index('time'), on='example_time', rsuffix='_rust')\n", | |||||
" .assign(r5_delta=lambda df: abs(df['r5'] - df['r5_rust']))\n", | |||||
" .assign(r15_delta=lambda df: abs(df['r15'] - df['r15_rust']))\n", | |||||
" .assign(r60_delta=lambda df: abs(df['r60'] - df['r60_rust']))\n", | |||||
")[['r5_delta','r15_delta','r60_delta']].max(axis=0).map(lambda x: '{:.15f}'.format(x))" | |||||
] | |||||
} | |||||
], | |||||
"metadata": { | |||||
"kernelspec": { | |||||
"display_name": "Python 3", | |||||
"language": "python", | |||||
"name": "python3" | |||||
}, | |||||
"language_info": { | |||||
"codemirror_mode": { | |||||
"name": "ipython", | |||||
"version": 3 | |||||
}, | |||||
"file_extension": ".py", | |||||
"mimetype": "text/x-python", | |||||
"name": "python", | |||||
"nbconvert_exporter": "python", | |||||
"pygments_lexer": "ipython3", | |||||
"version": "3.7.5" | |||||
} | |||||
}, | |||||
"nbformat": 4, | |||||
"nbformat_minor": 2 | |||||
} |
@@ -15,7 +15,7 @@ use serde::{Serialize, Deserialize}; | |||||
use slog::Drain; | use slog::Drain; | ||||
use pretty_toa::ThousandsSep; | use pretty_toa::ThousandsSep; | ||||
use markets::crypto::{Exchange, Ticker, Side}; | use markets::crypto::{Exchange, Ticker, Side}; | ||||
use pipelines::windows::WeightedAvgWindow; | |||||
use pipelines::windows::WeightedMeanWindow; | |||||
// equivalent to panic! but without the ugly 'thread main panicked' yada yada | // equivalent to panic! but without the ugly 'thread main panicked' yada yada | ||||
@@ -214,24 +214,28 @@ fn hard_mode<R, W>( | |||||
let mut ratios: Lookbacks<f64> = Default::default(); | let mut ratios: Lookbacks<f64> = Default::default(); | ||||
let mut bwindows: Lookbacks<WeightedAvgWindow> = | |||||
let mut bmex_windows: Lookbacks<WeightedMeanWindow> = | |||||
Lookbacks { | Lookbacks { | ||||
p5: WeightedAvgWindow::new(ONE_SECOND * 60 * 5 ), | |||||
p15: WeightedAvgWindow::new(ONE_SECOND * 60 * 15), | |||||
p60: WeightedAvgWindow::new(ONE_SECOND * 60 * 60), | |||||
p5: WeightedMeanWindow::new(ONE_SECOND * 60 * 5 ), | |||||
p15: WeightedMeanWindow::new(ONE_SECOND * 60 * 15), | |||||
p60: WeightedMeanWindow::new(ONE_SECOND * 60 * 60), | |||||
}; | }; | ||||
let mut gwindows = bwindows.clone(); | |||||
let mut gdax_windows = bmex_windows.clone(); | |||||
#[inline(always)] | #[inline(always)] | ||||
fn do_purge(windows: &mut Lookbacks<WeightedAvgWindow>, prices: &mut Lookbacks<f64>, time: u64) { | |||||
if windows.p5.purge(time) { prices.p5 = windows.p5 .checked_wt_mean().unwrap_or(NAN); } | |||||
if windows.p15.purge(time) { prices.p15 = windows.p15.checked_wt_mean().unwrap_or(NAN); } | |||||
if windows.p60.purge(time) { prices.p60 = windows.p60.checked_wt_mean().unwrap_or(NAN); } | |||||
fn do_purge(windows: &mut Lookbacks<WeightedMeanWindow>, prices: &mut Lookbacks<f64>, time: u64) { | |||||
//if windows.p5.purge(time) { prices.p5 = windows.p5 .checked_weighted_mean().unwrap_or(NAN); } | |||||
//if windows.p15.purge(time) { prices.p15 = windows.p15.checked_weighted_mean().unwrap_or(NAN); } | |||||
//if windows.p60.purge(time) { prices.p60 = windows.p60.checked_weighted_mean().unwrap_or(NAN); } | |||||
windows.p5 .purge(time); | |||||
windows.p15.purge(time); | |||||
windows.p60.purge(time); | |||||
} | } | ||||
#[allow(unused)] | |||||
#[inline(always)] | #[inline(always)] | ||||
fn do_update(windows: &mut Lookbacks<WeightedAvgWindow>, prices: &mut Lookbacks<f64>, time: u64, price: f64, amount: f64) { | |||||
fn do_update(windows: &mut Lookbacks<WeightedMeanWindow>, prices: &mut Lookbacks<f64>, time: u64, price: f64, amount: f64) { | |||||
//prices.p5 = windows.p5 .update(time, price, amount).unwrap_or(NAN); | //prices.p5 = windows.p5 .update(time, price, amount).unwrap_or(NAN); | ||||
//prices.p15 = windows.p15.update(time, price, amount).unwrap_or(NAN); | //prices.p15 = windows.p15.update(time, price, amount).unwrap_or(NAN); | ||||
//prices.p60 = windows.p60.update(time, price, amount).unwrap_or(NAN); | //prices.p60 = windows.p60.update(time, price, amount).unwrap_or(NAN); | ||||
@@ -239,21 +243,20 @@ fn hard_mode<R, W>( | |||||
windows.p5 .push(time, price, amount); | windows.p5 .push(time, price, amount); | ||||
windows.p15.push(time, price, amount); | windows.p15.push(time, price, amount); | ||||
windows.p60.push(time, price, amount); | windows.p60.push(time, price, amount); | ||||
} | } | ||||
macro_rules! update { // in macro to avoid repeating code once outside loop, and again in loop body | macro_rules! update { // in macro to avoid repeating code once outside loop, and again in loop body | ||||
($trade:ident) => {{ | ($trade:ident) => {{ | ||||
match $trade.exch { | match $trade.exch { | ||||
e!(bmex) => { | e!(bmex) => { | ||||
do_update(&mut bwindows, &mut bprices, $trade.time, $trade.price, $trade.amount); | |||||
//do_purge(&mut gwindows, &mut gprices, $trade.time); | |||||
do_update(&mut bmex_windows, &mut bprices, $trade.time, $trade.price, $trade.amount); | |||||
//do_purge(&mut gdax_windows, &mut gprices, $trade.time); | |||||
last_price = $trade.price; | last_price = $trade.price; | ||||
} | } | ||||
e!(gdax) => { | e!(gdax) => { | ||||
do_update(&mut gwindows, &mut gprices, $trade.time, $trade.price, $trade.amount); | |||||
//do_purge(&mut bwindows, &mut bprices, $trade.time); | |||||
do_update(&mut gdax_windows, &mut gprices, $trade.time, $trade.price, $trade.amount); | |||||
//do_purge(&mut bmex_windows, &mut bprices, $trade.time); | |||||
last_price = $trade.price; | last_price = $trade.price; | ||||
} | } | ||||
@@ -264,11 +267,11 @@ fn hard_mode<R, W>( | |||||
wtr.write_record(&[ | wtr.write_record(&[ | ||||
"time", | "time", | ||||
"last", | |||||
"bmex_5min", | |||||
"gdax_5min", | |||||
"n_bmex_p5", | |||||
"n_gdax_p5", | |||||
//"last", | |||||
//"bmex_5min", | |||||
//"gdax_5min", | |||||
//"n_bmex_p5", | |||||
//"n_gdax_p5", | |||||
"r5", | "r5", | ||||
"r15", | "r15", | ||||
"r60", | "r60", | ||||
@@ -279,7 +282,7 @@ fn hard_mode<R, W>( | |||||
//"n_gdax_p15", | //"n_gdax_p15", | ||||
//"n_gdax_p60", | //"n_gdax_p60", | ||||
//"gdax_p5_is_empty", | //"gdax_p5_is_empty", | ||||
//"gdax_p5_checked_wt_mean", | |||||
//"gdax_p5_checked_weighted_mean", | |||||
//"tradetime_minus_cur_bucket", | //"tradetime_minus_cur_bucket", | ||||
]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?; | ]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?; | ||||
@@ -306,48 +309,48 @@ fn hard_mode<R, W>( | |||||
"n written" => n_written, | "n written" => n_written, | ||||
"trade.time" => trade.time, | "trade.time" => trade.time, | ||||
"cur_bucket" => cur_bucket, | "cur_bucket" => cur_bucket, | ||||
"gdax p5 len" => gwindows.p5.len(), | |||||
"gdax p5 wt avg" => gwindows.p5.wt_mean(), | |||||
"gdax p5 len" => gdax_windows.p5.len(), | |||||
"gdax p5 wt avg" => gdax_windows.p5.weighted_mean(), | |||||
); | ); | ||||
do_purge(&mut gwindows, &mut gprices, cur_bucket); | |||||
do_purge(&mut bwindows, &mut bprices, cur_bucket); | |||||
do_purge(&mut gdax_windows, &mut gprices, cur_bucket); | |||||
do_purge(&mut bmex_windows, &mut bprices, cur_bucket); | |||||
debug!(logger, "finished purge"; | debug!(logger, "finished purge"; | ||||
"n" => n, | "n" => n, | ||||
"n written" => n_written, | "n written" => n_written, | ||||
"trade.time" => trade.time, | "trade.time" => trade.time, | ||||
"cur_bucket" => cur_bucket, | "cur_bucket" => cur_bucket, | ||||
"gdax p5 len" => gwindows.p5.len(), | |||||
"gdax p5 wt avg" => gwindows.p5.wt_mean(), | |||||
"gdax p5 len" => gdax_windows.p5.len(), | |||||
"gdax p5 wt avg" => gdax_windows.p5.weighted_mean(), | |||||
); | ); | ||||
ratios.p5 = bwindows.p5 .checked_wt_mean().unwrap_or(NAN) / gwindows.p5 .checked_wt_mean().unwrap_or(NAN); | |||||
ratios.p15 = bwindows.p15.checked_wt_mean().unwrap_or(NAN) / gwindows.p15.checked_wt_mean().unwrap_or(NAN); | |||||
ratios.p60 = bwindows.p60.checked_wt_mean().unwrap_or(NAN) / gwindows.p60.checked_wt_mean().unwrap_or(NAN); | |||||
ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean(); | |||||
ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean(); | |||||
ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean(); | |||||
//ratios.p5 = bwindows.p5 .wt_mean() / gwindows.p5 .wt_mean(); | |||||
//ratios.p15 = bwindows.p15.wt_mean() / gwindows.p15.wt_mean(); | |||||
//ratios.p60 = bwindows.p60.wt_mean() / gwindows.p60.wt_mean(); | |||||
//ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean(); | |||||
//ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean(); | |||||
//ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean(); | |||||
wtr.write_record(&[ | wtr.write_record(&[ | ||||
&format!("{}", cur_bucket), | &format!("{}", cur_bucket), | ||||
&format!("{}", last_price), | |||||
&format!("{}", bwindows.p5.checked_wt_mean().unwrap_or(NAN)), | |||||
&format!("{}", gwindows.p5.checked_wt_mean().unwrap_or(NAN)), | |||||
&format!("{}", bwindows.p5.len()), | |||||
&format!("{}", gwindows.p5.len()), | |||||
//&format!("{}", last_price), | |||||
//&format!("{}", bmex_windows.p5.checked_weighted_mean().unwrap_or(NAN)), | |||||
//&format!("{}", gdax_windows.p5.checked_weighted_mean().unwrap_or(NAN)), | |||||
//&format!("{}", bmex_windows.p5.len()), | |||||
//&format!("{}", gdax_windows.p5.len()), | |||||
&format!("{}", ratios.p5), | &format!("{}", ratios.p5), | ||||
&format!("{}", ratios.p15), | &format!("{}", ratios.p15), | ||||
&format!("{}", ratios.p60), | &format!("{}", ratios.p60), | ||||
//&format!("{}", bwindows.p15.len()), | |||||
//&format!("{}", gwindows.p60.len()), | |||||
//&format!("{}", gwindows.p15.len()), | |||||
//&format!("{}", gwindows.p15.len()), | |||||
//&format!("{}", bwindows.p60.len()), | |||||
//&format!("{}", bwindows.p5.is_empty()), | |||||
//&format!("{:?}", bwindows.p5.checked_wt_mean()), | |||||
//&format!("{}", bmex_windows.p15.len()), | |||||
//&format!("{}", gdax_windows.p60.len()), | |||||
//&format!("{}", gdax_windows.p15.len()), | |||||
//&format!("{}", gdax_windows.p15.len()), | |||||
//&format!("{}", bmex_windows.p60.len()), | |||||
//&format!("{}", bmex_windows.p5.is_empty()), | |||||
//&format!("{:?}", bmex_windows.p5.checked_weighted_mean()), | |||||
//&format!("{}", trade.time - cur_bucket), | //&format!("{}", trade.time - cur_bucket), | ||||
]).map_err(|e| { | ]).map_err(|e| { | ||||
@@ -1,5 +1,28 @@ | |||||
use std::collections::VecDeque; | use std::collections::VecDeque; | ||||
/// Calculates online weighted average for a rolling, time-based window | |||||
#[derive(Clone)] | |||||
pub struct WeightedMeanWindow { | |||||
/// The size of the window. On `purge`, any `WeightedPoint` items are considered | |||||
/// expired if the supplied `time` parameter is greater than `size` from the | |||||
/// `time` attribute of that `WeightedPoint` item. | |||||
size: u64, | |||||
/// The weights and values with times that are "currently" in the aggregation | |||||
/// window. On `push`, items are added to the "back" of the vedeque. On `purge`, | |||||
/// items with a `time` that is > `size` difference relative to the `time` passed | |||||
/// to `purge` are considered expired and removed. In both cases, adding and removing, | |||||
/// the incremental accumulated sums in `w_sum` and `sum_w` are updated. | |||||
items: VecDeque<WeightedPoint>, | |||||
/// The sum of the value * weight for each of the `WeightedPoint`s in `items`. | |||||
w_sum: f64, | |||||
/// The sum of the weights of each of the `WeightedPoint`s in `items`. | |||||
sum_w: f64, | |||||
} | |||||
/// Stores the time, value and weight for an item "currently" inside the | |||||
/// aggregation window of a `WeightedMeanWindow`, allowing its value and | |||||
/// weight to be subtracted from the accumulated sums of the window when | |||||
/// the item becomes expired. | |||||
#[derive(Debug, Clone)] | #[derive(Debug, Clone)] | ||||
pub struct WeightedPoint { | pub struct WeightedPoint { | ||||
pub time: u64, | pub time: u64, | ||||
@@ -12,16 +35,7 @@ pub struct WeightedPoint { | |||||
pub wt: f64, | pub wt: f64, | ||||
} | } | ||||
#[derive(Clone)] | |||||
pub struct WeightedAvgWindow { | |||||
size: u64, | |||||
items: VecDeque<WeightedPoint>, | |||||
w_sum: f64, | |||||
sum_w: f64, | |||||
//w_mean: f64, | |||||
} | |||||
impl WeightedAvgWindow { | |||||
impl WeightedMeanWindow { | |||||
pub fn new(size: u64) -> Self { | pub fn new(size: u64) -> Self { | ||||
Self { | Self { | ||||
size, | size, | ||||
@@ -35,9 +49,19 @@ impl WeightedAvgWindow { | |||||
/// | /// | ||||
/// Returns `true` if any items were removed. | /// Returns `true` if any items were removed. | ||||
pub fn purge(&mut self, time: u64) -> bool { | pub fn purge(&mut self, time: u64) -> bool { | ||||
// this is somewhat awkwardly implemented, but there is not anything like | |||||
// `drain_while` on `VecDeque` (or `Vec`) that would work like `take_while`, | |||||
// except also removing the items. Since we need the data in the items we | |||||
// are removing to update `sum_w` and `w_sum`, we loop over the expired | |||||
// items first, counting them in `n_remove`, then actually remove them | |||||
// in a second pass. | |||||
let mut n_remove = 0; | let mut n_remove = 0; | ||||
{ | { | ||||
// extra scope needed to shush the borrow checker | |||||
let items = &self.items; | let items = &self.items; | ||||
let w_sum = &mut self.w_sum; | let w_sum = &mut self.w_sum; | ||||
let sum_w = &mut self.sum_w; | let sum_w = &mut self.sum_w; | ||||
@@ -52,7 +76,10 @@ impl WeightedAvgWindow { | |||||
for _ in 0..n_remove { self.items.pop_front(); } | for _ in 0..n_remove { self.items.pop_front(); } | ||||
// when items is empty, set w_sum, sum_w to 0.0 | |||||
// when items is empty, set w_sum, sum_w to 0.0. the motive | |||||
// of this approach, versus an if block with assignment, is | |||||
// for the code to be "branchless" and do the same work each | |||||
// time, in a cache- and branch predictor-friendly manner. | |||||
let zeroer: f64 = ( ! self.items.is_empty()) as u8 as f64; | let zeroer: f64 = ( ! self.items.is_empty()) as u8 as f64; | ||||
self.w_sum *= zeroer; | self.w_sum *= zeroer; | ||||
self.sum_w *= zeroer; | self.sum_w *= zeroer; | ||||
@@ -61,6 +88,10 @@ impl WeightedAvgWindow { | |||||
} | } | ||||
/// Add a new item, updating incremental calculations in the process. | /// Add a new item, updating incremental calculations in the process. | ||||
/// | |||||
/// Note: it is assumed that `time` is >= the highest `time` value for any previous | |||||
/// item. The expiration logic `purge` relies on the items being added to a | |||||
/// `WeightedMeanWindow` in chronological order. | |||||
pub fn push(&mut self, time: u64, val: f64, wt: f64) { | pub fn push(&mut self, time: u64, val: f64, wt: f64) { | ||||
let wt_val: f64 = val * wt; | let wt_val: f64 = val * wt; | ||||
self.w_sum += wt_val; | self.w_sum += wt_val; | ||||
@@ -72,24 +103,26 @@ impl WeightedAvgWindow { | |||||
/// accumulators. | /// accumulators. | ||||
/// | /// | ||||
/// Note; this value is not cached. | /// Note; this value is not cached. | ||||
pub fn wt_mean(&self) -> f64 { | |||||
pub fn weighted_mean(&self) -> f64 { | |||||
self.w_sum / self.sum_w | self.w_sum / self.sum_w | ||||
} | } | ||||
/// Checks whether items `is_empty` before trying to calculate. | /// Checks whether items `is_empty` before trying to calculate. | ||||
/// Returns None if items is empty. | /// Returns None if items is empty. | ||||
pub fn checked_wt_mean(&self) -> Option<f64> { | |||||
/// | |||||
/// Note: this value is not cached. | |||||
pub fn checked_weighted_mean(&self) -> Option<f64> { | |||||
match self.is_empty() { | match self.is_empty() { | ||||
true => None, | true => None, | ||||
false => Some(self.w_sum / self.sum_w), | false => Some(self.w_sum / self.sum_w), | ||||
} | } | ||||
} | } | ||||
/// Purge, push and get `checked_wt_mean`, all in one convenient step. | |||||
/// Purge, push and get `checked_weighted_mean`, all in one convenient step. | |||||
pub fn update(&mut self, time: u64, val: f64, wt: f64) -> Option<f64> { | pub fn update(&mut self, time: u64, val: f64, wt: f64) -> Option<f64> { | ||||
self.purge(time); | self.purge(time); | ||||
self.push(time, val, wt); | self.push(time, val, wt); | ||||
self.checked_wt_mean() | |||||
self.checked_weighted_mean() | |||||
} | } | ||||
pub fn len(&self) -> usize { self.items.len() } | pub fn len(&self) -> usize { self.items.len() } | ||||
@@ -97,3 +130,80 @@ impl WeightedAvgWindow { | |||||
pub fn is_empty(&self) -> bool { self.items.is_empty() } | pub fn is_empty(&self) -> bool { self.items.is_empty() } | ||||
} | } | ||||
#[allow(unused)] | |||||
#[cfg(test)] | |||||
mod tests { | |||||
use super::*; | |||||
use approx::assert_relative_eq; | |||||
#[test] | |||||
fn weighted_mean_output_matches_numpy_average() { | |||||
let xs: Vec<f64> = vec![ 0.41305045, 0.93555897, 0.77885094, 0.9896831 , 0.79720248, | |||||
0.69497414, 0.34953127, 0.02331158, 0.89858514, 0.38312421 ]; | |||||
let ws: Vec<f64> = vec![ 0.01256151, 0.58996267, 0.6474601 , 0.33013727, 0.92964117, | |||||
0.21427296, 0.42990663, 0.81912449, 0.99428442, 0.71875903 ]; | |||||
let mut w = WeightedMeanWindow::new(1_000_000_000); | |||||
for (i, (val, weight)) in xs.iter().cloned().zip(ws.iter().cloned()).enumerate() { | |||||
w.push(i as u64, val, weight); | |||||
} | |||||
w.purge(11); | |||||
assert_eq!(w.items.len(), 10); | |||||
assert_relative_eq!(w.weighted_mean(), 0.63599718086101786, epsilon = 0.0001); | |||||
} | |||||
#[test] | |||||
fn checked_weighted_mean_returns_none_when_items_is_empty_and_unchecked_is_nan() { | |||||
let w = WeightedMeanWindow::new(1_000_000_000); | |||||
assert!(w.is_empty()); | |||||
assert_relative_eq!(w.sum_w, 0.0f64); | |||||
assert_relative_eq!(w.w_sum, 0.0f64); | |||||
assert!(w.checked_weighted_mean().is_none()); | |||||
assert!(w.weighted_mean().is_nan()); | |||||
} | |||||
#[test] | |||||
fn purge_expires_items() { | |||||
let xs: Vec<f64> = vec![ 0.41305045, 0.93555897, 0.77885094, 0.9896831 , 0.79720248, | |||||
0.69497414, 0.34953127, 0.02331158, 0.89858514, 0.38312421 ]; | |||||
let ws: Vec<f64> = vec![ 0.01256151, 0.58996267, 0.6474601 , 0.33013727, 0.92964117, | |||||
0.21427296, 0.42990663, 0.81912449, 0.99428442, 0.71875903 ]; | |||||
let xs_times_ws: Vec<f64> = xs.iter().zip(ws.iter()).map(|(&x,&w)| x * w).collect(); | |||||
let mut w = WeightedMeanWindow::new(10); | |||||
for (i, (val, weight)) in xs.iter().cloned().zip(ws.iter().cloned()).enumerate() { | |||||
w.push(i as u64, val, weight); | |||||
} | |||||
w.purge(10); | |||||
assert_eq!(w.items.len(), 10); | |||||
w.purge(11); | |||||
assert_eq!(w.items.len(), 9); | |||||
assert_relative_eq!(w.sum_w, (&ws[1..]).iter().sum::<f64>(), epsilon = 1e-5); | |||||
assert_relative_eq!(w.w_sum, (&xs_times_ws[1..]).iter().sum::<f64>(), epsilon = 1e-5); | |||||
w.purge(11); | |||||
assert_eq!(w.items.len(), 9); | |||||
w.purge(12); | |||||
assert_eq!(w.items.len(), 8); | |||||
assert_relative_eq!(w.sum_w, (&ws[2..]).iter().sum::<f64>(), epsilon = 1e-5); | |||||
assert_relative_eq!(w.w_sum, (&xs_times_ws[2..]).iter().sum::<f64>(), epsilon = 1e-5); | |||||
} | |||||
} | |||||