Browse Source

streamlined --hard-mode for csv, tests for WeightedMeanWindow

tags/v0.2.0
Jonathan Strong 4 years ago
parent
commit
e0faeef729
4 changed files with 691 additions and 61 deletions
  1. +3
    -0
      Cargo.toml
  2. +514
    -0
      notebooks/verifying-hard-query-output-of-reference-impl.ipynb
  3. +49
    -46
      src/csv.rs
  4. +125
    -15
      src/windows.rs

+ 3
- 0
Cargo.toml View File

@@ -36,6 +36,9 @@ chrono = { version = "0.4", features = ["serde"] }
clap = "2"
itertools-num = "0.1"

[dev-dependencies]
approx = "0.3"

[profile.release]
lto = "fat"
panic = "abort"


+ 514
- 0
notebooks/verifying-hard-query-output-of-reference-impl.ipynb View File

@@ -0,0 +1,514 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1min 8s, sys: 7.72 s, total: 1min 16s\n",
"Wall time: 1min 16s\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 92331988 entries, 0 to 92331987\n",
"Data columns (total 7 columns):\n",
"time int64\n",
"amount float64\n",
"exch object\n",
"price float64\n",
"server_time int64\n",
"side object\n",
"ticker object\n",
"dtypes: float64(2), int64(2), object(3)\n",
"memory usage: 4.8+ GB\n"
]
}
],
"source": [
"%time df = pd.read_csv('/xfs/sample.csv')\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>time</th>\n",
" <th>amount</th>\n",
" <th>exch</th>\n",
" <th>price</th>\n",
" <th>server_time</th>\n",
" <th>side</th>\n",
" <th>ticker</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>1561939200002479372</td>\n",
" <td>1.4894</td>\n",
" <td>bnce</td>\n",
" <td>292.7000</td>\n",
" <td>1561939199919000064</td>\n",
" <td>NaN</td>\n",
" <td>eth_usd</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>1561939200011035644</td>\n",
" <td>0.0833</td>\n",
" <td>btfx</td>\n",
" <td>10809.0000</td>\n",
" <td>1561939199927000064</td>\n",
" <td>bid</td>\n",
" <td>btc_usd</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1561939200011055712</td>\n",
" <td>0.8333</td>\n",
" <td>btfx</td>\n",
" <td>10809.0000</td>\n",
" <td>1561939199927000064</td>\n",
" <td>bid</td>\n",
" <td>btc_usd</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1561939200019037617</td>\n",
" <td>0.0831</td>\n",
" <td>bnce</td>\n",
" <td>10854.1000</td>\n",
" <td>1561939199935000064</td>\n",
" <td>NaN</td>\n",
" <td>btc_usd</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1561939200026450471</td>\n",
" <td>0.1250</td>\n",
" <td>okex</td>\n",
" <td>123.2100</td>\n",
" <td>1561939200026450432</td>\n",
" <td>ask</td>\n",
" <td>ltc_usd</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" time amount exch price server_time side \\\n",
"0 1561939200002479372 1.4894 bnce 292.7000 1561939199919000064 NaN \n",
"1 1561939200011035644 0.0833 btfx 10809.0000 1561939199927000064 bid \n",
"2 1561939200011055712 0.8333 btfx 10809.0000 1561939199927000064 bid \n",
"3 1561939200019037617 0.0831 bnce 10854.1000 1561939199935000064 NaN \n",
"4 1561939200026450471 0.1250 okex 123.2100 1561939200026450432 ask \n",
"\n",
" ticker \n",
"0 eth_usd \n",
"1 btc_usd \n",
"2 btc_usd \n",
"3 btc_usd \n",
"4 ltc_usd "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(True, True, True)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"SECOND = int(1e9)\n",
"\n",
"example_time = 1567295920000000000\n",
"\n",
"last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n",
"last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n",
"last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n",
"of_btc_usd = df['ticker'] == 'btc_usd'\n",
"of_gdax = df['exch'] == 'gdax'\n",
"of_bmex = df['exch'] == 'bmex'\n",
"\n",
"g5 = last_5min & of_btc_usd & of_gdax\n",
"b5 = last_5min & of_btc_usd & of_bmex\n",
"g15 = last_15min & of_btc_usd & of_gdax\n",
"b15 = last_15min & of_btc_usd & of_bmex\n",
"g60 = last_60min & of_btc_usd & of_gdax\n",
"b60 = last_60min & of_btc_usd & of_bmex\n",
"\n",
"ratio_5min = ((df.loc[b5, 'price'] * df.loc[b5, 'amount']).sum() / df.loc[b5, 'amount'].sum()) / ((df.loc[g5, 'price'] * df.loc[g5, 'amount']).sum() / df.loc[g5, 'amount'].sum())\n",
"ratio_15min = ((df.loc[b15, 'price'] * df.loc[b15, 'amount']).sum() / df.loc[b15, 'amount'].sum()) / ((df.loc[g15, 'price'] * df.loc[g15, 'amount']).sum() / df.loc[g15, 'amount'].sum())\n",
"ratio_60min = ((df.loc[b60, 'price'] * df.loc[b60, 'amount']).sum() / df.loc[b60, 'amount'].sum()) / ((df.loc[g60, 'price'] * df.loc[g60, 'amount']).sum() / df.loc[g60, 'amount'].sum())\n",
"\n",
"abs(ratio_5min - 1.000474060563638) < 1e-6, abs(ratio_15min - 1.0005019306061411) < 1e-6, abs(ratio_60min - 1.0002338013889658) < 1e-6"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>time</th>\n",
" <th>last</th>\n",
" <th>bmex_5min</th>\n",
" <th>gdax_5min</th>\n",
" <th>n_bmex_p5</th>\n",
" <th>n_gdax_p5</th>\n",
" <th>r5</th>\n",
" <th>r15</th>\n",
" <th>r60</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>1561939210000000000</td>\n",
" <td>10758.5800</td>\n",
" <td>10760.7205</td>\n",
" <td>10760.4593</td>\n",
" <td>22</td>\n",
" <td>28</td>\n",
" <td>1.0000</td>\n",
" <td>1.0000</td>\n",
" <td>1.0000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>1561939220000000000</td>\n",
" <td>10770.0000</td>\n",
" <td>10763.6811</td>\n",
" <td>10761.2528</td>\n",
" <td>230</td>\n",
" <td>75</td>\n",
" <td>1.0002</td>\n",
" <td>1.0002</td>\n",
" <td>1.0002</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1561939230000000000</td>\n",
" <td>10758.0100</td>\n",
" <td>10761.8843</td>\n",
" <td>10760.1596</td>\n",
" <td>418</td>\n",
" <td>120</td>\n",
" <td>1.0002</td>\n",
" <td>1.0002</td>\n",
" <td>1.0002</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1561939240000000000</td>\n",
" <td>10752.5000</td>\n",
" <td>10757.6829</td>\n",
" <td>10760.3630</td>\n",
" <td>507</td>\n",
" <td>147</td>\n",
" <td>0.9998</td>\n",
" <td>0.9998</td>\n",
" <td>0.9998</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1561939250000000000</td>\n",
" <td>10772.6900</td>\n",
" <td>10757.5702</td>\n",
" <td>10763.0840</td>\n",
" <td>537</td>\n",
" <td>191</td>\n",
" <td>0.9995</td>\n",
" <td>0.9995</td>\n",
" <td>0.9995</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" time last bmex_5min gdax_5min n_bmex_p5 n_gdax_p5 \\\n",
"0 1561939210000000000 10758.5800 10760.7205 10760.4593 22 28 \n",
"1 1561939220000000000 10770.0000 10763.6811 10761.2528 230 75 \n",
"2 1561939230000000000 10758.0100 10761.8843 10760.1596 418 120 \n",
"3 1561939240000000000 10752.5000 10757.6829 10760.3630 507 147 \n",
"4 1561939250000000000 10772.6900 10757.5702 10763.0840 537 191 \n",
"\n",
" r5 r15 r60 \n",
"0 1.0000 1.0000 1.0000 \n",
"1 1.0002 1.0002 1.0002 \n",
"2 1.0002 1.0002 1.0002 \n",
"3 0.9998 0.9998 0.9998 \n",
"4 0.9995 0.9995 0.9995 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ref = pd.read_csv('../var/hard.csv')\n",
"ref.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1567295750000000000, 1567295760000000000, 1567295770000000000, 1567295780000000000, 1567295790000000000, 1567295800000000000, 1567295810000000000, 1567295820000000000, 1567295830000000000,\n",
" 1567295840000000000, 1567295850000000000, 1567295860000000000, 1567295870000000000, 1567295880000000000, 1567295890000000000, 1567295900000000000, 1567295910000000000, 1567295920000000000,\n",
" 1567295930000000000, 1567295940000000000, 1567295950000000000, 1567295960000000000, 1567295970000000000, 1567295980000000000, 1567295990000000000])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ref['time'].tail(25).values"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"finished in 487.8sec\n"
]
}
],
"source": [
"import time\n",
"\n",
"start = time.time()\n",
"rows = []\n",
"\n",
"for example_time in ref['time'].tail(25).values:\n",
" last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n",
" last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n",
" last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n",
" of_btc_usd = df['ticker'] == 'btc_usd'\n",
" of_gdax = df['exch'] == 'gdax'\n",
" of_bmex = df['exch'] == 'bmex'\n",
"\n",
" g5 = last_5min & of_btc_usd & of_gdax\n",
" b5 = last_5min & of_btc_usd & of_bmex\n",
" g15 = last_15min & of_btc_usd & of_gdax\n",
" b15 = last_15min & of_btc_usd & of_bmex\n",
" g60 = last_60min & of_btc_usd & of_gdax\n",
" b60 = last_60min & of_btc_usd & of_bmex\n",
"\n",
" ratio_5min = ((df.loc[b5, 'price'] * df.loc[b5, 'amount']).sum() / df.loc[b5, 'amount'].sum()) / ((df.loc[g5, 'price'] * df.loc[g5, 'amount']).sum() / df.loc[g5, 'amount'].sum())\n",
" ratio_15min = ((df.loc[b15, 'price'] * df.loc[b15, 'amount']).sum() / df.loc[b15, 'amount'].sum()) / ((df.loc[g15, 'price'] * df.loc[g15, 'amount']).sum() / df.loc[g15, 'amount'].sum())\n",
" ratio_60min = ((df.loc[b60, 'price'] * df.loc[b60, 'amount']).sum() / df.loc[b60, 'amount'].sum()) / ((df.loc[g60, 'price'] * df.loc[g60, 'amount']).sum() / df.loc[g60, 'amount'].sum())\n",
" rows.append(dict(example_time=example_time, r5=ratio_5min, r15=ratio_15min, r60=ratio_60min))\n",
" \n",
"took = time.time() - start\n",
"print('finished in {:.1f}sec'.format(took))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.342554545733013"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hypothetical_full_took = (took / 25) * 5401808\n",
"hypothetical_full_took / 60 / 60 / 24 / 365"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.1016643329480867"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"92331988 / 908204336"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"19.513984975814818"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"took / 25"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('105,410,800.2', 105410800.15423629)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'{:,.1f}'.format(hypothetical_full_took), hypothetical_full_took"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"r5_delta 0.000000000002368\n",
"r15_delta 0.000000000010704\n",
"r60_delta 0.000000000005513\n",
"dtype: object"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(pd.DataFrame(rows).join(ref.set_index('time'), on='example_time', rsuffix='_rust')\n",
" .assign(r5_delta=lambda df: abs(df['r5'] - df['r5_rust']))\n",
" .assign(r15_delta=lambda df: abs(df['r15'] - df['r15_rust']))\n",
" .assign(r60_delta=lambda df: abs(df['r60'] - df['r60_rust']))\n",
")[['r5_delta','r15_delta','r60_delta']].max(axis=0).map(lambda x: '{:.15f}'.format(x))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

+ 49
- 46
src/csv.rs View File

@@ -15,7 +15,7 @@ use serde::{Serialize, Deserialize};
use slog::Drain;
use pretty_toa::ThousandsSep;
use markets::crypto::{Exchange, Ticker, Side};
use pipelines::windows::WeightedAvgWindow;
use pipelines::windows::WeightedMeanWindow;


// equivalent to panic! but without the ugly 'thread main panicked' yada yada
@@ -214,24 +214,28 @@ fn hard_mode<R, W>(

let mut ratios: Lookbacks<f64> = Default::default();

let mut bwindows: Lookbacks<WeightedAvgWindow> =
let mut bmex_windows: Lookbacks<WeightedMeanWindow> =
Lookbacks {
p5: WeightedAvgWindow::new(ONE_SECOND * 60 * 5 ),
p15: WeightedAvgWindow::new(ONE_SECOND * 60 * 15),
p60: WeightedAvgWindow::new(ONE_SECOND * 60 * 60),
p5: WeightedMeanWindow::new(ONE_SECOND * 60 * 5 ),
p15: WeightedMeanWindow::new(ONE_SECOND * 60 * 15),
p60: WeightedMeanWindow::new(ONE_SECOND * 60 * 60),
};
let mut gwindows = bwindows.clone();
let mut gdax_windows = bmex_windows.clone();


#[inline(always)]
fn do_purge(windows: &mut Lookbacks<WeightedAvgWindow>, prices: &mut Lookbacks<f64>, time: u64) {
if windows.p5.purge(time) { prices.p5 = windows.p5 .checked_wt_mean().unwrap_or(NAN); }
if windows.p15.purge(time) { prices.p15 = windows.p15.checked_wt_mean().unwrap_or(NAN); }
if windows.p60.purge(time) { prices.p60 = windows.p60.checked_wt_mean().unwrap_or(NAN); }
fn do_purge(windows: &mut Lookbacks<WeightedMeanWindow>, prices: &mut Lookbacks<f64>, time: u64) {
//if windows.p5.purge(time) { prices.p5 = windows.p5 .checked_weighted_mean().unwrap_or(NAN); }
//if windows.p15.purge(time) { prices.p15 = windows.p15.checked_weighted_mean().unwrap_or(NAN); }
//if windows.p60.purge(time) { prices.p60 = windows.p60.checked_weighted_mean().unwrap_or(NAN); }
windows.p5 .purge(time);
windows.p15.purge(time);
windows.p60.purge(time);
}

#[allow(unused)]
#[inline(always)]
fn do_update(windows: &mut Lookbacks<WeightedAvgWindow>, prices: &mut Lookbacks<f64>, time: u64, price: f64, amount: f64) {
fn do_update(windows: &mut Lookbacks<WeightedMeanWindow>, prices: &mut Lookbacks<f64>, time: u64, price: f64, amount: f64) {
//prices.p5 = windows.p5 .update(time, price, amount).unwrap_or(NAN);
//prices.p15 = windows.p15.update(time, price, amount).unwrap_or(NAN);
//prices.p60 = windows.p60.update(time, price, amount).unwrap_or(NAN);
@@ -239,21 +243,20 @@ fn hard_mode<R, W>(
windows.p5 .push(time, price, amount);
windows.p15.push(time, price, amount);
windows.p60.push(time, price, amount);

}
macro_rules! update { // in macro to avoid repeating code once outside loop, and again in loop body
($trade:ident) => {{
match $trade.exch {
e!(bmex) => {
do_update(&mut bwindows, &mut bprices, $trade.time, $trade.price, $trade.amount);
//do_purge(&mut gwindows, &mut gprices, $trade.time);
do_update(&mut bmex_windows, &mut bprices, $trade.time, $trade.price, $trade.amount);
//do_purge(&mut gdax_windows, &mut gprices, $trade.time);
last_price = $trade.price;
}

e!(gdax) => {
do_update(&mut gwindows, &mut gprices, $trade.time, $trade.price, $trade.amount);
//do_purge(&mut bwindows, &mut bprices, $trade.time);
do_update(&mut gdax_windows, &mut gprices, $trade.time, $trade.price, $trade.amount);
//do_purge(&mut bmex_windows, &mut bprices, $trade.time);
last_price = $trade.price;
}
@@ -264,11 +267,11 @@ fn hard_mode<R, W>(

wtr.write_record(&[
"time",
"last",
"bmex_5min",
"gdax_5min",
"n_bmex_p5",
"n_gdax_p5",
//"last",
//"bmex_5min",
//"gdax_5min",
//"n_bmex_p5",
//"n_gdax_p5",
"r5",
"r15",
"r60",
@@ -279,7 +282,7 @@ fn hard_mode<R, W>(
//"n_gdax_p15",
//"n_gdax_p60",
//"gdax_p5_is_empty",
//"gdax_p5_checked_wt_mean",
//"gdax_p5_checked_weighted_mean",
//"tradetime_minus_cur_bucket",
]).map_err(|e| format!("writing CSV headers to output file failed: {}", e))?;

@@ -306,48 +309,48 @@ fn hard_mode<R, W>(
"n written" => n_written,
"trade.time" => trade.time,
"cur_bucket" => cur_bucket,
"gdax p5 len" => gwindows.p5.len(),
"gdax p5 wt avg" => gwindows.p5.wt_mean(),
"gdax p5 len" => gdax_windows.p5.len(),
"gdax p5 wt avg" => gdax_windows.p5.weighted_mean(),
);

do_purge(&mut gwindows, &mut gprices, cur_bucket);
do_purge(&mut bwindows, &mut bprices, cur_bucket);
do_purge(&mut gdax_windows, &mut gprices, cur_bucket);
do_purge(&mut bmex_windows, &mut bprices, cur_bucket);

debug!(logger, "finished purge";
"n" => n,
"n written" => n_written,
"trade.time" => trade.time,
"cur_bucket" => cur_bucket,
"gdax p5 len" => gwindows.p5.len(),
"gdax p5 wt avg" => gwindows.p5.wt_mean(),
"gdax p5 len" => gdax_windows.p5.len(),
"gdax p5 wt avg" => gdax_windows.p5.weighted_mean(),
);


ratios.p5 = bwindows.p5 .checked_wt_mean().unwrap_or(NAN) / gwindows.p5 .checked_wt_mean().unwrap_or(NAN);
ratios.p15 = bwindows.p15.checked_wt_mean().unwrap_or(NAN) / gwindows.p15.checked_wt_mean().unwrap_or(NAN);
ratios.p60 = bwindows.p60.checked_wt_mean().unwrap_or(NAN) / gwindows.p60.checked_wt_mean().unwrap_or(NAN);
ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean();
ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean();
ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean();

//ratios.p5 = bwindows.p5 .wt_mean() / gwindows.p5 .wt_mean();
//ratios.p15 = bwindows.p15.wt_mean() / gwindows.p15.wt_mean();
//ratios.p60 = bwindows.p60.wt_mean() / gwindows.p60.wt_mean();
//ratios.p5 = bmex_windows.p5 .weighted_mean() / gdax_windows.p5 .weighted_mean();
//ratios.p15 = bmex_windows.p15.weighted_mean() / gdax_windows.p15.weighted_mean();
//ratios.p60 = bmex_windows.p60.weighted_mean() / gdax_windows.p60.weighted_mean();

wtr.write_record(&[
&format!("{}", cur_bucket),
&format!("{}", last_price),
&format!("{}", bwindows.p5.checked_wt_mean().unwrap_or(NAN)),
&format!("{}", gwindows.p5.checked_wt_mean().unwrap_or(NAN)),
&format!("{}", bwindows.p5.len()),
&format!("{}", gwindows.p5.len()),
//&format!("{}", last_price),
//&format!("{}", bmex_windows.p5.checked_weighted_mean().unwrap_or(NAN)),
//&format!("{}", gdax_windows.p5.checked_weighted_mean().unwrap_or(NAN)),
//&format!("{}", bmex_windows.p5.len()),
//&format!("{}", gdax_windows.p5.len()),
&format!("{}", ratios.p5),
&format!("{}", ratios.p15),
&format!("{}", ratios.p60),
//&format!("{}", bwindows.p15.len()),
//&format!("{}", gwindows.p60.len()),
//&format!("{}", gwindows.p15.len()),
//&format!("{}", gwindows.p15.len()),
//&format!("{}", bwindows.p60.len()),
//&format!("{}", bwindows.p5.is_empty()),
//&format!("{:?}", bwindows.p5.checked_wt_mean()),
//&format!("{}", bmex_windows.p15.len()),
//&format!("{}", gdax_windows.p60.len()),
//&format!("{}", gdax_windows.p15.len()),
//&format!("{}", gdax_windows.p15.len()),
//&format!("{}", bmex_windows.p60.len()),
//&format!("{}", bmex_windows.p5.is_empty()),
//&format!("{:?}", bmex_windows.p5.checked_weighted_mean()),
//&format!("{}", trade.time - cur_bucket),

]).map_err(|e| {


+ 125
- 15
src/windows.rs View File

@@ -1,5 +1,28 @@
use std::collections::VecDeque;

/// Calculates an online weighted average for a rolling, time-based window.
#[derive(Clone)]
pub struct WeightedMeanWindow {
/// The size of the window. On `purge`, a `WeightedPoint` item is considered
/// expired if the supplied `time` parameter differs from the `time` attribute
/// of that `WeightedPoint` item by more than `size`.
size: u64,
/// The weights and values, with their times, that are "currently" in the
/// aggregation window. On `push`, items are added to the "back" of the `VecDeque`.
/// On `purge`, items with a `time` that is > `size` difference relative to the
/// `time` passed to `purge` are considered expired and removed. In both cases,
/// adding and removing, the incremental accumulated sums in `w_sum` and `sum_w`
/// are updated.
items: VecDeque<WeightedPoint>,
/// The sum of value * weight over the `WeightedPoint`s in `items`.
w_sum: f64,
/// The sum of the weights of the `WeightedPoint`s in `items`.
sum_w: f64,
}

/// Stores the time, value and weight for an item "currently" inside the
/// aggregation window of a `WeightedMeanWindow`, allowing its value and
/// weight to be subtracted from the accumulated sums of the window when
/// the item becomes expired.
#[derive(Debug, Clone)]
pub struct WeightedPoint {
pub time: u64,
@@ -12,16 +35,7 @@ pub struct WeightedPoint {
pub wt: f64,
}

#[derive(Clone)]
pub struct WeightedAvgWindow {
size: u64,
items: VecDeque<WeightedPoint>,
w_sum: f64,
sum_w: f64,
//w_mean: f64,
}

impl WeightedAvgWindow {
impl WeightedMeanWindow {
pub fn new(size: u64) -> Self {
Self {
size,
@@ -35,9 +49,19 @@ impl WeightedAvgWindow {
///
/// Returns `true` if any items were removed.
pub fn purge(&mut self, time: u64) -> bool {

// this is somewhat awkwardly implemented, but there is not anything like
// `drain_while` on `VecDeque` (or `Vec`) that would work like `take_while`,
// except also removing the items. Since we need the data in the items we
// are removing to update `sum_w` and `w_sum`, we loop over the expired
// items first, counting them in `n_remove`, then actually remove them
// in a second pass.

let mut n_remove = 0;

{
// extra scope needed to shush the borrow checker

let items = &self.items;
let w_sum = &mut self.w_sum;
let sum_w = &mut self.sum_w;
@@ -52,7 +76,10 @@ impl WeightedAvgWindow {

for _ in 0..n_remove { self.items.pop_front(); }

// when items is empty, set w_sum, sum_w to 0.0
// when items is empty, set w_sum, sum_w to 0.0. the motive
// of this approach, versus an if block with assignment, is
// for the code to be "branchless" and do the same work each
// time, in a cache- and branch predictor-friendly manner.
let zeroer: f64 = ( ! self.items.is_empty()) as u8 as f64;
self.w_sum *= zeroer;
self.sum_w *= zeroer;
@@ -61,6 +88,10 @@ impl WeightedAvgWindow {
}

/// Add a new item, updating incremental calculations in the process.
///
/// Note: it is assumed that `time` is >= the highest `time` value for any previous
/// item. The expiration logic `purge` relies on the items being added to a
/// `WeightedMeanWindow` in chronological order.
pub fn push(&mut self, time: u64, val: f64, wt: f64) {
let wt_val: f64 = val * wt;
self.w_sum += wt_val;
@@ -72,24 +103,26 @@ impl WeightedAvgWindow {
/// accumulators.
///
/// Note: this value is not cached.
pub fn wt_mean(&self) -> f64 {
pub fn weighted_mean(&self) -> f64 {
self.w_sum / self.sum_w
}

/// Checks whether items `is_empty` before trying to calculate.
/// Returns None if items is empty.
pub fn checked_wt_mean(&self) -> Option<f64> {
///
/// Note: this value is not cached.
pub fn checked_weighted_mean(&self) -> Option<f64> {
match self.is_empty() {
true => None,
false => Some(self.w_sum / self.sum_w),
}
}

/// Purge, push and get `checked_wt_mean`, all in one convenient step.
/// Purge, push and get `checked_weighted_mean`, all in one convenient step.
pub fn update(&mut self, time: u64, val: f64, wt: f64) -> Option<f64> {
self.purge(time);
self.push(time, val, wt);
self.checked_wt_mean()
self.checked_weighted_mean()
}

pub fn len(&self) -> usize { self.items.len() }
@@ -97,3 +130,80 @@ impl WeightedAvgWindow {
pub fn is_empty(&self) -> bool { self.items.is_empty() }
}

#[allow(unused)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    /// Sample values shared by the tests below.
    const XS: [f64; 10] = [
        0.41305045, 0.93555897, 0.77885094, 0.9896831, 0.79720248,
        0.69497414, 0.34953127, 0.02331158, 0.89858514, 0.38312421,
    ];

    /// Sample weights, matched pairwise with `XS`.
    const WS: [f64; 10] = [
        0.01256151, 0.58996267, 0.6474601, 0.33013727, 0.92964117,
        0.21427296, 0.42990663, 0.81912449, 0.99428442, 0.71875903,
    ];

    /// Builds a window of the given `size` and pushes every `(XS[i], WS[i])`
    /// pair into it, using the index `i` as the item's time.
    fn filled_window(size: u64) -> WeightedMeanWindow {
        let mut window = WeightedMeanWindow::new(size);
        for (t, (&value, &weight)) in XS.iter().zip(WS.iter()).enumerate() {
            window.push(t as u64, value, weight);
        }
        window
    }

    #[test]
    fn weighted_mean_output_matches_numpy_average() {
        // The window is far larger than any time used here, so the purge
        // is expected to leave all ten items in place.
        let mut window = filled_window(1_000_000_000);

        window.purge(11);

        assert_eq!(window.items.len(), 10);
        assert_relative_eq!(window.weighted_mean(), 0.63599718086101786, epsilon = 0.0001);
    }

    #[test]
    fn checked_weighted_mean_returns_none_when_items_is_empty_and_unchecked_is_nan() {
        let window = WeightedMeanWindow::new(1_000_000_000);

        assert!(window.is_empty());
        assert_relative_eq!(window.sum_w, 0.0f64);
        assert_relative_eq!(window.w_sum, 0.0f64);

        // The checked accessor reports the empty window as `None`, while the
        // unchecked one is asserted to yield NaN.
        assert!(window.checked_weighted_mean().is_none());
        assert!(window.weighted_mean().is_nan());
    }

    #[test]
    fn purge_expires_items() {
        let products: Vec<f64> = XS
            .iter()
            .zip(WS.iter())
            .map(|(&x, &w)| x * w)
            .collect();

        let mut window = filled_window(10);

        // Oldest item has time 0; at time 10 it is exactly `size` old and is kept.
        window.purge(10);
        assert_eq!(window.items.len(), 10);

        // At time 11 the oldest item is dropped and both sums shrink with it.
        window.purge(11);
        assert_eq!(window.items.len(), 9);
        assert_relative_eq!(window.sum_w, WS[1..].iter().sum::<f64>(), epsilon = 1e-5);
        assert_relative_eq!(window.w_sum, products[1..].iter().sum::<f64>(), epsilon = 1e-5);

        // Purging again at the same time removes nothing further.
        window.purge(11);
        assert_eq!(window.items.len(), 9);

        // One more tick expires the next-oldest item.
        window.purge(12);
        assert_eq!(window.items.len(), 8);
        assert_relative_eq!(window.sum_w, WS[2..].iter().sum::<f64>(), epsilon = 1e-5);
        assert_relative_eq!(window.w_sum, products[2..].iter().sum::<f64>(), epsilon = 1e-5);
    }
}



Loading…
Cancel
Save