{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 1min 8s, sys: 7.72 s, total: 1min 16s\n", "Wall time: 1min 16s\n", "\n", "RangeIndex: 92331988 entries, 0 to 92331987\n", "Data columns (total 7 columns):\n", "time int64\n", "amount float64\n", "exch object\n", "price float64\n", "server_time int64\n", "side object\n", "ticker object\n", "dtypes: float64(2), int64(2), object(3)\n", "memory usage: 4.8+ GB\n" ] } ], "source": [ "# Imported here because no other cell imports pandas; without it a fresh kernel fails on the first use of pd.\n", "import pandas as pd\n", "\n", "# %time reports only the cost of the CSV read; df.info() then summarises the loaded frame.\n", "%time df = pd.read_csv('/xfs/sample.csv')\n", "df.info()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timeamountexchpriceserver_timesideticker
015619392000024793721.4894bnce292.70001561939199919000064NaNeth_usd
115619392000110356440.0833btfx10809.00001561939199927000064bidbtc_usd
215619392000110557120.8333btfx10809.00001561939199927000064bidbtc_usd
315619392000190376170.0831bnce10854.10001561939199935000064NaNbtc_usd
415619392000264504710.1250okex123.21001561939200026450432askltc_usd
\n", "
" ], "text/plain": [ " time amount exch price server_time side \\\n", "0 1561939200002479372 1.4894 bnce 292.7000 1561939199919000064 NaN \n", "1 1561939200011035644 0.0833 btfx 10809.0000 1561939199927000064 bid \n", "2 1561939200011055712 0.8333 btfx 10809.0000 1561939199927000064 bid \n", "3 1561939200019037617 0.0831 bnce 10854.1000 1561939199935000064 NaN \n", "4 1561939200026450471 0.1250 okex 123.2100 1561939200026450432 ask \n", "\n", " ticker \n", "0 eth_usd \n", "1 btc_usd \n", "2 btc_usd \n", "3 btc_usd \n", "4 ltc_usd " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(True, True, True)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One second expressed in the integer units of df['time'] (presumably nanoseconds -- confirm).\n", "SECOND = int(1e9)\n", "\n", "example_time = 1567295920000000000\n", "\n", "\n", "def vwap(trades, mask):\n", "    \"\"\"Return sum(price * amount) / sum(amount) over the rows of `trades` selected by `mask`.\"\"\"\n", "    picked = trades.loc[mask, ['price', 'amount']]\n", "    return (picked['price'] * picked['amount']).sum() / picked['amount'].sum()\n", "\n", "\n", "# Trailing windows (example_time - N minutes, example_time]: open on the left, closed on the right.\n", "last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n", "last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n", "last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n", "of_btc_usd = df['ticker'] == 'btc_usd'\n", "of_gdax = df['exch'] == 'gdax'\n", "of_bmex = df['exch'] == 'bmex'\n", "\n", "# Row selectors: g* = gdax btc_usd trades, b* = bmex btc_usd trades, for each window length.\n", "g5 = last_5min & of_btc_usd & of_gdax\n", "b5 = last_5min & of_btc_usd & of_bmex\n", "g15 = last_15min & of_btc_usd & of_gdax\n", "b15 = last_15min & of_btc_usd & of_bmex\n", "g60 = last_60min & of_btc_usd & of_gdax\n", "b60 = last_60min & of_btc_usd & of_bmex\n", "\n", "# Each ratio is the bmex volume-weighted price divided by the gdax one over the same window.\n", "ratio_5min = vwap(df, b5) / vwap(df, g5)\n", "ratio_15min = vwap(df, b15) / vwap(df, g15)\n", "ratio_60min = vwap(df, b60) / vwap(df, g60)\n", "\n", "# Compare against the hard-coded expected values; the cell displays one boolean per window.\n", "abs(ratio_5min - 1.000474060563638) < 1e-6, abs(ratio_15min - 1.0005019306061411) < 1e-6, abs(ratio_60min - 1.0002338013889658) < 1e-6" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timelastbmex_5mingdax_5minn_bmex_p5n_gdax_p5r5r15r60
0156193921000000000010758.580010760.720510760.459322281.00001.00001.0000
1156193922000000000010770.000010763.681110761.2528230751.00021.00021.0002
2156193923000000000010758.010010761.884310760.15964181201.00021.00021.0002
3156193924000000000010752.500010757.682910760.36305071470.99980.99980.9998
4156193925000000000010772.690010757.570210763.08405371910.99950.99950.9995
\n", "
" ], "text/plain": [ " time last bmex_5min gdax_5min n_bmex_p5 n_gdax_p5 \\\n", "0 1561939210000000000 10758.5800 10760.7205 10760.4593 22 28 \n", "1 1561939220000000000 10770.0000 10763.6811 10761.2528 230 75 \n", "2 1561939230000000000 10758.0100 10761.8843 10760.1596 418 120 \n", "3 1561939240000000000 10752.5000 10757.6829 10760.3630 507 147 \n", "4 1561939250000000000 10772.6900 10757.5702 10763.0840 537 191 \n", "\n", " r5 r15 r60 \n", "0 1.0000 1.0000 1.0000 \n", "1 1.0002 1.0002 1.0002 \n", "2 1.0002 1.0002 1.0002 \n", "3 0.9998 0.9998 0.9998 \n", "4 0.9995 0.9995 0.9995 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ref = pd.read_csv('../var/hard.csv')\n", "ref.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1567295750000000000, 1567295760000000000, 1567295770000000000, 1567295780000000000, 1567295790000000000, 1567295800000000000, 1567295810000000000, 1567295820000000000, 1567295830000000000,\n", " 1567295840000000000, 1567295850000000000, 1567295860000000000, 1567295870000000000, 1567295880000000000, 1567295890000000000, 1567295900000000000, 1567295910000000000, 1567295920000000000,\n", " 1567295930000000000, 1567295940000000000, 1567295950000000000, 1567295960000000000, 1567295970000000000, 1567295980000000000, 1567295990000000000])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ref['time'].tail(25).values" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "finished in 487.8sec\n" ] } ], "source": [ "import time\n", "\n", "# perf_counter() is monotonic, so the measured interval cannot be skewed by wall-clock adjustments.\n", "start = time.perf_counter()\n", "rows = []\n", "\n", "# NOTE(review): the ticker/exchange masks do not depend on example_time and could be built once\n", "# outside the loop. They are left inside because the recorded timing and the extrapolation in the\n", "# following cells measure this loop as written.\n", "for example_time in ref['time'].tail(25).values:\n", "    # Trailing windows (example_time - N minutes, example_time].\n", "    last_5min = (df['time'] > example_time - SECOND * 60 * 5) & (df['time'] <= example_time)\n", "    last_15min = (df['time'] > example_time - SECOND * 60 * 15) & (df['time'] <= example_time)\n", "    last_60min = (df['time'] > example_time - SECOND * 60 * 60) & (df['time'] <= example_time)\n", "    of_btc_usd = df['ticker'] == 'btc_usd'\n", "    of_gdax = df['exch'] == 'gdax'\n", "    of_bmex = df['exch'] == 'bmex'\n", "\n", "    g5 = last_5min & of_btc_usd & of_gdax\n", "    b5 = last_5min & of_btc_usd & of_bmex\n", "    g15 = last_15min & of_btc_usd & of_gdax\n", "    b15 = last_15min & of_btc_usd & of_bmex\n", "    g60 = last_60min & of_btc_usd & of_gdax\n", "    b60 = last_60min & of_btc_usd & of_bmex\n", "\n", "    # bmex volume-weighted price divided by the gdax volume-weighted price, per window.\n", "    ratio_5min = ((df.loc[b5, 'price'] * df.loc[b5, 'amount']).sum() / df.loc[b5, 'amount'].sum()) / ((df.loc[g5, 'price'] * df.loc[g5, 'amount']).sum() / df.loc[g5, 'amount'].sum())\n", "    ratio_15min = ((df.loc[b15, 'price'] * df.loc[b15, 'amount']).sum() / df.loc[b15, 'amount'].sum()) / ((df.loc[g15, 'price'] * df.loc[g15, 'amount']).sum() / df.loc[g15, 'amount'].sum())\n", "    ratio_60min = ((df.loc[b60, 'price'] * df.loc[b60, 'amount']).sum() / df.loc[b60, 'amount'].sum()) / ((df.loc[g60, 'price'] * df.loc[g60, 'amount']).sum() / df.loc[g60, 'amount'].sum())\n", "    rows.append(dict(example_time=example_time, r5=ratio_5min, r15=ratio_15min, r60=ratio_60min))\n", "\n", "took = time.perf_counter() - start\n", "print('finished in {:.1f}sec'.format(took))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3.342554545733013" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Scale the measured per-timestamp cost to 5401808 timestamps (presumably the size of the full job -- confirm).\n", "# len(rows) is the number of timestamps the benchmark loop actually processed.\n", "hypothetical_full_took = (took / len(rows)) * 5401808\n", "# Seconds -> minutes -> hours -> days -> years.\n", "hypothetical_full_took / 60 / 60 / 24 / 365" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.1016643329480867" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 92331988 is the row count df.info() reported for the loaded sample;\n", "# 908204336 is presumably the row count of the full data set -- confirm.\n", "92331988 / 908204336" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "19.513984975814818" ] }, "execution_count": 21,
"metadata": {}, "output_type": "execute_result" } ], "source": [ "# Average seconds per timestamp processed by the benchmark loop.\n", "took / len(rows)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('105,410,800.2', 105410800.15423629)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'{:,.1f}'.format(hypothetical_full_took), hypothetical_full_took" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "r5_delta 0.000000000002368\n", "r15_delta 0.000000000010704\n", "r60_delta 0.000000000005513\n", "dtype: object" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Largest absolute difference, per window, between the ratios computed in this notebook and the\n", "# r5/r15/r60 columns of ref (which receive the '_rust' suffix in the join).\n", "# The lambda argument is named `joined` so that it does not shadow the global trades frame `df`.\n", "(pd.DataFrame(rows).join(ref.set_index('time'), on='example_time', rsuffix='_rust')\n", "    .assign(r5_delta=lambda joined: abs(joined['r5'] - joined['r5_rust']),\n", "            r15_delta=lambda joined: abs(joined['r15'] - joined['r15_rust']),\n", "            r60_delta=lambda joined: abs(joined['r60'] - joined['r60_rust']))\n", ")[['r5_delta', 'r15_delta', 'r60_delta']].max(axis=0).map(lambda x: '{:.15f}'.format(x))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 2 }