|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514 |
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "CPU times: user 1min 8s, sys: 7.72 s, total: 1min 16s\n",
- "Wall time: 1min 16s\n",
- "<class 'pandas.core.frame.DataFrame'>\n",
- "RangeIndex: 92331988 entries, 0 to 92331987\n",
- "Data columns (total 7 columns):\n",
- "time int64\n",
- "amount float64\n",
- "exch object\n",
- "price float64\n",
- "server_time int64\n",
- "side object\n",
- "ticker object\n",
- "dtypes: float64(2), int64(2), object(3)\n",
- "memory usage: 4.8+ GB\n"
- ]
- }
- ],
- "source": [
- "# pandas is referenced as `pd` throughout this notebook but no saved cell imports it,\n",
- "# so import it here to keep a fresh Restart & Run All from failing with a NameError.\n",
- "import pandas as pd\n",
- "\n",
- "# Load the trade sample; %time reports how long the CSV parse takes.\n",
- "%time df = pd.read_csv('/xfs/sample.csv')\n",
- "df.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>time</th>\n",
- " <th>amount</th>\n",
- " <th>exch</th>\n",
- " <th>price</th>\n",
- " <th>server_time</th>\n",
- " <th>side</th>\n",
- " <th>ticker</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>1561939200002479372</td>\n",
- " <td>1.4894</td>\n",
- " <td>bnce</td>\n",
- " <td>292.7000</td>\n",
- " <td>1561939199919000064</td>\n",
- " <td>NaN</td>\n",
- " <td>eth_usd</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>1561939200011035644</td>\n",
- " <td>0.0833</td>\n",
- " <td>btfx</td>\n",
- " <td>10809.0000</td>\n",
- " <td>1561939199927000064</td>\n",
- " <td>bid</td>\n",
- " <td>btc_usd</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>1561939200011055712</td>\n",
- " <td>0.8333</td>\n",
- " <td>btfx</td>\n",
- " <td>10809.0000</td>\n",
- " <td>1561939199927000064</td>\n",
- " <td>bid</td>\n",
- " <td>btc_usd</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>3</td>\n",
- " <td>1561939200019037617</td>\n",
- " <td>0.0831</td>\n",
- " <td>bnce</td>\n",
- " <td>10854.1000</td>\n",
- " <td>1561939199935000064</td>\n",
- " <td>NaN</td>\n",
- " <td>btc_usd</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>4</td>\n",
- " <td>1561939200026450471</td>\n",
- " <td>0.1250</td>\n",
- " <td>okex</td>\n",
- " <td>123.2100</td>\n",
- " <td>1561939200026450432</td>\n",
- " <td>ask</td>\n",
- " <td>ltc_usd</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " time amount exch price server_time side \\\n",
- "0 1561939200002479372 1.4894 bnce 292.7000 1561939199919000064 NaN \n",
- "1 1561939200011035644 0.0833 btfx 10809.0000 1561939199927000064 bid \n",
- "2 1561939200011055712 0.8333 btfx 10809.0000 1561939199927000064 bid \n",
- "3 1561939200019037617 0.0831 bnce 10854.1000 1561939199935000064 NaN \n",
- "4 1561939200026450471 0.1250 okex 123.2100 1561939200026450432 ask \n",
- "\n",
- " ticker \n",
- "0 eth_usd \n",
- "1 btc_usd \n",
- "2 btc_usd \n",
- "3 btc_usd \n",
- "4 ltc_usd "
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(True, True, True)"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "SECOND = int(1e9)  # the window arithmetic below treats `time` as nanoseconds\n",
- "\n",
- "example_time = 1567295920000000000\n",
- "\n",
- "\n",
- "def window_mask(end, minutes):\n",
- "    '''Boolean mask of the rows of `df` with end - minutes < time <= end.'''\n",
- "    return (df['time'] > end - SECOND * 60 * minutes) & (df['time'] <= end)\n",
- "\n",
- "\n",
- "def vwap(mask):\n",
- "    '''Amount-weighted average price over the rows of `df` selected by `mask`.'''\n",
- "    price = df.loc[mask, 'price']\n",
- "    amount = df.loc[mask, 'amount']\n",
- "    return (price * amount).sum() / amount.sum()\n",
- "\n",
- "\n",
- "last_5min = window_mask(example_time, 5)\n",
- "last_15min = window_mask(example_time, 15)\n",
- "last_60min = window_mask(example_time, 60)\n",
- "of_btc_usd = df['ticker'] == 'btc_usd'\n",
- "of_gdax = df['exch'] == 'gdax'\n",
- "of_bmex = df['exch'] == 'bmex'\n",
- "\n",
- "# The ticker/exchange filters are the same for every window, so combine them once.\n",
- "btc_gdax = of_btc_usd & of_gdax\n",
- "btc_bmex = of_btc_usd & of_bmex\n",
- "\n",
- "g5 = last_5min & btc_gdax\n",
- "b5 = last_5min & btc_bmex\n",
- "g15 = last_15min & btc_gdax\n",
- "b15 = last_15min & btc_bmex\n",
- "g60 = last_60min & btc_gdax\n",
- "b60 = last_60min & btc_bmex\n",
- "\n",
- "# Each ratio is the bmex weighted price divided by the gdax weighted price for that window.\n",
- "ratio_5min = vwap(b5) / vwap(g5)\n",
- "ratio_15min = vwap(b15) / vwap(g15)\n",
- "ratio_60min = vwap(b60) / vwap(g60)\n",
- "\n",
- "# Compare against the expected values for this timestamp, to within 1e-6.\n",
- "abs(ratio_5min - 1.000474060563638) < 1e-6, abs(ratio_15min - 1.0005019306061411) < 1e-6, abs(ratio_60min - 1.0002338013889658) < 1e-6"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<style scoped>\n",
- " .dataframe tbody tr th:only-of-type {\n",
- " vertical-align: middle;\n",
- " }\n",
- "\n",
- " .dataframe tbody tr th {\n",
- " vertical-align: top;\n",
- " }\n",
- "\n",
- " .dataframe thead th {\n",
- " text-align: right;\n",
- " }\n",
- "</style>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>time</th>\n",
- " <th>last</th>\n",
- " <th>bmex_5min</th>\n",
- " <th>gdax_5min</th>\n",
- " <th>n_bmex_p5</th>\n",
- " <th>n_gdax_p5</th>\n",
- " <th>r5</th>\n",
- " <th>r15</th>\n",
- " <th>r60</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>1561939210000000000</td>\n",
- " <td>10758.5800</td>\n",
- " <td>10760.7205</td>\n",
- " <td>10760.4593</td>\n",
- " <td>22</td>\n",
- " <td>28</td>\n",
- " <td>1.0000</td>\n",
- " <td>1.0000</td>\n",
- " <td>1.0000</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>1561939220000000000</td>\n",
- " <td>10770.0000</td>\n",
- " <td>10763.6811</td>\n",
- " <td>10761.2528</td>\n",
- " <td>230</td>\n",
- " <td>75</td>\n",
- " <td>1.0002</td>\n",
- " <td>1.0002</td>\n",
- " <td>1.0002</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>1561939230000000000</td>\n",
- " <td>10758.0100</td>\n",
- " <td>10761.8843</td>\n",
- " <td>10760.1596</td>\n",
- " <td>418</td>\n",
- " <td>120</td>\n",
- " <td>1.0002</td>\n",
- " <td>1.0002</td>\n",
- " <td>1.0002</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>3</td>\n",
- " <td>1561939240000000000</td>\n",
- " <td>10752.5000</td>\n",
- " <td>10757.6829</td>\n",
- " <td>10760.3630</td>\n",
- " <td>507</td>\n",
- " <td>147</td>\n",
- " <td>0.9998</td>\n",
- " <td>0.9998</td>\n",
- " <td>0.9998</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>4</td>\n",
- " <td>1561939250000000000</td>\n",
- " <td>10772.6900</td>\n",
- " <td>10757.5702</td>\n",
- " <td>10763.0840</td>\n",
- " <td>537</td>\n",
- " <td>191</td>\n",
- " <td>0.9995</td>\n",
- " <td>0.9995</td>\n",
- " <td>0.9995</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " time last bmex_5min gdax_5min n_bmex_p5 n_gdax_p5 \\\n",
- "0 1561939210000000000 10758.5800 10760.7205 10760.4593 22 28 \n",
- "1 1561939220000000000 10770.0000 10763.6811 10761.2528 230 75 \n",
- "2 1561939230000000000 10758.0100 10761.8843 10760.1596 418 120 \n",
- "3 1561939240000000000 10752.5000 10757.6829 10760.3630 507 147 \n",
- "4 1561939250000000000 10772.6900 10757.5702 10763.0840 537 191 \n",
- "\n",
- " r5 r15 r60 \n",
- "0 1.0000 1.0000 1.0000 \n",
- "1 1.0002 1.0002 1.0002 \n",
- "2 1.0002 1.0002 1.0002 \n",
- "3 0.9998 0.9998 0.9998 \n",
- "4 0.9995 0.9995 0.9995 "
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ref = pd.read_csv('../var/hard.csv')\n",
- "ref.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([1567295750000000000, 1567295760000000000, 1567295770000000000, 1567295780000000000, 1567295790000000000, 1567295800000000000, 1567295810000000000, 1567295820000000000, 1567295830000000000,\n",
- " 1567295840000000000, 1567295850000000000, 1567295860000000000, 1567295870000000000, 1567295880000000000, 1567295890000000000, 1567295900000000000, 1567295910000000000, 1567295920000000000,\n",
- " 1567295930000000000, 1567295940000000000, 1567295950000000000, 1567295960000000000, 1567295970000000000, 1567295980000000000, 1567295990000000000])"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ref['time'].tail(25).values"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "finished in 487.8sec\n"
- ]
- }
- ],
- "source": [
- "import time\n",
- "\n",
- "\n",
- "def vwap(mask):\n",
- "    '''Amount-weighted average price over the rows of `df` selected by `mask`.'''\n",
- "    price = df.loc[mask, 'price']\n",
- "    amount = df.loc[mask, 'amount']\n",
- "    return (price * amount).sum() / amount.sum()\n",
- "\n",
- "\n",
- "start = time.time()\n",
- "rows = []\n",
- "\n",
- "# These filters do not depend on the timestamp being evaluated, so they are built\n",
- "# once instead of on every iteration. They stay inside the timed region so `took`\n",
- "# still covers all of the work done by this cell.\n",
- "# NOTE: re-run this cell to refresh the saved timing output after this change.\n",
- "of_btc_usd = df['ticker'] == 'btc_usd'\n",
- "btc_gdax = of_btc_usd & (df['exch'] == 'gdax')\n",
- "btc_bmex = of_btc_usd & (df['exch'] == 'bmex')\n",
- "\n",
- "for example_time in ref['time'].tail(25).values:\n",
- "    # Upper bound shared by all three look-back windows.\n",
- "    not_after = df['time'] <= example_time\n",
- "    ratios = {}\n",
- "    for key, minutes in (('r5', 5), ('r15', 15), ('r60', 60)):\n",
- "        in_window = (df['time'] > example_time - SECOND * 60 * minutes) & not_after\n",
- "        # bmex weighted price relative to the gdax weighted price for this window\n",
- "        ratios[key] = vwap(in_window & btc_bmex) / vwap(in_window & btc_gdax)\n",
- "    rows.append(dict(example_time=example_time, **ratios))\n",
- "\n",
- "took = time.time() - start\n",
- "print('finished in {:.1f}sec'.format(took))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "3.342554545733013"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Scale the measured per-timestamp cost up to 5,401,808 timestamps\n",
- "# (presumably the size of the full reference output -- TODO confirm) and express it in years.\n",
- "# len(rows) is the number of timestamps the timed loop actually processed.\n",
- "hypothetical_full_took = (took / len(rows)) * 5401808\n",
- "hypothetical_full_took / 60 / 60 / 24 / 365"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.1016643329480867"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "92331988 / 908204336"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "19.513984975814818"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Average seconds per timestamp; len(rows) is how many the timed loop processed.\n",
- "took / len(rows)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "('105,410,800.2', 105410800.15423629)"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "'{:,.1f}'.format(hypothetical_full_took), hypothetical_full_took"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "r5_delta 0.000000000002368\n",
- "r15_delta 0.000000000010704\n",
- "r60_delta 0.000000000005513\n",
- "dtype: object"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Pair each computed row with the reference row that has the same timestamp; the\n",
- "# reference's overlapping ratio columns pick up the '_rust' suffix. Then report the\n",
- "# largest absolute difference per ratio, formatted to 15 decimal places.\n",
- "(\n",
- "    pd.DataFrame(rows)\n",
- "    .join(ref.set_index('time'), on='example_time', rsuffix='_rust')\n",
- "    .assign(\n",
- "        r5_delta=lambda joined: (joined['r5'] - joined['r5_rust']).abs(),\n",
- "        r15_delta=lambda joined: (joined['r15'] - joined['r15_rust']).abs(),\n",
- "        r60_delta=lambda joined: (joined['r60'] - joined['r60_rust']).abs(),\n",
- "    )\n",
- "    .loc[:, ['r5_delta', 'r15_delta', 'r60_delta']]\n",
- "    .max(axis=0)\n",
- "    .map(lambda worst: '{:.15f}'.format(worst))\n",
- ")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
|