Commit 8a369262 authored by Dr.李

update notebook

parent 8e2520bc
...@@ -20,9 +20,9 @@ ...@@ -20,9 +20,9 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"server = '10.63.6.176'\n", "server = 'rm-bp1psdz5615icqc0yo.mysql.rds.aliyuncs.com'\n",
"user = 'sa'\n", "user = 'sa'\n",
"pwd = 'we083826'\n", "pwd = 'We051253524522'\n",
"\n", "\n",
"engine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}/multifactor?charset=utf8'.format(user, pwd, server))" "engine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}/multifactor?charset=utf8'.format(user, pwd, server))"
] ]
...@@ -157,13 +157,33 @@ ...@@ -157,13 +157,33 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"risk_factor_300 = pd.read_sql('select * from risk_factor_300', engine)\n", "risk_factor_300 = pd.read_sql('select * from risk_factor_300', engine)\n",
"risk_factor_300['Market'] = 1." "risk_factor_300['Market'] = 1."
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"risk_factor_300.to_hdf('multifactor.hdf', 'risk_factor_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factor_data = pd.read_sql('select * from factor_data', engine)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
...@@ -172,7 +192,171 @@ ...@@ -172,7 +192,171 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"risk_factor_300.to_hdf('multifactor.hdf', 'risk_factor_300')" "factor_data.to_hdf('multifactor.hdf', 'factor_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_factor_cols = pd.Series(['CFinc1', 'BDTO', 'RVOL', 'CHV', 'VAL'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_factor_cols.to_hdf('multifactor.hdf', 'prod_factor_cols')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del risk_factor_500['Bank']\n",
"del risk_factor_500['NonBankFinancial']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_cols = pd.Series(risk_factor_500.columns[2:].tolist())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_cols.to_hdf('multifactor.hdf', 'risk_factor_cols')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"common_factor_cols = pd.Series(factor_data.columns[5:].tolist())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"common_factor_cols.to_hdf('multifactor.hdf', 'common_factor_cols')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"risk_factor_500.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_300 = pd.read_sql('select * from return_data_300', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_300.to_hdf('multifactor.hdf', 'return_data_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_500 = pd.read_sql('select * from return_data_500', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_500.to_hdf('multifactor.hdf', 'return_data_500')"
] ]
}, },
{ {
...@@ -187,7 +371,7 @@ ...@@ -187,7 +371,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python [default]",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
...@@ -201,9 +385,9 @@ ...@@ -201,9 +385,9 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.1" "version": "3.5.3"
} }
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 2 "nbformat_minor": 1
} }
...@@ -18,12 +18,13 @@ ...@@ -18,12 +18,13 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%%time\n",
"factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')\n", "factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')\n",
"index_components = pd.read_hdf('multifactor.hdf', 'index_components')\n", "index_components = pd.read_hdf('multifactor.hdf', 'index_components')\n",
"index_data = pd.read_hdf('multifactor.hdf', 'index_data')\n", "index_data = pd.read_hdf('multifactor.hdf', 'index_data')\n",
"prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')\n", "prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')\n",
"risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')\n", "risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')\n",
"trade_data = pd.read_hdf('multifactor.hdf', 'trade_data')\n", "return_data_500 = pd.read_hdf('multifactor.hdf', 'return_data_500')\n",
"prod_factor_cols = pd.read_hdf('multifactor.hdf', 'prod_factor_cols')\n", "prod_factor_cols = pd.read_hdf('multifactor.hdf', 'prod_factor_cols')\n",
"risk_factor_cols = pd.read_hdf('multifactor.hdf', 'risk_factor_cols')\n", "risk_factor_cols = pd.read_hdf('multifactor.hdf', 'risk_factor_cols')\n",
"common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')" "common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')"
...@@ -42,7 +43,9 @@ ...@@ -42,7 +43,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"del risk_factor_500['Bank']\n", "del risk_factor_500['Bank']\n",
...@@ -54,7 +57,9 @@ ...@@ -54,7 +57,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"index_components_name = '500Weight'\n", "index_components_name = '500Weight'\n",
...@@ -72,16 +77,7 @@ ...@@ -72,16 +77,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"decay = 2" "return_data_500['dret'] = return_data_500['D1LogReturn']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trade_data['dret'] = trade_data.Return.groupby(trade_data.Code).shift(-decay)"
] ]
}, },
{ {
...@@ -93,7 +89,7 @@ ...@@ -93,7 +89,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"index_data['ret'] = index_data[benchmark] / index_data[benchmark].shift(1) - 1.\n", "index_data['ret'] = index_data[benchmark] / index_data[benchmark].shift(1) - 1.\n",
"index_data['dret_b'] = index_data['ret'] .shift(-decay)" "index_data['dret_b'] = index_data['ret'] .shift(-2)"
] ]
}, },
{ {
...@@ -102,19 +98,22 @@ ...@@ -102,19 +98,22 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%%time\n",
"total_data = pd.merge(factor_data, prod_500[prod_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n", "total_data = pd.merge(factor_data, prod_500[prod_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n", "total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n",
"total_data.dropna(inplace=True)\n", "total_data.dropna(inplace=True)\n",
"total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n", "total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n",
"total_data.fillna(0, inplace=True)\n", "total_data.fillna(0, inplace=True)\n",
"total_data = pd.merge(total_data, risk_factor_500, on=['Date', 'Code'])\n", "total_data = pd.merge(total_data, risk_factor_500, on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, trade_data[['Date', 'Code', 'dret']], on=['Date', 'Code'])" "total_data = pd.merge(total_data, return_data_500[['Date', 'Code', 'dret']], on=['Date', 'Code'])"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"total_data = total_data[total_data[index_components_name] != 0]" "total_data = total_data[total_data[index_components_name] != 0]"
...@@ -144,7 +143,9 @@ ...@@ -144,7 +143,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"total_factors = common_factor_cols.append(prod_factor_cols)\n", "total_factors = common_factor_cols.append(prod_factor_cols)\n",
...@@ -170,7 +171,9 @@ ...@@ -170,7 +171,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)" "normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
...@@ -210,7 +213,9 @@ ...@@ -210,7 +213,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n", "ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
...@@ -220,7 +225,9 @@ ...@@ -220,7 +225,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"top_factors = ret_df.groupby(level=0).sum()[-90:].sum().abs().sort_values(ascending=False)[:10].index" "top_factors = ret_df.groupby(level=0).sum()[-90:].sum().abs().sort_values(ascending=False)[:10].index"
...@@ -283,7 +290,9 @@ ...@@ -283,7 +290,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"pos_corr = pos_df.corr()" "pos_corr = pos_df.corr()"
...@@ -353,7 +362,9 @@ ...@@ -353,7 +362,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n", "ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
...@@ -363,7 +374,9 @@ ...@@ -363,7 +374,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"ret_df.groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))" "ret_df.groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
...@@ -372,7 +385,9 @@ ...@@ -372,7 +385,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"collapsed": true
},
"outputs": [], "outputs": [],
"source": [ "source": [
"len(total_data)" "len(total_data)"
...@@ -390,7 +405,7 @@ ...@@ -390,7 +405,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python [default]",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
...@@ -404,7 +419,7 @@ ...@@ -404,7 +419,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.1" "version": "3.5.3"
} }
}, },
"nbformat": 4, "nbformat": 4,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment