Commit 8a369262 authored by Dr.李

update notebook

parent 8e2520bc
......@@ -20,9 +20,9 @@
},
"outputs": [],
"source": [
"server = '10.63.6.176'\n",
"server = 'rm-bp1psdz5615icqc0yo.mysql.rds.aliyuncs.com'\n",
"user = 'sa'\n",
"pwd = 'we083826'\n",
"pwd = 'We051253524522'\n",
"\n",
"engine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}/multifactor?charset=utf8'.format(user, pwd, server))"
]
......@@ -157,13 +157,33 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_300 = pd.read_sql('select * from risk_factor_300', engine)\n",
"risk_factor_300['Market'] = 1."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"risk_factor_300.to_hdf('multifactor.hdf', 'risk_factor_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factor_data = pd.read_sql('select * from factor_data', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
......@@ -172,7 +192,171 @@
},
"outputs": [],
"source": [
"risk_factor_300.to_hdf('multifactor.hdf', 'risk_factor_300')"
"factor_data.to_hdf('multifactor.hdf', 'factor_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_factor_cols = pd.Series(['CFinc1', 'BDTO', 'RVOL', 'CHV', 'VAL'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_factor_cols.to_hdf('multifactor.hdf', 'prod_factor_cols')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del risk_factor_500['Bank']\n",
"del risk_factor_500['NonBankFinancial']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_cols = pd.Series(risk_factor_500.columns[2:].tolist())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_cols.to_hdf('multifactor.hdf', 'risk_factor_cols')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"common_factor_cols = pd.Series(factor_data.columns[5:].tolist())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"common_factor_cols.to_hdf('multifactor.hdf', 'common_factor_cols')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"risk_factor_500.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_300 = pd.read_sql('select * from return_data_300', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_300.to_hdf('multifactor.hdf', 'return_data_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_500 = pd.read_sql('select * from return_data_500', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_data_500.to_hdf('multifactor.hdf', 'return_data_500')"
]
},
{
......@@ -187,7 +371,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
......@@ -201,9 +385,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 1
}
......@@ -18,12 +18,13 @@
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')\n",
"index_components = pd.read_hdf('multifactor.hdf', 'index_components')\n",
"index_data = pd.read_hdf('multifactor.hdf', 'index_data')\n",
"prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')\n",
"risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')\n",
"trade_data = pd.read_hdf('multifactor.hdf', 'trade_data')\n",
"return_data_500 = pd.read_hdf('multifactor.hdf', 'return_data_500')\n",
"prod_factor_cols = pd.read_hdf('multifactor.hdf', 'prod_factor_cols')\n",
"risk_factor_cols = pd.read_hdf('multifactor.hdf', 'risk_factor_cols')\n",
"common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')"
......@@ -42,7 +43,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del risk_factor_500['Bank']\n",
......@@ -54,7 +57,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"index_components_name = '500Weight'\n",
......@@ -72,16 +77,7 @@
},
"outputs": [],
"source": [
"decay = 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trade_data['dret'] = trade_data.Return.groupby(trade_data.Code).shift(-decay)"
"return_data_500['dret'] = return_data_500['D1LogReturn']"
]
},
{
......@@ -93,7 +89,7 @@
"outputs": [],
"source": [
"index_data['ret'] = index_data[benchmark] / index_data[benchmark].shift(1) - 1.\n",
"index_data['dret_b'] = index_data['ret'] .shift(-decay)"
"index_data['dret_b'] = index_data['ret'] .shift(-2)"
]
},
{
......@@ -102,19 +98,22 @@
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"total_data = pd.merge(factor_data, prod_500[prod_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n",
"total_data.dropna(inplace=True)\n",
"total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n",
"total_data.fillna(0, inplace=True)\n",
"total_data = pd.merge(total_data, risk_factor_500, on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, trade_data[['Date', 'Code', 'dret']], on=['Date', 'Code'])"
"total_data = pd.merge(total_data, return_data_500[['Date', 'Code', 'dret']], on=['Date', 'Code'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_data = total_data[total_data[index_components_name] != 0]"
......@@ -144,7 +143,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_factors = common_factor_cols.append(prod_factor_cols)\n",
......@@ -170,7 +171,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
......@@ -210,7 +213,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
......@@ -220,7 +225,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"top_factors = ret_df.groupby(level=0).sum()[-90:].sum().abs().sort_values(ascending=False)[:10].index"
......@@ -283,7 +290,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_corr = pos_df.corr()"
......@@ -353,7 +362,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
......@@ -363,7 +374,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df.groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
......@@ -372,7 +385,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"len(total_data)"
......@@ -390,7 +405,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
......@@ -404,7 +419,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
"version": "3.5.3"
}
},
"nbformat": 4,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment