Commit e9a1e696 authored by Dr.李

added factor analysis

parent 8a369262
@@ -20,6 +20,7 @@
 "source": [
  "%%time\n",
  "factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')\n",
+ "common_500 = pd.read_hdf('multifactor.hdf', 'common_500')\n",
  "index_components = pd.read_hdf('multifactor.hdf', 'index_components')\n",
  "index_data = pd.read_hdf('multifactor.hdf', 'index_data')\n",
  "prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')\n",
@@ -27,7 +28,8 @@
  "return_data_500 = pd.read_hdf('multifactor.hdf', 'return_data_500')\n",
  "prod_factor_cols = pd.read_hdf('multifactor.hdf', 'prod_factor_cols')\n",
  "risk_factor_cols = pd.read_hdf('multifactor.hdf', 'risk_factor_cols')\n",
- "common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')"
+ "common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')\n",
+ "common_500_factor_cols = pd.read_hdf('multifactor.hdf', 'common_500_factor_cols')"
 ]
 },
 {
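All inputs live in one HDF5 store, addressed by key. A minimal sketch of that pattern (hypothetical file and key names; `to_hdf`/`read_hdf` require the PyTables package):

```python
import pandas as pd

# A single HDF5 file can hold several keyed DataFrames.
df = pd.DataFrame({'Code': [1, 2], 'VAL': [0.1, 0.2]})
df.to_hdf('toy.hdf', key='factor_data')       # write under a key
same = pd.read_hdf('toy.hdf', 'factor_data')  # read it back by key
print(same.equals(df))                        # True
```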
@@ -40,20 +42,6 @@
  "---------------------"
 ]
 },
-{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
- "outputs": [],
- "source": [
-  "del risk_factor_500['Bank']\n",
-  "del risk_factor_500['NonBankFinancial']\n",
-  "del factor_data['申万二级行业']\n",
-  "del factor_data['申万三级行业']"
- ]
-},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -63,10 +51,7 @@
 "outputs": [],
 "source": [
  "index_components_name = '500Weight'\n",
- "benchmark = 'zz500'\n",
- "\n",
- "del risk_factor_cols[25]\n",
- "del risk_factor_cols[26]"
+ "benchmark = 'zz500'"
 ]
 },
 {
@@ -100,6 +85,7 @@
 "source": [
  "%%time\n",
  "total_data = pd.merge(factor_data, prod_500[prod_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
+ "total_data = pd.merge(total_data, common_500[common_500_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
  "total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n",
  "total_data.dropna(inplace=True)\n",
  "total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n",
@@ -119,6 +105,15 @@
  "total_data = total_data[total_data[index_components_name] != 0]"
 ]
 },
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "len(total_data)"
+ ]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -130,9 +125,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
  "from alphamind.data.standardize import standardize\n",
@@ -143,12 +136,13 @@
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
  "total_factors = common_factor_cols.append(prod_factor_cols)\n",
+ "total_factors = total_factors.append(common_500_factor_cols)\n",
+ "\n",
+ "#risk_factor_cols = risk_factor_cols[risk_factor_cols != 'Size']\n",
  "\n",
  "all_factors = total_data[total_factors]\n",
  "risk_factors = total_data[risk_factor_cols]\n",
@@ -163,17 +157,14 @@
 "source": [
  "%%time\n",
  "factor_processed = neutralize(risk_factors.values,\n",
-  "                              standardize(winsorize_normal(all_factors.values, groups=groups),\n",
-  "                                          groups=groups),\n",
+  "                              winsorize_normal(all_factors.values, groups=groups),\n",
  "                              groups=groups)"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
  "normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
@@ -304,15 +295,28 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "pos_corr.loc[prod_factor_cols.tolist(), prod_factor_cols.tolist()]"
+ "pos_corr = pos_corr.loc[total_factors, total_factors]"
 ]
 },
 {
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": null,
 "metadata": {},
+ "outputs": [],
 "source": [
- "# Save Data\n",
- "------------------------"
+ "turn_over_table = {}\n",
+ "pos_df['Code'] = total_data.Code.values\n",
+ "pos_df.reset_index(inplace=True)\n",
+ "\n",
+ "for name in total_factors:\n",
+ "    pos_series = pos_df[['Date', 'Code', name]]\n",
+ "    pivot_position = pos_series.pivot_table(name, index='Date', columns='Code').fillna(0.)\n",
+ "    turn_over_series = pivot_position.diff().abs().sum(axis=1)\n",
+ "    turn_over_table[name] = turn_over_series.values\n",
+ "\n",
+ "turn_over_table = pd.DataFrame(turn_over_table, index=pos_df.Date.unique())\n",
+ "turn_over_table = turn_over_table[total_factors]\n",
+ "turn_over_table"
 ]
 },
 {
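The new cell measures per-factor turnover: positions are pivoted into a Date × Code matrix, and turnover on each date is the sum of absolute weight changes versus the previous date. A small pandas illustration with hypothetical data:

```python
import pandas as pd

# Toy positions: two dates, two codes, one factor column 'f1'.
pos = pd.DataFrame({
    'Date': ['2017-01-03', '2017-01-03', '2017-01-04', '2017-01-04'],
    'Code': [1, 2, 1, 2],
    'f1': [0.6, 0.4, 0.3, 0.7],
})
pivot = pos.pivot_table(values='f1', index='Date', columns='Code').fillna(0.)
turnover = pivot.diff().abs().sum(axis=1)  # first date diffs to NaN, sums to 0
print(turnover)  # 2017-01-04: |0.3 - 0.6| + |0.7 - 0.4| = 0.6
```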
@@ -329,13 +333,16 @@
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {},
+ "metadata": {
+  "collapsed": true
+ },
 "outputs": [],
 "source": [
- "writer = ExcelWriter('Summary_500.xlsx')\n",
+ "writer = ExcelWriter('Summary_500_long_top_tmp.xlsx')\n",
  "ret_series = ret_df.groupby(level=0).sum().dropna()\n",
  "ret_series.to_excel(writer, 'ret_series')\n",
  "pos_corr.to_excel(writer, 'pos_corr')\n",
+ "turn_over_table.to_excel(writer, 'turn_over')\n",
  "writer.close()"
 ]
 },
@@ -343,54 +350,199 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
- "# Raw Product Factor\n",
- "-----------------------------------------"
+ "# Factor Performance (risk neutral)\n",
+ "---------------------------------"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
- "pos_data = rank_build(total_data[prod_factor_cols].values, use_rank, groups)\n",
- "pos_df = pd.DataFrame(pos_data, columns=prod_factor_cols, index=total_data.Date) / use_rank"
+ "from alphamind.portfolio.linearbuilder import linear_build"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "total_data[total_factors] = normed_factor.values\n",
+  "total_data"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "market_neutral_pos = {}\n",
+  "for i, name in enumerate(total_factors):\n",
+  "\n",
+  "    lbound_exposure = -1e-2 * np.ones(len(risk_factor_cols))\n",
+  "    ubound_exposure = 1e-2 * np.ones(len(risk_factor_cols))\n",
+  "\n",
+  "    def get_benchmark_match_pos(x, name):\n",
+  "        er = x[name].values\n",
+  "        bm = x[index_components_name].values / 100.\n",
+  "        lbound = 0.\n",
+  "        ubound = 0.01 + bm\n",
+  "        risk_exposure = x[risk_factor_cols].values\n",
+  "\n",
+  "        status, value, ret = linear_build(er,\n",
+  "                                          lbound=lbound,\n",
+  "                                          ubound=ubound,\n",
+  "                                          risk_exposure=risk_exposure,\n",
+  "                                          bm=bm,\n",
+  "                                          risk_target=(lbound_exposure, ubound_exposure),\n",
+  "                                          solver='GLPK')\n",
+  "\n",
+  "        if status != 'optimal':\n",
+  "            return pd.Series(np.ones(len(er)) / len(er))\n",
+  "        else:\n",
+  "            return pd.Series(ret)\n",
+  "\n",
+  "    look_into = risk_factor_cols.append(pd.Series([index_components_name, 'Date', name]))\n",
+  "    res = total_data[look_into].groupby('Date').apply(get_benchmark_match_pos, name=name).values\n",
+  "    market_neutral_pos[name] = res\n",
+  "    print('{0}: Factor {1} is finished'.format(i, name))"
+ ]
+},
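The loop above builds, per factor and per date, a long-only portfolio that tracks the benchmark: each weight is capped at the benchmark weight plus 1%, active exposure to every risk factor is confined to a ±1e-2 band, and the build falls back to equal weights whenever the solver does not return 'optimal'. As a standalone illustration, here is the same LP written with scipy.optimize.linprog on toy data; the full-investment constraint sum(w) = 1 is my assumption, since linear_build's internal constraints are not shown in the diff:

```python
import numpy as np
from scipy.optimize import linprog

def benchmark_match_sketch(er, bm, risk_exposure, band=1e-2):
    """Maximize er'w subject to 0 <= w <= bm + 0.01,
    |X'(w - bm)| <= band, and (assumed) sum(w) = 1."""
    n = len(er)
    X = risk_exposure
    target = X.T @ bm                      # benchmark's risk exposures
    A_ub = np.vstack([X.T, -X.T])          # X'w <= target + band; X'w >= target - band
    b_ub = np.concatenate([target + band, band - target])
    return linprog(-er,                    # linprog minimizes, so negate er
                   A_ub=A_ub, b_ub=b_ub,
                   A_eq=np.ones((1, n)), b_eq=[1.0],
                   bounds=list(zip(np.zeros(n), bm + 0.01)))

rng = np.random.default_rng(7)
n, k = 50, 3
er = rng.normal(size=n)                    # toy expected returns
bm = np.full(n, 1.0 / n)                   # equal-weight benchmark
X = rng.normal(size=(n, k))                # toy risk exposures
res = benchmark_match_sketch(er, bm, X)
print(res.success, np.abs(X.T @ res.x - X.T @ bm).max())  # True, <= 1e-2
```

Note that w = bm is always feasible here, so the sketch LP cannot be infeasible; the equal-weight fallback in the notebook guards against whatever extra constraints linear_build adds.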
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "pos_df = pd.DataFrame(market_neutral_pos, index=total_data.Date)"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
 "outputs": [],
 "source": [
  "ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
- "ret_df = pd.DataFrame(ret_mat, columns=pos_df.columns, index=pos_df.index)"
+ "ret_df = pd.DataFrame(ret_mat, columns=normed_factor.columns, index=normed_factor.index)"
 ]
 },
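The cell above scores each factor portfolio by its active return against the index: per name, (w − b) · r, later summed by date. A tiny numeric check of that identity with toy numbers:

```python
import numpy as np

# Active return: sum over names of (weight - benchmark weight) * return.
w = np.array([0.6, 0.4])     # portfolio weights (toy)
b = np.array([0.5, 0.5])     # benchmark weights
r = np.array([0.02, -0.01])  # daily returns
print(((w - b) * r).sum())   # 0.1*0.02 + (-0.1)*(-0.01) = 0.003
```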
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
- "ret_df.groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
+ "ret_df[prod_factor_cols].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
- "metadata": {
-  "collapsed": true
- },
+ "metadata": {},
 "outputs": [],
 "source": [
- "len(total_data)"
+ "ret_df[prod_factor_cols].groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "pos_corr = pos_df.corr()\n",
+  "pos_corr.loc[prod_factor_cols.tolist(), prod_factor_cols.tolist()]"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "turn_over_table = {}\n",
+  "pos_df['Code'] = total_data.Code.values\n",
+  "pos_df.reset_index(inplace=True)\n",
+  "\n",
+  "for name in total_factors:\n",
+  "    pos_series = pos_df[['Date', 'Code', name]]\n",
+  "    pivot_position = pos_series.pivot_table(name, index='Date', columns='Code').fillna(0.)\n",
+  "    turn_over_series = pivot_position.diff().abs().sum(axis=1)\n",
+  "    turn_over_table[name] = turn_over_series.values\n",
+  "\n",
+  "turn_over_table = pd.DataFrame(turn_over_table, index=pos_df.Date.unique())\n",
+  "turn_over_table = turn_over_table[total_factors]\n",
+  "turn_over_table"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "pos_corr = pos_corr.loc[total_factors, total_factors]\n",
+  "\n",
+  "writer = ExcelWriter('Summary_500_risk_neutral_tmp.xlsx')\n",
+  "ret_series = ret_df.groupby(level=0).sum().dropna()\n",
+  "ret_series.to_excel(writer, 'ret_series')\n",
+  "pos_corr.to_excel(writer, 'pos_corr')\n",
+  "turn_over_table.to_excel(writer, 'turn_over')\n",
+  "writer.close()"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "turn_over_table"
+ ]
+},
+{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "# Raw Product Factor\n",
+  "-----------------------------------------"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "risk_factor_values = total_data[risk_factor_cols].values\n",
+  "index_components_values = total_data[[index_components_name]].values / 100."
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "for i, name in enumerate(total_factors):\n",
+  "    pos_values = pos_df[[name]].values\n",
+  "    risk_values = (pos_values - index_components_values) * risk_factor_values\n",
+  "    risk_tables = pd.DataFrame(risk_values, columns=risk_factor_cols, index=total_data.Date)\n",
+  "    aggregated_risk = risk_tables.groupby(level=0).sum()\n",
+  "    print('{0}: Factor {1}, {2}, {3}'.format(i, name, aggregated_risk.min(), aggregated_risk.max()))"
+ ]
+},
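This loop re-derives each portfolio's realized active exposures, weighting (w − b) by the risk-factor loadings and aggregating by date; printing the min and max verifies that exposures stay inside the ±1e-2 band requested from linear_build. A toy version of the same check:

```python
import numpy as np

# Per-name active risk contribution (w - b) * X, aggregated over names,
# should sit inside the +/-1e-2 band the LP was given.
rng = np.random.default_rng(0)
n = 50
X = rng.normal(size=(n, 3))            # toy risk-factor exposures
b = np.full(n, 1.0 / n)                # equal-weight benchmark
w = b + 1e-4 * rng.normal(size=n)      # small active tilts (toy)
active_exposure = ((w - b)[:, None] * X).sum(axis=0)
print(np.abs(active_exposure).max())   # well inside 1e-2 here
```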
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "aggregated_risk.max()"
 ]
 },
 {
@@ -405,7 +557,7 @@
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python [default]",
+  "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
@@ -419,7 +571,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
-  "version": "3.5.3"
+  "version": "3.6.1"
  }
 },
 "nbformat": 4,
...