Dr.李 / alpha-mind / Commits

Commit e9a1e696 authored May 14, 2017 by Dr.李

    added factor analysis

parent 8a369262
Showing 3 changed files with 209 additions and 57 deletions
notebooks/Summary_500_long_top.xlsx      +0   -0
notebooks/Summary_500_risk_neutral.xlsx  +0   -0
notebooks/zz500_factor_analysis.ipynb    +209 -57
notebooks/Summary_500_long_top.xlsx (new file, mode 100644) @ e9a1e696
File added

notebooks/Summary_500_risk_neutral.xlsx (new file, mode 100644) @ e9a1e696
File added
notebooks/zz500_factor_analysis.ipynb @ e9a1e696
@@ -20,6 +20,7 @@
 "source": [
 "%%time\n",
 "factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')\n",
+"common_500 = pd.read_hdf('multifactor.hdf', 'common_500')\n",
 "index_components = pd.read_hdf('multifactor.hdf', 'index_components')\n",
 "index_data = pd.read_hdf('multifactor.hdf', 'index_data')\n",
 "prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')\n",
@@ -27,7 +28,8 @@
 "return_data_500 = pd.read_hdf('multifactor.hdf', 'return_data_500')\n",
 "prod_factor_cols = pd.read_hdf('multifactor.hdf', 'prod_factor_cols')\n",
 "risk_factor_cols = pd.read_hdf('multifactor.hdf', 'risk_factor_cols')\n",
-"common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')"
+"common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')\n",
+"common_500_factor_cols = pd.read_hdf('multifactor.hdf', 'common_500_factor_cols')"
 ]
 },
 {
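All inputs come from a single HDF5 store, with each DataFrame saved under its own key. A minimal sketch of how such a store could be written with pandas (toy frames; the real schemas are assumptions):

    import pandas as pd

    # Toy stand-ins for two of the datasets the notebook loads.
    factor_data = pd.DataFrame({'Date': ['2017-05-12'], 'Code': [1], 'f1': [0.3]})
    common_500 = pd.DataFrame({'Date': ['2017-05-12'], 'Code': [1], 'g1': [1.2]})

    # One file, one key per DataFrame -- the layout pd.read_hdf(file, key) expects.
    factor_data.to_hdf('multifactor.hdf', 'factor_data')
    common_500.to_hdf('multifactor.hdf', 'common_500')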
@@ -40,20 +42,6 @@
 "---------------------"
 ]
 },
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {
-"collapsed": true
-},
-"outputs": [],
-"source": [
-"del risk_factor_500['Bank']\n",
-"del risk_factor_500['NonBankFinancial']\n",
-"del factor_data['申万二级行业']\n",
-"del factor_data['申万三级行业']"
-]
-},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -63,10 +51,7 @@
 "outputs": [],
 "source": [
 "index_components_name = '500Weight'\n",
-"benchmark = 'zz500'\n",
-"\n",
-"del risk_factor_cols[25]\n",
-"del risk_factor_cols[26]"
+"benchmark = 'zz500'"
 ]
 },
 {
@@ -100,6 +85,7 @@
 "source": [
 "%%time\n",
 "total_data = pd.merge(factor_data, prod_500[prod_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
+"total_data = pd.merge(total_data, common_500[common_500_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
 "total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n",
 "total_data.dropna(inplace=True)\n",
 "total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n",
@@ -119,6 +105,15 @@
 "total_data = total_data[total_data[index_components_name] != 0]"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"len(total_data)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -130,9 +125,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from alphamind.data.standardize import standardize\n",
@@ -143,12 +136,13 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "total_factors = common_factor_cols.append(prod_factor_cols)\n",
+"total_factors = total_factors.append(common_500_factor_cols)\n",
 "\n",
+"#risk_factor_cols = risk_factor_cols[risk_factor_cols != 'Size']\n",
+"\n",
 "all_factors = total_data[total_factors]\n",
 "risk_factors = total_data[risk_factor_cols]\n",
@@ -163,17 +157,14 @@
 "source": [
 "%%time\n",
 "factor_processed = neutralize(risk_factors.values,\n",
-"                              standardize(winsorize_normal(all_factors.values, groups=groups),\n",
-"                                          groups=groups),\n",
+"                              winsorize_normal(all_factors.values, groups=groups),\n",
 "                              groups=groups)"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
@@ -304,15 +295,28 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"pos_corr.loc[prod_factor_cols.tolist(), prod_factor_cols.tolist()]"
+"pos_corr = pos_corr.loc[total_factors, total_factors]"
 ]
 },
 {
-"cell_type": "markdown",
+"cell_type": "code",
+"execution_count": null,
 "metadata": {},
+"outputs": [],
 "source": [
-"# Save Data\n",
-"------------------------"
+"turn_over_table = {}\n",
+"pos_df['Code'] = total_data.Code.values\n",
+"pos_df.reset_index(inplace=True)\n",
+"\n",
+"for name in total_factors:\n",
+"    pos_series = pos_df[['Date', 'Code', name]]\n",
+"    pivot_position = pos_series.pivot_table(name, index='Date', columns='Code').fillna(0.)\n",
+"    turn_over_series = pivot_position.diff().abs().sum(axis=1)\n",
+"    turn_over_table[name] = turn_over_series.values\n",
+"\n",
+"turn_over_table = pd.DataFrame(turn_over_table, index=pos_df.Date.unique())\n",
+"turn_over_table = turn_over_table[total_factors]\n",
+"turn_over_table"
 ]
 },
 {
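The new cell measures per-factor turnover: positions are pivoted into a Date x Code matrix, then day-over-day absolute weight changes are summed per date. A small worked example of the same calculation:

    import pandas as pd

    pos = pd.DataFrame({'Date': [1, 1, 2, 2],
                        'Code': [10, 20, 10, 20],
                        'w':    [0.6, 0.4, 0.5, 0.5]})

    pivot = pos.pivot_table('w', index='Date', columns='Code').fillna(0.)
    turnover = pivot.diff().abs().sum(axis=1)
    # Day 1 has no previous day (diff is NaN, summed as 0);
    # day 2 turnover = |0.5 - 0.6| + |0.5 - 0.4| = 0.2
    print(turnover)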
@@ -329,13 +333,16 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
-"writer = ExcelWriter('Summary_500.xlsx')\n",
+"writer = ExcelWriter('Summary_500_long_top_tmp.xlsx')\n",
 "ret_series = ret_df.groupby(level=0).sum().dropna()\n",
 "ret_series.to_excel(writer, 'ret_series')\n",
 "pos_corr.to_excel(writer, 'pos_corr')\n",
+"turn_over_table.to_excel(writer, 'turn_over')\n",
 "writer.close()"
 ]
 },
@@ -343,54 +350,199 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Raw Product Factor\n",
-"-----------------------------------------"
+"# Factor Performance (risk neutral)\n",
+"---------------------------------"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
-"pos_data = rank_build(total_data[prod_factor_cols].values, use_rank, groups)\n",
-"pos_df = pd.DataFrame(pos_data, columns=prod_factor_cols, index=total_data.Date) / use_rank"
+"from alphamind.portfolio.linearbuilder import linear_build"
 ]
 },
 {
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"total_data[total_factors] = normed_factor.values\n",
+"total_data"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"marke_netural_pos = {}\n",
+"for i, name in enumerate(total_factors):\n",
+"\n",
+"    lbound_exposure = -1e-2 * np.ones(len(risk_factor_cols))\n",
+"    ubound_exposure = 1e-2 * np.ones(len(risk_factor_cols))\n",
+"\n",
+"    def get_benchmark_match_pos(x, name):\n",
+"        er = x[name].values\n",
+"        bm = x[index_components_name].values / 100.\n",
+"        lbound = 0.\n",
+"        ubound = 0.01 + bm\n",
+"        risk_exposure = x[risk_factor_cols].values\n",
+"\n",
+"        status, value, ret = linear_build(er,\n",
+"                                          lbound=lbound,\n",
+"                                          ubound=ubound,\n",
+"                                          risk_exposure=risk_exposure,\n",
+"                                          bm=bm,\n",
+"                                          risk_target=(lbound_exposure, ubound_exposure),\n",
+"                                          solver='GLPK')\n",
+"\n",
+"        if status != 'optimal':\n",
+"            return pd.Series(np.ones(len(er)) / len(er))\n",
+"        else:\n",
+"            return pd.Series(ret)\n",
+"\n",
+"    look_into = risk_factor_cols.append(pd.Series([index_components_name, 'Date', name]))\n",
+"    res = total_data[look_into].groupby('Date').apply(get_benchmark_match_pos, name=name).values\n",
+"    marke_netural_pos[name] = res\n",
+"    print('{0}: Factor {1} is finished'.format(i, name))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"pos_df = pd.DataFrame(marke_netural_pos, index=total_data.Date)"
+]
+},
+{
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
-"ret_df = pd.DataFrame(ret_mat, columns=pos_df.columns, index=pos_df.index)"
+"ret_df = pd.DataFrame(ret_mat, columns=normed_factor.columns, index=normed_factor.index)"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
-"ret_df.groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
+"ret_df[prod_factor_cols].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"collapsed": true
-},
+"metadata": {},
 "outputs": [],
 "source": [
-"len(total_data)"
+"ret_df[prod_factor_cols].groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"pos_corr = pos_df.corr()\n",
+"pos_corr.loc[prod_factor_cols.tolist(), prod_factor_cols.tolist()]"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"turn_over_table = {}\n",
+"pos_df['Code'] = total_data.Code.values\n",
+"pos_df.reset_index(inplace=True)\n",
+"\n",
+"for name in total_factors:\n",
+"    pos_series = pos_df[['Date', 'Code', name]]\n",
+"    pivot_position = pos_series.pivot_table(name, index='Date', columns='Code').fillna(0.)\n",
+"    turn_over_series = pivot_position.diff().abs().sum(axis=1)\n",
+"    turn_over_table[name] = turn_over_series.values\n",
+"\n",
+"turn_over_table = pd.DataFrame(turn_over_table, index=pos_df.Date.unique())\n",
+"turn_over_table = turn_over_table[total_factors]\n",
+"turn_over_table"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"pos_corr = pos_corr.loc[total_factors, total_factors]\n",
+"\n",
+"writer = ExcelWriter('Summary_500_risk_neutral_tmp.xlsx')\n",
+"ret_series = ret_df.groupby(level=0).sum().dropna()\n",
+"ret_series.to_excel(writer, 'ret_series')\n",
+"pos_corr.to_excel(writer, 'pos_corr')\n",
+"turn_over_table.to_excel(writer, 'turn_over')\n",
+"writer.close()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"turn_over_table"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Raw Product Factor\n",
+"-----------------------------------------"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"risk_factor_values = total_data[risk_factor_cols].values\n",
+"index_components_values = total_data[[index_components_name]].values / 100."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"for i, name in enumerate(total_factors):\n",
+"    pos_values = pos_df[[name]].values\n",
+"    risk_values = (pos_values - index_components_values) * risk_factor_values\n",
+"    risk_tables = pd.DataFrame(risk_values, columns=risk_factor_cols, index=total_data.Date)\n",
+"    aggregated_risk = risk_tables.groupby(level=0).sum()\n",
+"    print('{0}: Factor {1}, {2}, {3}'.format(i, name, aggregated_risk.min(), aggregated_risk.max()))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"aggregated_risk.max()"
 ]
 },
 {
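For each date, get_benchmark_match_pos asks linear_build for a long-only portfolio that maximizes the factor score subject to per-asset bounds (0 <= w <= bm + 1%) and active risk exposures X'(w - bm) held inside a +/-1% band, falling back to equal weights when the solver fails. A sketch of that linear program with scipy.optimize.linprog (the exact constraint set inside alphamind's linear_build, including the full-investment constraint, is an assumption here):

    import numpy as np
    from scipy.optimize import linprog

    rng = np.random.RandomState(42)
    n, k = 50, 3                      # assets, risk factors
    er = rng.randn(n)                 # toy factor scores used as expected returns
    bm = np.full(n, 1.0 / n)          # toy benchmark weights
    X = rng.randn(n, k)               # toy risk-factor exposures
    band = 1e-2 * np.ones(k)          # +/-1% active-exposure band, as in the cell above

    # linprog minimizes, so negate er; the two inequality blocks bound X'(w - bm).
    A_ub = np.vstack([X.T, -X.T])
    b_ub = np.concatenate([X.T @ bm + band, -(X.T @ bm - band)])
    res = linprog(-er,
                  A_ub=A_ub, b_ub=b_ub,
                  A_eq=np.ones((1, n)), b_eq=[1.0],        # fully invested (assumed)
                  bounds=[(0.0, 0.01 + w_b) for w_b in bm])  # 0 <= w <= bm + 1%
    print(res.status, res.fun)

The benchmark itself is feasible under these constraints, so the problem always has a solution when the exposure band is centered on bm.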
@@ -405,7 +557,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python [default]",
+"display_name": "Python 3",
 "language": "python",
 "name": "python3"
 },
@@ -419,7 +571,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.5.3"
+"version": "3.6.1"
 }
 },
 "nbformat": 4,