alpha-mind · Commit 8a369262
authored May 13, 2017 by Dr.李
update notebook
parent 8e2520bc
Showing 2 changed files with 232 additions and 33 deletions
notebooks/data_to_hdf.ipynb            +191  -7
notebooks/zz500_factor_analysis.ipynb   +41  -26
notebooks/data_to_hdf.ipynb
@@ -20,9 +20,9 @@
 },
 "outputs": [],
 "source": [
-"server = '10.63.6.176'\n",
+"server = 'rm-bp1psdz5615icqc0yo.mysql.rds.aliyuncs.com'\n",
 "user = 'sa'\n",
-"pwd = 'we083826'\n",
+"pwd = 'We051253524522'\n",
 "\n",
 "engine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}/multifactor?charset=utf8'.format(user, pwd, server))"
 ]
@@ -157,13 +157,33 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "risk_factor_300 = pd.read_sql('select * from risk_factor_300', engine)\n",
 "risk_factor_300['Market'] = 1."
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"risk_factor_300.to_hdf('multifactor.hdf', 'risk_factor_300')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"factor_data = pd.read_sql('select * from factor_data', engine)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -172,7 +192,171 @@
 },
 "outputs": [],
 "source": [
-"risk_factor_300.to_hdf('multifactor.hdf', 'risk_factor_300')"
+"factor_data.to_hdf('multifactor.hdf', 'factor_data')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"prod_factor_cols = pd.Series(['CFinc1', 'BDTO', 'RVOL', 'CHV', 'VAL'])"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"prod_factor_cols.to_hdf('multifactor.hdf', 'prod_factor_cols')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"del risk_factor_500['Bank']\n",
+"del risk_factor_500['NonBankFinancial']"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"risk_factor_cols = pd.Series(risk_factor_500.columns[2:].tolist())"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"risk_factor_cols.to_hdf('multifactor.hdf', 'risk_factor_cols')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"common_factor_cols = pd.Series(factor_data.columns[5:].tolist())"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"common_factor_cols.to_hdf('multifactor.hdf', 'common_factor_cols')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"risk_factor_500.columns"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"return_data_300 = pd.read_sql('select * from return_data_300', engine)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"return_data_300.to_hdf('multifactor.hdf', 'return_data_300')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"return_data_500 = pd.read_sql('select * from return_data_500', engine)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"collapsed": true
+},
+"outputs": [],
+"source": [
+"return_data_500.to_hdf('multifactor.hdf', 'return_data_500')"
+]
 ]
 },
 {
@@ -187,7 +371,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "Python [default]",
 "language": "python",
 "name": "python3"
 },
@@ -201,9 +385,9 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.1"
+"version": "3.5.3"
 }
 },
 "nbformat": 4,
-"nbformat_minor": 2
+"nbformat_minor": 1
 }
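The cells added to data_to_hdf.ipynb all follow the same pattern: read a table from MySQL with pd.read_sql and cache it in multifactor.hdf under a key of the same name, alongside a few pd.Series of column names. Below is a condensed sketch of that pattern, not part of the commit: the connection details are placeholders (not the credentials shown in the diff), and it assumes the pymysql driver and PyTables (required by to_hdf) are installed.

```python
import pandas as pd
import sqlalchemy

# Placeholder connection details -- substitute your own.
server = 'your-mysql-host'
user = 'your_user'
pwd = 'your_password'
engine = sqlalchemy.create_engine(
    'mysql+pymysql://{0}:{1}@{2}/multifactor?charset=utf8'.format(user, pwd, server))

# Cache each table under an HDF key of the same name, as the added cells do.
for table in ['risk_factor_300', 'factor_data', 'return_data_300', 'return_data_500']:
    df = pd.read_sql('select * from {0}'.format(table), engine)
    df.to_hdf('multifactor.hdf', table)

# Column lists consumed by zz500_factor_analysis.ipynb are cached the same way.
prod_factor_cols = pd.Series(['CFinc1', 'BDTO', 'RVOL', 'CHV', 'VAL'])
prod_factor_cols.to_hdf('multifactor.hdf', 'prod_factor_cols')
```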
notebooks/zz500_factor_analysis.ipynb
@@ -18,12 +18,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"%%time\n",
 "factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')\n",
 "index_components = pd.read_hdf('multifactor.hdf', 'index_components')\n",
 "index_data = pd.read_hdf('multifactor.hdf', 'index_data')\n",
 "prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')\n",
 "risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')\n",
-"trade_data = pd.read_hdf('multifactor.hdf', 'trade_data')\n",
+"return_data_500 = pd.read_hdf('multifactor.hdf', 'return_data_500')\n",
 "prod_factor_cols = pd.read_hdf('multifactor.hdf', 'prod_factor_cols')\n",
 "risk_factor_cols = pd.read_hdf('multifactor.hdf', 'risk_factor_cols')\n",
 "common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')"
@@ -42,7 +43,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "del risk_factor_500['Bank']\n",
@@ -54,7 +57,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "index_components_name = '500Weight'\n",
@@ -72,16 +77,7 @@
 },
 "outputs": [],
 "source": [
-"decay = 2"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"trade_data['dret'] = trade_data.Return.groupby(trade_data.Code).shift(-decay)"
+"return_data_500['dret'] = return_data_500['D1LogReturn']"
 ]
 },
 {
@@ -93,7 +89,7 @@
 "outputs": [],
 "source": [
 "index_data['ret'] = index_data[benchmark] / index_data[benchmark].shift(1) - 1.\n",
-"index_data['dret_b'] = index_data['ret'] .shift(-decay)"
+"index_data['dret_b'] = index_data['ret'] .shift(-2)"
 ]
 },
 {
@@ -102,19 +98,22 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"%%time\n",
 "total_data = pd.merge(factor_data, prod_500[prod_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
 "total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n",
 "total_data.dropna(inplace=True)\n",
 "total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n",
 "total_data.fillna(0, inplace=True)\n",
 "total_data = pd.merge(total_data, risk_factor_500, on=['Date', 'Code'])\n",
-"total_data = pd.merge(total_data, trade_data[['Date', 'Code', 'dret']], on=['Date', 'Code'])"
+"total_data = pd.merge(total_data, return_data_500[['Date', 'Code', 'dret']], on=['Date', 'Code'])"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "total_data = total_data[total_data[index_components_name] != 0]"
@@ -144,7 +143,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "total_factors = common_factor_cols.append(prod_factor_cols)\n",
@@ -170,7 +171,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
@@ -210,7 +213,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
@@ -220,7 +225,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "top_factors = ret_df.groupby(level=0).sum()[-90:].sum().abs().sort_values(ascending=False)[:10].index"
@@ -283,7 +290,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "pos_corr = pos_df.corr()"
@@ -353,7 +362,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
@@ -363,7 +374,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "ret_df.groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
@@ -372,7 +385,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"collapsed": true
+},
 "outputs": [],
 "source": [
 "len(total_data)"
@@ -390,7 +405,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "Python [default]",
 "language": "python",
 "name": "python3"
 },
@@ -404,7 +419,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.1"
+"version": "3.5.3"
 }
 },
 "nbformat": 4,
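For context, the unchanged ret_mat lines that this notebook still relies on compute per-factor excess returns as (position − index weight / 100) × dret. A toy, self-contained illustration of that arithmetic is below; the numbers are made up, and pos_df here is a plain stand-in for the notebook's per-factor position frame, whose construction (and the Date level carried by the real ret_df) lies outside this diff.

```python
import pandas as pd

# Made-up example: 4 stock-date rows, position weights for 2 factors.
pos_df = pd.DataFrame({'factor_a': [0.010, 0.000, 0.008, 0.002],
                       'factor_b': [0.004, 0.006, 0.000, 0.010]})
total_data = pd.DataFrame({'500Weight': [0.5, 0.8, 0.5, 0.8],   # index weight, in percent
                           'dret': [0.010, -0.020, 0.005, 0.012]})

# Excess-return matrix, mirroring the notebook's ret_mat line.
ret_mat = (pos_df.values - total_data[['500Weight']].values / 100.) * total_data[['dret']].values
ret_df = pd.DataFrame(ret_mat, columns=pos_df.columns)

# Per-factor total excess return over the sample (the notebook cumsums and plots this).
print(ret_df.sum())
```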