Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
692bd963
Commit
692bd963
authored
May 12, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added more notebooks
parent
9b102854
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
621 additions
and
0 deletions
+621
-0
data_to_hdf.ipynb
notebooks/data_to_hdf.ipynb
+209
-0
zz500_factor_analysis.ipynb
notebooks/zz500_factor_analysis.ipynb
+412
-0
No files found.
notebooks/data_to_hdf.ipynb
0 → 100644
View file @
692bd963
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sqlalchemy\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"server = '10.63.6.176'\n",
"user = 'sa'\n",
"pwd = 'we083826'\n",
"\n",
"engine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}/multifactor?charset=utf8'.format(user, pwd, server))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"index_components = pd.read_sql('select * from index_components', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"index_components.to_hdf('multifactor.hdf', 'index_components')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"index_data = pd.read_sql('select * from index_data', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"index_data.to_hdf('multifactor.hdf', 'index_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_500 = pd.read_sql('select * from prod_500', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"prod_500.to_hdf('multifactor.hdf', 'prod_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_500 = pd.read_sql('select * from risk_factor_500', engine)\n",
"risk_factor_500['Market'] = 1."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_500.to_hdf('multifactor.hdf', 'risk_factor_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"trade_data = pd.read_sql('select * from trade_data', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"trade_data.to_hdf('multifactor.hdf', 'trade_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_300 = pd.read_sql('select * from prod_300', engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_300.to_hdf('multifactor.hdf', 'prod_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"risk_factor_300 = pd.read_sql('select * from risk_factor_300', engine)\n",
"risk_factor_300['Market'] = 1."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_300.to_hdf('multifactor.hdf', 'risk_factor_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
notebooks/zz500_factor_analysis.ipynb
0 → 100644
View file @
692bd963
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pylab inline\n",
"import pandas as pd\n",
"import sqlalchemy\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factor_data = pd.read_hdf('multifactor.hdf', 'factor_data')\n",
"index_components = pd.read_hdf('multifactor.hdf', 'index_components')\n",
"index_data = pd.read_hdf('multifactor.hdf', 'index_data')\n",
"prod_500 = pd.read_hdf('multifactor.hdf', 'prod_500')\n",
"risk_factor_500 = pd.read_hdf('multifactor.hdf', 'risk_factor_500')\n",
"trade_data = pd.read_hdf('multifactor.hdf', 'trade_data')\n",
"prod_factor_cols = pd.read_hdf('multifactor.hdf', 'prod_factor_cols')\n",
"risk_factor_cols = pd.read_hdf('multifactor.hdf', 'risk_factor_cols')\n",
"common_factor_cols = pd.read_hdf('multifactor.hdf', 'common_factor_cols')"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# data merging\n",
"---------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"del risk_factor_500['Bank']\n",
"del risk_factor_500['NonBankFinancial']\n",
"del factor_data['申万二级行业']\n",
"del factor_data['申万三级行业']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"index_components_name = '500Weight'\n",
"benchmark = 'zz500'\n",
"\n",
"del risk_factor_cols[25]\n",
"del risk_factor_cols[26]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"decay = 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"trade_data['dret'] = trade_data.Return.groupby(trade_data.Code).shift(-decay)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"index_data['ret'] = index_data[benchmark] / index_data[benchmark].shift(1) - 1.\n",
"index_data['dret_b'] = index_data['ret'] .shift(-decay)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data = pd.merge(factor_data, prod_500[prod_factor_cols.append(pd.Series(['Date', 'Code']))], on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n",
"total_data.dropna(inplace=True)\n",
"total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n",
"total_data.fillna(0, inplace=True)\n",
"total_data = pd.merge(total_data, risk_factor_500, on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, trade_data[['Date', 'Code', 'dret']], on=['Date', 'Code'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data = total_data[total_data[index_components_name] != 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# data processing\n",
"---------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.data.standardize import standardize\n",
"from alphamind.data.neutralize import neutralize\n",
"from alphamind.data.winsorize import winsorize_normal"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_factors = common_factor_cols.append(prod_factor_cols)\n",
"\n",
"all_factors = total_data[total_factors]\n",
"risk_factors = total_data[risk_factor_cols]\n",
"groups = total_data.Date.values.astype(np.int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"factor_processed = neutralize(risk_factors.values,\n",
" standardize(winsorize_normal(all_factors.values, groups=groups),\n",
" groups=groups),\n",
" groups=groups)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Factor Performance (Long top)\n",
"---------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.portfolio.rankbuilder import rank_build"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"use_rank = 100\n",
"pos_data = rank_build(normed_factor.values, use_rank, groups)\n",
"pos_df = pd.DataFrame(pos_data, columns=normed_factor.columns, index=normed_factor.index) / use_rank"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
"ret_df = pd.DataFrame(ret_mat, columns=normed_factor.columns, index=normed_factor.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"top_factors = ret_df.groupby(level=0).sum()[-90:].sum().abs().sort_values(ascending=False)[:10].index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum()[-180:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum()[-60:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[prod_factor_cols].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[prod_factor_cols].groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pos_corr = pos_df.corr()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pos_corr.loc[prod_factor_cols.tolist(), prod_factor_cols.tolist()]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Save Data\n",
"------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pandas import ExcelWriter"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"writer = ExcelWriter('Summary_500.xlsx')\n",
"ret_series = ret_df.groupby(level=0).sum().dropna()\n",
"ret_series.to_excel(writer, 'ret_series')\n",
"pos_corr.to_excel(writer, 'pos_corr')\n",
"writer.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Raw Product Factor \n",
"-----------------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_data = rank_build(total_data[prod_factor_cols].values, use_rank, groups)\n",
"pos_df = pd.DataFrame(pos_data, columns=prod_factor_cols, index=total_data.Date) / use_rank"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
"ret_df = pd.DataFrame(ret_mat, columns=pos_df.columns, index=pos_df.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df.groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(total_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment