Commit 692bd963 authored by Dr.李

added more notebooks

parent 9b102854
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sqlalchemy\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"from urllib.parse import quote_plus\n",
"\n",
"# Connection settings are read from the environment so that no secret is stored in the notebook.\n",
"# NOTE(review): the password that used to be committed in this cell should be rotated.\n",
"server = os.environ.get('MULTIFACTOR_DB_HOST', '10.63.6.176')\n",
"user = os.environ.get('MULTIFACTOR_DB_USER', 'sa')\n",
"# Fall back to an interactive prompt when MULTIFACTOR_DB_PWD is not set.\n",
"pwd = os.environ.get('MULTIFACTOR_DB_PWD') or getpass('Database password: ')\n",
"\n",
"# quote_plus keeps reserved URL characters (e.g. '@', ':' or '/') in the credentials from corrupting the connection URL.\n",
"engine = sqlalchemy.create_engine('mysql+pymysql://{0}:{1}@{2}/multifactor?charset=utf8'.format(quote_plus(user), quote_plus(pwd), server))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the whole index_components table into a DataFrame.\n",
"index_components = pd.read_sql(sql='select * from index_components', con=engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the frame to multifactor.hdf under its own key.\n",
"# `key` is passed by name because recent pandas releases deprecate passing it positionally.\n",
"index_components.to_hdf('multifactor.hdf', key='index_components')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the whole index_data table into a DataFrame.\n",
"index_data = pd.read_sql(sql='select * from index_data', con=engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the frame to multifactor.hdf under its own key.\n",
"# `key` is passed by name because recent pandas releases deprecate passing it positionally.\n",
"index_data.to_hdf('multifactor.hdf', key='index_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the whole prod_500 table into a DataFrame.\n",
"prod_500 = pd.read_sql(sql='select * from prod_500', con=engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write the frame to multifactor.hdf under its own key.\n",
"# `key` is passed by name because recent pandas releases deprecate passing it positionally.\n",
"prod_500.to_hdf('multifactor.hdf', key='prod_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the whole risk_factor_500 table and add a 'Market' column that is 1.0 on every row.\n",
"risk_factor_500 = pd.read_sql(sql='select * from risk_factor_500', con=engine).assign(Market=1.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write the frame to multifactor.hdf under its own key.\n",
"# `key` is passed by name because recent pandas releases deprecate passing it positionally.\n",
"risk_factor_500.to_hdf('multifactor.hdf', key='risk_factor_500')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the whole trade_data table into a DataFrame.\n",
"trade_data = pd.read_sql(sql='select * from trade_data', con=engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write the frame to multifactor.hdf under its own key.\n",
"# `key` is passed by name because recent pandas releases deprecate passing it positionally.\n",
"trade_data.to_hdf('multifactor.hdf', key='trade_data')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the whole prod_300 table into a DataFrame.\n",
"prod_300 = pd.read_sql(sql='select * from prod_300', con=engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write the frame to multifactor.hdf under its own key.\n",
"# `key` is passed by name because recent pandas releases deprecate passing it positionally.\n",
"prod_300.to_hdf('multifactor.hdf', key='prod_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read the whole risk_factor_300 table and add a 'Market' column that is 1.0 on every row.\n",
"risk_factor_300 = pd.read_sql(sql='select * from risk_factor_300', con=engine).assign(Market=1.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write the frame to multifactor.hdf under its own key.\n",
"# `key` is passed by name because recent pandas releases deprecate passing it positionally.\n",
"risk_factor_300.to_hdf('multifactor.hdf', key='risk_factor_300')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Only the inline plotting backend is enabled here; %pylab would also star-import numpy and matplotlib into the namespace.\n",
"# numpy is imported explicitly as np below.\n",
"%matplotlib inline\n",
"import pandas as pd\n",
"import sqlalchemy\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Every input is read from the same HDF5 file, one key per object.\n",
"store_path = 'multifactor.hdf'\n",
"\n",
"factor_data = pd.read_hdf(store_path, key='factor_data')\n",
"index_components = pd.read_hdf(store_path, key='index_components')\n",
"index_data = pd.read_hdf(store_path, key='index_data')\n",
"prod_500 = pd.read_hdf(store_path, key='prod_500')\n",
"risk_factor_500 = pd.read_hdf(store_path, key='risk_factor_500')\n",
"trade_data = pd.read_hdf(store_path, key='trade_data')\n",
"\n",
"# Column-name collections used further down to select factor columns.\n",
"prod_factor_cols = pd.read_hdf(store_path, key='prod_factor_cols')\n",
"risk_factor_cols = pd.read_hdf(store_path, key='risk_factor_cols')\n",
"common_factor_cols = pd.read_hdf(store_path, key='common_factor_cols')"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Data Merging\n",
"---------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Drop the columns by name and rebind the result.\n",
"# errors='ignore' makes re-running this cell a no-op instead of a KeyError.\n",
"risk_factor_500 = risk_factor_500.drop(['Bank', 'NonBankFinancial'], axis=1, errors='ignore')\n",
"factor_data = factor_data.drop(['申万二级行业', '申万三级行业'], axis=1, errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"index_components_name = '500Weight'\n",
"benchmark = 'zz500'\n",
"\n",
"# Remove the entries labelled 25 and 26 (label-based, like `del`).\n",
"# errors='ignore' tolerates labels that are already gone, so the cell can be re-run.\n",
"# NOTE(review): assumes risk_factor_cols is a pandas Series; presumably these two entries are 'Bank' and 'NonBankFinancial' - confirm.\n",
"risk_factor_cols = risk_factor_cols.drop([25, 26], errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Offset, in rows, used by the shift(-decay) calls that build the 'dret' and 'dret_b' columns.\n",
"decay = 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Order each Code's rows by Date before shifting: shift() works on row position and nothing in this notebook guarantees trade_data is sorted.\n",
"# The result is aligned back on the index, so the row order of trade_data itself is unchanged.\n",
"# NOTE(review): assumes trade_data has a unique index - confirm.\n",
"ordered_trades = trade_data.sort_values(['Code', 'Date'])\n",
"trade_data['dret'] = ordered_trades.groupby('Code')['Return'].shift(-decay)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Compute the benchmark return on a Date-ordered series: shift() is positional and nothing here guarantees index_data is sorted.\n",
"# Assigning the results back aligns on the index, so index_data keeps its row order.\n",
"# NOTE(review): assumes index_data has a unique index - confirm.\n",
"benchmark_level = index_data.sort_values('Date')[benchmark]\n",
"benchmark_ret = benchmark_level / benchmark_level.shift(1) - 1.\n",
"index_data['ret'] = benchmark_ret\n",
"index_data['dret_b'] = benchmark_ret.shift(-decay)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build the column list by list concatenation: Series.append was removed in pandas 2.0.\n",
"prod_cols = prod_factor_cols.tolist() + ['Date', 'Code']\n",
"\n",
"total_data = pd.merge(factor_data, prod_500[prod_cols], on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, index_data[['Date', 'dret_b']], on='Date', how='left')\n",
"# Discard every row that has a missing value at this point.\n",
"total_data = total_data.dropna()\n",
"total_data = pd.merge(total_data, index_components[['Date', 'Code', index_components_name]], on=['Date', 'Code'], how='left')\n",
"# Rows without a match in index_components get 0 in the weight column.\n",
"total_data = total_data.fillna(0)\n",
"total_data = pd.merge(total_data, risk_factor_500, on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, trade_data[['Date', 'Code', 'dret']], on=['Date', 'Code'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose index weight is non-zero.\n",
"has_weight = total_data[index_components_name] != 0\n",
"total_data = total_data[has_weight]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Processing\n",
"---------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.data.standardize import standardize\n",
"from alphamind.data.neutralize import neutralize\n",
"from alphamind.data.winsorize import winsorize_normal"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# pd.concat replaces Series.append, which was removed in pandas 2.0.\n",
"total_factors = pd.concat([common_factor_cols, prod_factor_cols])\n",
"\n",
"all_factors = total_data[total_factors]\n",
"risk_factors = total_data[risk_factor_cols]\n",
"# The builtin int replaces the np.int alias, which was removed in NumPy 1.24; both name the same type.\n",
"groups = total_data.Date.values.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# Chain the three alphamind steps, each called with the same `groups` array:\n",
"# winsorize_normal -> standardize -> neutralize (with the risk factor matrix as first argument).\n",
"winsorized = winsorize_normal(all_factors.values, groups=groups)\n",
"standardized = standardize(winsorized, groups=groups)\n",
"factor_processed = neutralize(risk_factors.values, standardized, groups=groups)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wrap the processed array in a frame: one column per entry of total_factors, indexed by each row's Date.\n",
"normed_factor = pd.DataFrame(data=factor_processed, index=total_data['Date'], columns=total_factors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Factor Performance (Long top)\n",
"---------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.portfolio.rankbuilder import rank_build"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"use_rank = 100\n",
"# Positions come from rank_build and are then divided by use_rank.\n",
"pos_data = rank_build(normed_factor.values, use_rank, groups)\n",
"pos_df = pd.DataFrame(data=pos_data, index=normed_factor.index, columns=normed_factor.columns)\n",
"pos_df = pos_df / use_rank"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# (position minus the index weight divided by 100) multiplied by the shifted return column 'dret'.\n",
"benchmark_weight = total_data[[index_components_name]].values / 100.\n",
"shifted_ret = total_data[['dret']].values\n",
"ret_mat = (pos_df.values - benchmark_weight) * shifted_ret\n",
"ret_df = pd.DataFrame(data=ret_mat, index=normed_factor.index, columns=normed_factor.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rank the columns by the absolute value of their total over the last 90 rows of the per-Date sums; keep the first 10.\n",
"per_date_ret = ret_df.groupby(level=0).sum()\n",
"recent_abs_total = per_date_ret[-90:].sum().abs()\n",
"top_factors = recent_abs_total.sort_values(ascending=False)[:10].index"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative sum of the top factors' per-Date returns over the last 180 rows.\n",
"top_per_date_180 = ret_df[top_factors].groupby(level=0).sum()\n",
"top_per_date_180[-180:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative sum of the top factors' per-Date returns over the last 90 rows.\n",
"top_per_date_90 = ret_df[top_factors].groupby(level=0).sum()\n",
"top_per_date_90[-90:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative sum of the top factors' per-Date returns over the last 60 rows.\n",
"top_per_date_60 = ret_df[top_factors].groupby(level=0).sum()\n",
"top_per_date_60[-60:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative sum of the top factors' per-Date returns over the full history.\n",
"top_per_date_all = ret_df[top_factors].groupby(level=0).sum()\n",
"top_per_date_all.cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative sum of the product factors' per-Date returns over the full history.\n",
"prod_per_date_all = ret_df[prod_factor_cols].groupby(level=0).sum()\n",
"prod_per_date_all.cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative sum of the product factors' per-Date returns over the last 90 rows.\n",
"prod_per_date_90 = ret_df[prod_factor_cols].groupby(level=0).sum()\n",
"prod_per_date_90[-90:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Pairwise Pearson correlation (the pandas default) between the position columns.\n",
"pos_corr = pos_df.corr(method='pearson')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show only the block of the correlation matrix that belongs to the product factors.\n",
"prod_names = prod_factor_cols.tolist()\n",
"pos_corr.loc[prod_names, prod_names]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Save Data\n",
"------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pandas import ExcelWriter"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Per-Date return sums, with rows containing missing values removed.\n",
"ret_series = ret_df.groupby(level=0).sum().dropna()\n",
"\n",
"# The context manager closes the workbook even if one of the to_excel calls raises.\n",
"# sheet_name is passed by name because recent pandas releases deprecate passing it positionally.\n",
"with ExcelWriter('Summary_500.xlsx') as writer:\n",
"    ret_series.to_excel(writer, sheet_name='ret_series')\n",
"    pos_corr.to_excel(writer, sheet_name='pos_corr')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Raw Product Factor \n",
"-----------------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Build positions directly from the product factor columns of total_data, reusing use_rank and groups.\n",
"raw_prod_values = total_data[prod_factor_cols].values\n",
"pos_data = rank_build(raw_prod_values, use_rank, groups)\n",
"pos_df = pd.DataFrame(data=pos_data, index=total_data['Date'], columns=prod_factor_cols)\n",
"pos_df = pos_df / use_rank"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# (position minus the index weight divided by 100) multiplied by the shifted return column 'dret'.\n",
"benchmark_weight = total_data[[index_components_name]].values / 100.\n",
"shifted_ret = total_data[['dret']].values\n",
"ret_mat = (pos_df.values - benchmark_weight) * shifted_ret\n",
"ret_df = pd.DataFrame(data=ret_mat, index=pos_df.index, columns=pos_df.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cumulative sum of the per-Date returns over the last 90 rows.\n",
"raw_per_date = ret_df.groupby(level=0).sum()\n",
"raw_per_date[-90:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of rows in the merged data set.\n",
"total_data.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment