Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
f391d805
Commit
f391d805
authored
May 15, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added new file for back testing
parent
b4f25246
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
700 additions
and
0 deletions
+700
-0
new_portfolio.ipynb
notebooks/new_portfolio.ipynb
+700
-0
No files found.
notebooks/new_portfolio.ipynb
0 → 100644
View file @
f391d805
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pylab inline\n",
"import pandas as pd\n",
"import sqlalchemy\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"engine = sqlalchemy.create_engine('mysql+mysqldb://sa:we083826@10.63.6.176/multifactor?charset=utf8')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"common_factors = ['EPSAfterNonRecurring',\n",
" 'DivP']\n",
"\n",
"prod_factor = ['CFinc1', 'BDTO', 'RVOL', 'CHV']\n",
"common_500 = []\n",
"\n",
"factor_weights = 1. / np.array([15.44, 32.72, 49.90, 115.27, 97.76, 280.89])\n",
"factor_weights = factor_weights / factor_weights.sum()\n",
"\n",
"total_factors = common_factors + common_500 + prod_factor\n",
"risk_factor_500 = ['CommunicationsAndTransportation',\n",
"'LeisureServices',\n",
"'MultiMedia',\n",
"'PublicUtility',\n",
"'Agriculture',\n",
"'ChemicalIndustry',\n",
"'MedicationAndBio',\n",
"'CommercialTrade',\n",
"'DefenseIndustry',\n",
"'HouseholdAppliances',\n",
"'ConstructionAndMaterial',\n",
"'BuildingDecoration',\n",
"'RealEstate',\n",
"'DiversifiedMetal',\n",
"'Machinary',\n",
"'MotorVehicle',\n",
"'ElectronicIndustry',\n",
"'ElectricalEquip',\n",
"'TextileAndGarment',\n",
"'Synthetics',\n",
"'Computer',\n",
"'LightManufacturing',\n",
"'Telecoms',\n",
"'ExtractiveIndustry',\n",
"'Metal',\n",
"'FoodAndBeverage',\n",
"'Size']\n",
"\n",
"index_components = '500Weight'\n",
"return_data = 'D1LogReturn'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"commo_factors_df = pd.read_sql('select Date, Code, 申万一级行业, {0} from factor_data'.format(','.join(common_factors)), engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"prod_factors_df = pd.read_sql('select Date, Code, {0} from prod_500'.format(','.join(prod_factor)), engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"common_500_df = pd.read_sql('select Date, Code, {0} from common_500'.format(','.join(common_500)), engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_df = pd.read_sql('select Date, Code, {0} from risk_factor_500'.format(','.join(risk_factor_500)), engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"index_components_df = pd.read_sql('select Date, Code, {0} from index_components'.format(index_components), engine)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"return_df = pd.read_sql('select Date, Code, {0} from return_500'.format(return_data), engine)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Merge Data\n",
"-------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"#total_data = pd.merge(commo_factors_df, common_500_df, on=['Date', 'Code'])\n",
"total_data = pd.merge(commo_factors_df, prod_factors_df, on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, risk_factor_df, on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, index_components_df, on=['Date', 'Code'])\n",
"total_data = pd.merge(total_data, return_df, on=['Date', 'Code'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_data = total_data[total_data[index_components] != 0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(total_data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Process Data\n",
"--------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.data.standardize import standardize\n",
"from alphamind.data.neutralize import neutralize\n",
"from alphamind.data.winsorize import winsorize_normal"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"risk_factors_names = risk_factor_500 + ['Market']\n",
"total_data['Market'] = 1.\n",
"\n",
"all_factors = total_data[total_factors]\n",
"risk_factors = total_data[risk_factors_names]\n",
"groups = total_data.Date.values.astype(np.int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"factor_processed = neutralize(risk_factors.values,\n",
" standardize(winsorize_normal(all_factors.values, groups=groups),\n",
" groups=groups),\n",
" groups=groups)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"normed_factor.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = normed_factor.iloc[:, :-1] @ factor_weights[:-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_data['res'] = res\n",
"total_data[total_factors] = normed_factor.values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Factor Performance (Long Top)\n",
"---------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.portfolio.rankbuilder import rank_build"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"factor_names = total_factors + ['res']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"use_rank = 100\n",
"pos_data = rank_build(total_data[factor_names].values, use_rank, groups)\n",
"pos_df = pd.DataFrame(pos_data, columns=factor_names, index=normed_factor.index) / use_rank"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_mat = (pos_df.values - total_data[[index_components]].values / 100.) * total_data[['D1LogReturn']].values\n",
"ret_df = pd.DataFrame(ret_mat, columns=factor_names, index=normed_factor.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df.groupby(level=0).sum().tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df['res'].groupby(level=0).sum()[-500:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_df['Code'] = total_data.Code.values\n",
"pos_df['Ind'] = total_data['申万一级行业'].values\n",
"pos_df['bm'] = total_data[index_components].values / 100."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pos_df.loc['2017-01-01':, :].to_csv('aggregated_pos.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"turn_over_table = {}\n",
"pos_df['Code'] = total_data.Code.values\n",
"pos_df.reset_index(inplace=True)\n",
"\n",
"for name in total_factors + ['res']:\n",
" pos_series = pos_df[['Date', 'Code', name]]\n",
" pivot_position = pos_series.pivot_table(name, index='Date', columns='Code').fillna(0.)\n",
" turn_over_series = pivot_position.diff().abs().sum(axis=1)\n",
" turn_over_table[name] = turn_over_series.values\n",
" \n",
"turn_over_table = pd.DataFrame(turn_over_table, index=pos_df.Date.unique())\n",
"turn_over_table = turn_over_table[total_factors + ['res']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_tc_long_top_df = ret_df.groupby(level=0).sum() - turn_over_table * 0.0015"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_long_top_df[total_factors + ['res']][-30:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_long_top_df['res'][-30:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Factor Performance (risk neutral)\n",
"---------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import alphamind.portfolio.linearbuilder as lb\n",
"import importlib\n",
"importlib.reload(lb)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"marke_netural_pos = {}\n",
"\n",
"for i, name in enumerate(total_factors + ['res']):\n",
"\n",
" lbound_exposure = -0.01\n",
" ubound_exposure = 0.01\n",
"\n",
" def get_benchmark_match_pos(x, name):\n",
" er = x[name].values\n",
" bm = x[index_components].values / 100.\n",
" lbound = 0.\n",
" ubound = 0.01 + bm\n",
" risk_exposure = x[risk_factors_names].values\n",
"\n",
" status, value , ret = lb.linear_build(er,\n",
" lbound=lbound,\n",
" ubound=ubound,\n",
" risk_exposure=risk_exposure,\n",
" bm=bm,\n",
" risk_target=(lbound_exposure, ubound_exposure),\n",
" solver=None)\n",
" print(status)\n",
"\n",
" if status != 'optimal':\n",
" return pd.Series(np.ones(len(er)) / len(er))\n",
" else:\n",
" return pd.Series(ret)\n",
" \n",
" look_into = risk_factors_names + [index_components, 'Date', name]\n",
" res = total_data[look_into].groupby('Date').apply(get_benchmark_match_pos, name=name).values\n",
" marke_netural_pos[name] = res\n",
" print('{0}: Factor {1} is finished'.format(i, name))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_df = pd.DataFrame(marke_netural_pos, index=total_data.Date)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_mat = (pos_df.values - total_data[[index_components]].values / 100.) * total_data[['D1LogReturn']].values\n",
"ret_df = pd.DataFrame(ret_mat, columns=pos_df.columns, index=normed_factor.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[total_factors + ['res']].groupby(level=0).sum()[-500:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df[total_factors + ['res']].groupby(level=0).sum().tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pos_df['Code'] = total_data.Code.values\n",
"pos_df['Ind'] = total_data['申万一级行业'].values\n",
"pos_df['bm'] = total_data[index_components].values / 100.\n",
"pos_df.loc['2017-01-01':, :].to_csv('aggregated_pos_risk_neutral.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df['res'].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df['res'].groupby(level=0).sum()[-500:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pos_df['2017-01-01':].corr()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pos_df.corr()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"turn_over_table = {}\n",
"pos_df['Code'] = total_data.Code.values\n",
"pos_df.reset_index(inplace=True)\n",
"\n",
"for name in total_factors + ['res']:\n",
" pos_series = pos_df[['Date', 'Code', name]]\n",
" pivot_position = pos_series.pivot_table(name, index='Date', columns='Code').fillna(0.)\n",
" turn_over_series = pivot_position.diff().abs().sum(axis=1)\n",
" turn_over_table[name] = turn_over_series.values\n",
" \n",
"turn_over_table = pd.DataFrame(turn_over_table, index=pos_df.Date.unique())\n",
"turn_over_table = turn_over_table[total_factors + ['res']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_risk_neutral_df = ret_df.groupby(level=0).sum() - turn_over_table * 0.0015"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_risk_neutral_df[total_factors + ['res']].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_risk_neutral_df['res'][-500:].cumsum().plot(figsize=(16, 8))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparison\n",
"--------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_risk_neutral_df[-500:].std()[total_factors + ['res']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_long_top_df[-500:].std()[total_factors + ['res']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_risk_neutral_df[-500:].mean()[total_factors + ['res']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_long_top_df[-500:].mean()[total_factors + ['res']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_tc_risk_neutral_df.tail(50)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df.groupby(level=0).sum().tail(50)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment