Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
2b165e01
Commit
2b165e01
authored
Feb 10, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added factor strategy with dask cluster
parent
2dc2692a
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
447 additions
and
0 deletions
+447
-0
full factors strategy with dask cluster.ipynb
notebooks/full factors strategy with dask cluster.ipynb
+447
-0
No files found.
notebooks/full factors strategy with dask cluster.ipynb
0 → 100644
View file @
2b165e01
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import pandas as pd\n",
"from matplotlib import pyplot as plt\n",
"import functools\n",
"from alphamind.api import *\n",
"from PyFin.api import *\n",
"\n",
"plt.style.use('ggplot')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Meta Data Parameters\n",
"----------------------"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"freq = '10b'\n",
"universe = Universe('custom', ['zz800'])\n",
"batch = 16\n",
"neutralized_risk = ['SIZE'] + industry_styles\n",
"risk_model = 'short'\n",
"pre_process = [winsorize_normal, standardize]\n",
"post_process = [winsorize_normal, standardize]\n",
"warm_start = 0\n",
"data_source = 'postgres+psycopg2://postgres:we083826@192.168.0.102/alpha'\n",
"dask_cluster = '192.168.0.102:8786'\n",
"\n",
"horizon = map_freq(freq)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper function to train / Predict a model\n",
"-------------------"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def train_daily_model(params):\n",
" ref_date, meta_model = params\n",
" data_meta = DataMeta(freq=freq,\n",
" universe=universe,\n",
" batch=batch,\n",
" neutralized_risk=neutralized_risk,\n",
" risk_model=risk_model,\n",
" pre_process=pre_process,\n",
" post_process=post_process,\n",
" warm_start=warm_start,\n",
" data_source=data_source)\n",
"\n",
" return train_model(ref_date=ref_date, alpha_model=meta_model, data_meta=data_meta)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def predict_daily_model(params):\n",
" ref_date, alpha_model = params\n",
" data_meta = DataMeta(freq=freq,\n",
" universe=universe,\n",
" batch=batch,\n",
" neutralized_risk=neutralized_risk,\n",
" risk_model=risk_model,\n",
" pre_process=pre_process,\n",
" post_process=post_process,\n",
" warm_start=warm_start,\n",
" data_source=data_source)\n",
" return predict_by_model(ref_date=ref_date, alpha_model=alpha_model, data_meta=data_meta)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Strategy Settings\n",
"---------------------"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"start_date = '2010-02-01'\n",
"end_date = '2018-01-29'\n",
"industry_category = 'sw_adj'\n",
"industry_level = 1\n",
"industries = industry_list(industry_category, industry_level)\n",
"styles = ['SIZE']\n",
"benchmark = 300\n",
"turn_over_target_base = 0.30\n",
"executor = NaiveExecutor()\n",
"\n",
"ref_dates = makeSchedule(firstDate=start_date,\n",
" endDate=end_date,\n",
" tenor=freq,\n",
" calendar='china.sse',\n",
" dateGenerationRule=DateGeneration.Backward)\n",
"ref_dates = [ref_date.strftime('%Y-%m-%d') for ref_date in ref_dates]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Features\n",
"\n",
"base1 = LAST('roe_q')\n",
"base2 = CSRes('ep_q', base1)\n",
"\n",
"features = {'f01': base1,\n",
" 'f02': base2,\n",
" 'f03': CSRes(CSRes('market_confidence_15d', base1), base2),\n",
" 'f04': CSRes(CSRes('RecievableTO', base1), base2),\n",
" 'f05': CSRes(CSRes('val_q', base1), base2),\n",
" 'f06': CSRes(CSRes('BP', base1), base2),\n",
" 'f07': CSRes(CSRes('con_pe_rolling_order', base1), base2),\n",
" 'f08': CSRes(CSRes('con_pb_rolling_order', base1), base2),\n",
" 'f09': CSRes(CSRes('DebtEquityRatio', base1), base2)}\n",
"\n",
"weights = {'f01': 1.0,\n",
" 'f02': 1.0,\n",
" 'f03': 0.25,\n",
" 'f04': 0.25,\n",
" 'f05': 0.25,\n",
" 'f06': 0.25,\n",
" 'f07': -0.25,\n",
" 'f08': -0.25,\n",
" 'f09': -0.25}\n",
"\n",
"const_model = ConstLinearModel(features=features, weights=weights)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Model\n",
"\n",
"meta_model = XGBTrainer(features=features,\n",
" objective='reg:linear',\n",
" booster='gbtree',\n",
" tree_method='hist',\n",
" n_estimators=2000,\n",
" learning_rate=0.01,\n",
" early_stopping_rounds=30,\n",
" subsample=0.25,\n",
" colsample_bytree=1.,\n",
" n_jobs=1,\n",
" eval_sample=0.3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train / Predict Models with Dask Cluster\n",
"--------------------------"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training Phase"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from dask.distributed import Client\n",
"client = Client(dask_cluster)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"tasks = client.map(train_daily_model, [(ref_date, meta_model) for ref_date in ref_dates])\n",
"models = client.gather(tasks) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Predicting Phase"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"tasks = client.map(predict_daily_model, list(zip(ref_dates, models)))\n",
"predictions1 = client.gather(tasks)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"tasks = client.map(predict_daily_model, [(ref_date, const_model) for ref_date in ref_dates])\n",
"predictions2 = client.gather(tasks)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Portfolio Rebalance\n",
"-----------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# constraints setting\n",
"total_risks = industries + styles + ['benchmark']\n",
"\n",
"b_type = []\n",
"l_val = []\n",
"u_val = []\n",
"\n",
"for j, name in enumerate(total_risks):\n",
" if name == 'benchmark':\n",
" b_type.append(BoundaryType.RELATIVE)\n",
" l_val.append(0.8)\n",
" u_val.append(1.0)\n",
" elif name == 'SIZE':\n",
" b_type.append(BoundaryType.ABSOLUTE)\n",
" l_val.append(0.)\n",
" u_val.append(0.)\n",
" else:\n",
" b_type.append(BoundaryType.RELATIVE)\n",
" l_val.append(1.0)\n",
" u_val.append(1.0)\n",
" \n",
"\n",
"bounds = create_box_bounds(total_risks, b_type, l_val, u_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"engine = SqlEngine(data_source)\n",
"\n",
"rets = []\n",
"turn_overs = []\n",
"previous_pos = pd.DataFrame()\n",
"\n",
"trade_dates = ref_dates\n",
"\n",
"for i, ref_date in enumerate(trade_dates):\n",
" er = 0.0 * predictions1[i].values.flatten().astype(float) + 1.0 * predictions2[i].values.flatten().astype(float)\n",
" codes = predictions2[i].index.values.astype(int).tolist()\n",
" industry_data = engine.fetch_industry_matrix(ref_date, codes, category=industry_category, level=industry_level)\n",
" industry_exp = industry_data[industries].values\n",
" industry_names = industry_data.industry_name.values\n",
" style_exp = engine.fetch_risk_model(ref_date, codes, risk_model=risk_model)[1][styles].values\n",
" benchmark_w = engine.fetch_benchmark(ref_date, benchmark, codes).weight.values\n",
" is_in_benchmark = (benchmark_w > 0.).astype(float)\n",
" \n",
" risk_exp = np.concatenate([industry_exp, style_exp, is_in_benchmark.reshape((-1, 1))], axis=1)\n",
" cons_mat = pd.DataFrame(risk_exp, columns=total_risks)\n",
" constraint = LinearConstraints(bounds=bounds,\n",
" cons_mat=cons_mat,\n",
" backbone=benchmark_w)\n",
" \n",
" lbound = np.maximum(0., benchmark_w - 0.02)\n",
" ubound = 0.02 + benchmark_w\n",
" \n",
" if previous_pos.empty:\n",
" current_position = None\n",
" turn_over_target = None\n",
" else:\n",
" previous_pos.set_index('code', inplace=True)\n",
" remained_pos = previous_pos.loc[codes]\n",
"\n",
" remained_pos.fillna(0., inplace=True)\n",
" turn_over_target = turn_over_target_base\n",
" current_position = remained_pos.weight.values\n",
" \n",
" try:\n",
" target_pos, _ = er_portfolio_analysis(er,\n",
" industry_names,\n",
" None,\n",
" constraint,\n",
" False,\n",
" benchmark_w,\n",
" method='risk_neutral',\n",
" turn_over_target=turn_over_target,\n",
" current_position=current_position,\n",
" lbound=lbound,\n",
" ubound=ubound)\n",
" except ValueError:\n",
" alpha_logger.info('{0} full re-balance'.format(ref_date))\n",
" target_pos, _ = er_portfolio_analysis(er,\n",
" industry_names,\n",
" None,\n",
" constraint,\n",
" False,\n",
" benchmark_w,\n",
" method='risk_neutral',\n",
" lbound=lbound,\n",
" ubound=ubound)\n",
" \n",
" target_pos['code'] = codes\n",
" \n",
" turn_over, executed_pos = executor.execute(target_pos=target_pos)\n",
" executed_codes = executed_pos.code.tolist()\n",
" \n",
" dx_returns = engine.fetch_dx_return(ref_date, executed_codes, horizon=horizon, offset=1)\n",
" result = pd.merge(executed_pos, dx_returns, on=['code'])\n",
" ret = result.weight.values @ (np.exp(result.dx.values) - 1.)\n",
" rets.append(np.log(1. + ret))\n",
" \n",
" executor.set_current(executed_pos)\n",
" turn_overs.append(turn_over)\n",
" previous_pos = executed_pos\n",
"\n",
" alpha_logger.info('{0} is finished'.format(ref_date))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs}, index=trade_dates)\n",
"\n",
"# index return\n",
"index_return = engine.fetch_dx_return_index_range(benchmark,\n",
" dates=trade_dates,\n",
" horizon=horizon,\n",
" offset=1).set_index('trade_date')\n",
"ret_df['index'] = index_return['dx']\n",
"\n",
"ret_df.loc[advanceDateByCalendar('china.sse', trade_dates[-1], freq)] = 0.\n",
"ret_df = ret_df.shift(1)\n",
"ret_df.iloc[0] = 0.\n",
"ret_df['tc_cost'] = ret_df.turn_over * 0.002\n",
"ret_df['returns'] = ret_df['returns'] - ret_df['index']\n",
"\n",
"ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),\n",
" title='Fixed frequency rebalanced: {0}'.format(freq),\n",
" secondary_y='tc_cost')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment