added example

1f57b3df · Dr.李 · 04f81b19 · 1f57b3df
Commit 1f57b3df authored Mar 17, 2018 by Dr.李
Show whitespace changes
Inline Side-by-side

Showing with 384 additions and 0 deletions

Example 6 - Target Volatility Builder.ipynb notebooks/Example 6 - Target Volatility Builder.ipynb +384 -0

No files found.
--- a/notebooks/Example 6 - Target Volatility Builder.ipynb
+++ b/notebooks/Example 6 - Target Volatility Builder.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from matplotlib import pyplot as plt\n",
+    "from PyFin.api import *\n",
+    "from alphamind.api import *\n",
+    "from alphamind.portfolio.meanvariancebuilder import target_vol_builder\n",
+    "\n",
+    "plt.style.use('ggplot')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Single Day Analysis\n",
+    "-----------------------"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ref_date = '2018-02-08'\n",
+    "engine = SqlEngine('postgres+psycopg2://postgres:we083826@192.168.0.102/alpha')\n",
+    "universe = Universe('custom', ['zz800'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "codes = engine.fetch_codes(ref_date, universe)\n",
+    "total_data = engine.fetch_data(ref_date, 'ep_q', codes, 905, industry='sw_adj')\n",
+    "all_styles = risk_styles + industry_styles + ['COUNTRY']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "risk_cov = total_data['risk_cov'][all_styles].values\n",
+    "factor = total_data['factor']\n",
+    "risk_exposure = factor[all_styles].values\n",
+    "special_risk = factor['s_srisk'].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000\n",
+    "sec_cov_df = pd.DataFrame(sec_cov, index=codes, columns=codes)\n",
+    "sec_cov_df.iloc[:5, :5]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Portfolio Construction\n",
+    "\n",
+    "* using `ep_q` factor as alpha factor;\n",
+    "* short selling is forbiden;\n",
+    "* target of volatility for the activate weight is setting at 5% annually level."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "er = factor['ep_q'].values\n",
+    "bm = factor['weight'].values\n",
+    "lbound = np.zeros(len(er))\n",
+    "ubound = bm + 0.05\n",
+    "cons_mat = np.ones((len(er), 1))\n",
+    "risk_targets = (bm.sum(), bm.sum())\n",
+    "target_vol = 0.045\n",
+    "\n",
+    "status, p_er, p_weight = \\\n",
+    "    target_vol_builder(er, sec_cov, bm, lbound, ubound, cons_mat, risk_targets, target_vol, target_vol)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# check the result\n",
+    "print(f\"total weight is {p_weight.sum(): .4f}\")\n",
+    "print(f\"portfolio activate weight forecasting vol is {np.sqrt((p_weight - bm) @ sec_cov @ (p_weight - bm)):.2f}\")\n",
+    "print(f\"portfolio expected return is {p_weight @ er:.4f} comparing with benchmark er {bm @ er:.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Porfolio Construction: 2010 ~ 2018\n",
+    "-------------------------------"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "Back test parameter settings\n",
+    "\"\"\"\n",
+    "\n",
+    "start_date = '2011-01-01'\n",
+    "end_date = '2018-02-14'\n",
+    "\n",
+    "freq = '10b'\n",
+    "industry_lower = 1.0\n",
+    "industry_upper = 1.0\n",
+    "neutralized_risk = ['SIZE'] + industry_styles\n",
+    "industry_name = 'sw_adj'\n",
+    "industry_level = 1\n",
+    "batch = 0\n",
+    "horizon = map_freq(freq)\n",
+    "universe = Universe(\"custom\", ['zz500'])\n",
+    "data_source = 'postgres+psycopg2://postgres:we083826@192.168.0.102/alpha'\n",
+    "benchmark_code = 905\n",
+    "target_vol = 0.05\n",
+    "\n",
+    "executor = NaiveExecutor()\n",
+    "ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')\n",
+    "engine = SqlEngine(data_source)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "Factor Model\n",
+    "\"\"\"\n",
+    "\n",
+    "alpha_factors = {'f01': LAST('ep_q')}\n",
+    "\n",
+    "weights = dict(f01=1.)\n",
+    "\n",
+    "alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)\n",
+    "\n",
+    "def predict_worker(params):\n",
+    "    data_meta = DataMeta(freq=freq,\n",
+    "                         universe=universe,\n",
+    "                         batch=batch,\n",
+    "                         neutralized_risk=neutralized_risk,\n",
+    "                         risk_model='short',\n",
+    "                         pre_process=[winsorize_normal, standardize],\n",
+    "                         post_process=[winsorize_normal, standardize],\n",
+    "                         warm_start=0,\n",
+    "                         data_source=data_source)\n",
+    "    ref_date, model = params\n",
+    "    er = predict_by_model(ref_date, model, data_meta)\n",
+    "    return er"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "\n",
+    "\"\"\"\n",
+    "Predicting Phase\n",
+    "\"\"\"\n",
+    "\n",
+    "from dask.distributed import Client\n",
+    "client = Client('192.168.0.102:8786')\n",
+    "\n",
+    "tasks = client.map(predict_worker, [(d.strftime('%Y-%m-%d'), alpha_model) for d in ref_dates], pure=False)\n",
+    "predicts = client.gather(tasks)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "Shared Data\n",
+    "\"\"\"\n",
+    "\n",
+    "constraint_risk = ['SIZE', 'SIZENL', 'BETA']\n",
+    "total_risk_names = constraint_risk + ['total']\n",
+    "\n",
+    "b_type = []\n",
+    "l_val = []\n",
+    "u_val = []\n",
+    "\n",
+    "for name in total_risk_names:\n",
+    "    b_type.append(BoundaryType.ABSOLUTE)\n",
+    "    l_val.append(0.0)\n",
+    "    u_val.append(0.0)\n",
+    "    \n",
+    "bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)\n",
+    "industry_total = engine.fetch_industry_matrix_range(universe, dates=ref_dates, category=industry_name, level=industry_level)\n",
+    "benchmark_total = engine.fetch_benchmark_range(dates=ref_dates, benchmark=benchmark_code)\n",
+    "risk_cov_total, risk_exposure_total = engine.fetch_risk_model_range(universe, dates=ref_dates)\n",
+    "index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon, offset=1).set_index('trade_date')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# rebalance\n",
+    "\n",
+    "def create_scenario(target_vol):\n",
+    "    \n",
+    "    all_styles = risk_styles + industry_styles + macro_styles\n",
+    "    previous_pos = pd.DataFrame()\n",
+    "    rets = []\n",
+    "    turn_overs = []\n",
+    "    leverags = []\n",
+    "    ics = []\n",
+    "\n",
+    "    for i, ref_date in enumerate(ref_dates):\n",
+    "        ref_date = ref_date.strftime('%Y-%m-%d')\n",
+    "        industry_matrix = industry_total[industry_total.trade_date == ref_date]\n",
+    "        benchmark_w = benchmark_total[benchmark_total.trade_date == ref_date]\n",
+    "        risk_exposure = risk_exposure_total[risk_exposure_total.trade_date == ref_date]\n",
+    "        risk_cov = risk_cov_total[risk_cov_total.trade_date == ref_date]\n",
+    "        \n",
+    "        total_data = pd.merge(industry_matrix, benchmark_w, on=['code'], how='left').fillna(0.)\n",
+    "        total_data = pd.merge(total_data, risk_exposure, on=['code'])\n",
+    "        total_data = total_data.dropna()\n",
+    "        codes = total_data.code.values.tolist()\n",
+    "        \n",
+    "        risk_exposure = total_data[all_styles].values\n",
+    "        risk_cov = risk_cov[all_styles].values\n",
+    "        special_risk = total_data['s_srisk'].values\n",
+    "        sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000\n",
+    "\n",
+    "        benchmark_w = total_data.weight.values\n",
+    "        \n",
+    "        total_risk_exp = np.concatenate([total_data[constraint_risk].values.astype(float),\n",
+    "                                         np.ones((len(benchmark_w),1))],\n",
+    "                                        axis=1)\n",
+    "        total_risk_exp = pd.DataFrame(total_risk_exp, columns=total_risk_names)\n",
+    "        constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)\n",
+    "        \n",
+    "        lbound = np.zeros(len(total_data))\n",
+    "        ubound = 0.05 + benchmark_w\n",
+    "\n",
+    "        er = predicts[i].loc[codes].values.flatten()\n",
+    "        cons_mat = np.ones((len(er), 1))\n",
+    "        risk_target = (benchmark_w.sum(), benchmark_w.sum())\n",
+    "        \n",
+    "        try:\n",
+    "            target_pos, _ = er_portfolio_analysis(er,\n",
+    "                                                  industry_matrix.industry_name.values,\n",
+    "                                                  None,\n",
+    "                                                  constraints,\n",
+    "                                                  False,\n",
+    "                                                  benchmark_w,\n",
+    "                                                  method='tv',\n",
+    "                                                  lbound=lbound,\n",
+    "                                                  ubound=ubound,\n",
+    "                                                  cov=sec_cov,\n",
+    "                                                  target_vol=target_vol)\n",
+    "        except:\n",
+    "            import pdb\n",
+    "            pdb.set_trace()\n",
+    "\n",
+    "        target_pos['code'] = codes\n",
+    "        turn_over, executed_pos = executor.execute(target_pos=target_pos)\n",
+    "\n",
+    "        executed_codes = executed_pos.code.tolist()\n",
+    "        dx_returns = engine.fetch_dx_return(ref_date, executed_codes, horizon=horizon, offset=1)\n",
+    "        result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')\n",
+    "        result = pd.merge(result, dx_returns, on=['code'])\n",
+    "        \n",
+    "        excess_return = np.exp(result.dx.values) - 1. - index_return.loc[ref_date, 'dx']\n",
+    "        raw_weight = result.weight_x.values\n",
+    "        activate_weight = raw_weight - result.weight_y.values\n",
+    "        ret = raw_weight @ excess_return\n",
+    "        risk_adjusted_ic = np.corrcoef(excess_return, activate_weight)[0, 1]\n",
+    "        rets.append(np.log(1. + ret))\n",
+    "        ics.append(risk_adjusted_ic)\n",
+    "        executor.set_current(executed_pos)\n",
+    "        turn_overs.append(turn_over)\n",
+    "        \n",
+    "        leverage = raw_weight.sum()\n",
+    "        leverags.append(leverage)\n",
+    "        alpha_logger.info(f\"{ref_date} is finished with expected vol {np.sqrt((target_pos.weight.values - benchmark_w) @ sec_cov @ (target_pos.weight.values - benchmark_w)):.2f}\")\n",
+    "\n",
+    "    ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'IC': ics, 'leverage': leverags}, index=ref_dates)\n",
+    "\n",
+    "    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], freq)] = 0.\n",
+    "    ret_df = ret_df.shift(1)\n",
+    "    ret_df.iloc[0] = 0.\n",
+    "    ret_df['tc_cost'] = ret_df.turn_over * 0.002\n",
+    "    return ret_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ret_df = create_scenario(target_vol)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),\n",
+    "                                             title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, 905),\n",
+    "                                             secondary_y='tc_cost')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}