Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
265f1b2a
Commit
265f1b2a
authored
Feb 23, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added IC analysis example
parent
9c51cf03
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
358 additions
and
0 deletions
+358
-0
Example 1 - factor IC analysis.ipynb
notebooks/Example 1 - factor IC analysis.ipynb
+358
-0
No files found.
notebooks/Example 1 - factor IC analysis.ipynb
0 → 100644
View file @
265f1b2a
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from matplotlib import pyplot as plt\n",
"from alphamind.api import *\n",
"from PyFin.api import *\n",
"\n",
"plt.style.use('ggplot')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\"\"\"\n",
"Back test parameter settings\n",
"\"\"\"\n",
"\n",
"start_date = '2010-01-01'\n",
"end_date = '2018-02-14'\n",
"\n",
"frequency = '20b'\n",
"method = 'risk_neutral'\n",
"neutralize_risk = industry_styles\n",
"turn_over_target_base = 2.0\n",
"benchmark_total_lower = 1.0\n",
"benchmark_total_upper = 1.0\n",
"horizon = map_freq(frequency)\n",
"weight_gap = 0.300\n",
"benchmark_code = 905\n",
"universe_name = ['hs300']\n",
"universe = Universe('custom', universe_name)\n",
"ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')\n",
"\n",
"executor = NaiveExecutor()\n",
"connect_str = 'postgres+psycopg2://postgres:we083826@192.168.0.102/alpha'\n",
"engine = SqlEngine(connect_str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\"\"\"\n",
"Constraints settings\n",
"\"\"\"\n",
"\n",
"constraint_risk = ['SIZE', 'SIZENL', 'BETA']\n",
"total_risk_names = constraint_risk + ['benchmark', 'total']\n",
"\n",
"# (boundary type, lower, upper) per special constraint name; anything not\n",
"# listed falls back to an absolute zero-exposure bound.\n",
"special_bounds = {\n",
"    'benchmark': (BoundaryType.RELATIVE, benchmark_total_lower, benchmark_total_upper),\n",
"    'total': (BoundaryType.RELATIVE, 1.0, 1.0)\n",
"}\n",
"default_bound = (BoundaryType.ABSOLUTE, 0.0, 0.0)\n",
"\n",
"b_type = []\n",
"l_val = []\n",
"u_val = []\n",
"\n",
"for name in total_risk_names:\n",
"    kind, lower, upper = special_bounds.get(name, default_bound)\n",
"    b_type.append(kind)\n",
"    l_val.append(lower)\n",
"    u_val.append(upper)\n",
"\n",
"bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Shared data\n",
"\"\"\"\n",
"\n",
"index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,\n",
" offset=1).set_index('trade_date')\n",
"\n",
"codes_return = engine.fetch_dx_return_range(universe,\n",
" dates=ref_dates,\n",
" horizon=horizon,\n",
" offset=1)\n",
"return_groups = codes_return.groupby('trade_date')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def factor_analysis(engine, factor_name, universe, benchmark_code, positive):\n",
"    \"\"\"Run a single-factor, risk-neutral backtest and IC analysis.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    engine: SqlEngine used to fetch factor / return / risk data.\n",
"    factor_name: name of the factor to test.\n",
"    universe: stock universe to run the test over.\n",
"    benchmark_code: code of the benchmark index.\n",
"    positive: if False the factor sign is flipped (-factor) before testing.\n",
"\n",
"    Returns\n",
"    -------\n",
"    (alpha_name, ret_df): derived alpha name and a DataFrame indexed by trade\n",
"    date with columns 'returns', 'turn_over', 'IC', 'leverage', 'tc_cost'.\n",
"    \"\"\"\n",
"    alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]\n",
"    simple_expression = LAST(factor_name) if positive else -LAST(factor_name)\n",
"\n",
"    const_features = {alpha_name[0]: simple_expression}\n",
"    const_weights = {alpha_name[0]: 1.}\n",
"\n",
"    # NOTE(review): features is a list while weights is a dict — confirm this\n",
"    # matches the ConstLinearModel signature of the alphamind version in use.\n",
"    const_model = ConstLinearModel(features=alpha_name,\n",
"                                   weights=const_weights)\n",
"\n",
"    const_model_factor_data = engine.fetch_data_range(universe,\n",
"                                                      const_features,\n",
"                                                      dates=ref_dates,\n",
"                                                      benchmark=benchmark_code)['factor'].dropna()\n",
"\n",
"    rets = []\n",
"    turn_overs = []\n",
"    leverages = []\n",
"    ics = []\n",
"    index_dates = []\n",
"    factor_groups = const_model_factor_data.groupby('trade_date')\n",
"\n",
"    for date, data in factor_groups:\n",
"        index_dates.append(date)\n",
"\n",
"        # Fill missing factor values with the cross-sectional median.\n",
"        total_data = data.fillna(data[alpha_name].median())\n",
"        alpha_logger.info('{0}: {1}'.format(date, len(total_data)))\n",
"        risk_exp = total_data[neutralize_risk].values.astype(float)\n",
"        industry = total_data.industry_code.values\n",
"        benchmark_w = total_data.weight.values\n",
"        is_in_benchmark = (benchmark_w > 0.).astype(float).reshape(-1, 1)\n",
"\n",
"        # Constraint exposures: style risks + benchmark membership + total weight.\n",
"        constraint_exp = total_data[constraint_risk].values\n",
"        risk_exp_expand = np.concatenate((constraint_exp,\n",
"                                          is_in_benchmark,\n",
"                                          np.ones_like(is_in_benchmark)), axis=1).astype(float)\n",
"        total_risk_exp = pd.DataFrame(risk_exp_expand, columns=total_risk_names)\n",
"        constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)\n",
"\n",
"        # Individual weights may deviate from benchmark by at most weight_gap,\n",
"        # and short positions are disallowed.\n",
"        lbound = np.maximum(0., benchmark_w - weight_gap)\n",
"        ubound = weight_gap + benchmark_w\n",
"\n",
"        factor_values = factor_processing(total_data[alpha_name].values,\n",
"                                          pre_process=[winsorize_normal, standardize],\n",
"                                          risk_factors=risk_exp,\n",
"                                          post_process=[winsorize_normal, standardize])\n",
"\n",
"        # Expected returns from the const linear model.\n",
"        er = const_model.predict(pd.DataFrame(data={alpha_name[0]: factor_values.flatten()}))\n",
"\n",
"        alpha_logger.info('{0} full re-balance'.format(date))\n",
"        target_pos, _ = er_portfolio_analysis(er,\n",
"                                              industry,\n",
"                                              None,\n",
"                                              constraints,\n",
"                                              False,\n",
"                                              benchmark_w,\n",
"                                              method=method,\n",
"                                              lbound=lbound,\n",
"                                              ubound=ubound)\n",
"\n",
"        target_pos['code'] = total_data['code'].values\n",
"\n",
"        turn_over, executed_pos = executor.execute(target_pos=target_pos)\n",
"        dx_returns = return_groups.get_group(date)\n",
"\n",
"        result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')\n",
"        result = pd.merge(result, dx_returns, on=['code'])\n",
"\n",
"        leverage = result.weight_x.abs().sum()\n",
"\n",
"        # Per-stock excess return over the benchmark index for the period.\n",
"        excess_return = np.exp(result.dx.values) - 1. - index_return.loc[date, 'dx']\n",
"        raw_weight = result.weight_x.values\n",
"        # Active weight relative to benchmark (fixes former 'activate_weight' typo).\n",
"        active_weight = raw_weight - result.weight_y.values\n",
"        ret = raw_weight @ excess_return\n",
"        # Risk-adjusted IC: correlation of active weights with realized excess returns.\n",
"        risk_adjusted_ic = np.corrcoef(excess_return, active_weight)[0, 1]\n",
"        rets.append(np.log(1. + ret))\n",
"        ics.append(risk_adjusted_ic)\n",
"        executor.set_current(executed_pos)\n",
"        turn_overs.append(turn_over)\n",
"        leverages.append(leverage)\n",
"\n",
"        alpha_logger.info('{0} is finished'.format(date))\n",
"\n",
"    ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'IC': ics, 'leverage': leverages}, index=index_dates)\n",
"\n",
"    # Returns are realized one period after the signal date: append the next\n",
"    # scheduled re-balance date, shift forward by one and zero the first row.\n",
"    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.\n",
"    ret_df = ret_df.shift(1)\n",
"    ret_df.iloc[0] = 0.\n",
"    ret_df['tc_cost'] = ret_df.turn_over * 0.002  # 20 bps cost per unit of turnover\n",
"\n",
"    return alpha_name[0], ret_df\n",
"\n",
"\n",
"def worker_func_positive(factor_name):\n",
"    \"\"\"Dask worker: run the analysis with the factor taken as-is (positive).\"\"\"\n",
"    from alphamind.api import SqlEngine\n",
"    engine = SqlEngine(connect_str)  # each worker needs its own DB connection\n",
"    return factor_analysis(engine, factor_name, universe, benchmark_code, positive=True)\n",
"\n",
"\n",
"def worker_func_negative(factor_name):\n",
"    \"\"\"Dask worker: run the analysis with the factor sign flipped (negative).\"\"\"\n",
"    from alphamind.api import SqlEngine\n",
"    engine = SqlEngine(connect_str)  # each worker needs its own DB connection\n",
"    return factor_analysis(engine, factor_name, universe, benchmark_code, positive=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = engine.fetch_factor_coverage(start_date='2011-01-01',\n",
" end_date='2018-02-12',\n",
" universe='hs300')\n",
"df = df[df.source != 'risk_exposure']\n",
"df = df.groupby('factor').mean()\n",
"df = df[df.coverage >= 0.98]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"from dask.distributed import Client\n",
"from dask.diagnostics import ProgressBar\n",
"\n",
"client = Client('192.168.0.102:8786')\n",
"\n",
"# pure=False: each call hits the database, so dask must not cache results.\n",
"tasks = client.map(worker_func_positive, df.index.tolist(), pure=False)\n",
"# NOTE(review): dask.diagnostics.ProgressBar only instruments the local\n",
"# scheduler and is a no-op with a distributed Client; consider\n",
"# dask.distributed.progress(tasks) instead.\n",
"with ProgressBar():\n",
"    res1 = client.gather(tasks)\n",
"\n",
"tasks = client.map(worker_func_negative, df.index.tolist(), pure=False)\n",
"with ProgressBar():\n",
"    res2 = client.gather(tasks)\n",
"\n",
"factor_df = pd.DataFrame()\n",
"ic_df = pd.DataFrame()\n",
"\n",
"# Use a dedicated loop variable: the original reused 'df' here, silently\n",
"# clobbering the factor-coverage frame built in the previous cell.\n",
"for f_name, res_df in res1:\n",
"    factor_df[f_name] = res_df['returns']\n",
"    ic_df[f_name] = res_df['IC']\n",
"\n",
"for f_name, res_df in res2:\n",
"    factor_df[f_name] = res_df['returns']\n",
"    ic_df[f_name] = res_df['IC']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use the context manager so the workbook is closed (and flushed to disk)\n",
"# even if one of the to_excel calls raises.\n",
"with pd.ExcelWriter(f'{universe_name[0]}.xlsx', engine='xlsxwriter') as writer:\n",
"    factor_df.to_excel(writer, sheet_name='returns')\n",
"    ic_df.to_excel(writer, sheet_name='ics')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ic_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment