update example

c461aee5 · Dr.李 · 4b379e58 · c461aee5 · c461aee5 · c461aee5
Commit c461aee5 authored Feb 28, 2018 by Dr.李
4 changed files
--- a/notebooks/Example 1 - Factor IC analysis.ipynb
+++ b/notebooks/Example 1 - Factor IC analysis.ipynb
@@ -19,7 +19,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -28,20 +28,20 @@
    "\"\"\"\n",
    "\n",
    "start_date = '2010-01-01'\n",
-    "end_date = '2018-02-14'\n",
+    "end_date = '2018-02-24'\n",
    "\n",
    "frequency = '10b'\n",
    "industry_lower = 1.0\n",
    "industry_upper = 1.0\n",
    "method = 'risk_neutral'\n",
-    "neutralize_risk = industry_styles\n",
+    "neutralize_risk = ['SIZE'] + industry_styles\n",
    "industry_name = 'sw_adj'\n",
    "industry_level = 1\n",
    "benchmark_total_lower = 0.8\n",
    "benchmark_total_upper = 1.0\n",
    "horizon = map_freq(frequency)\n",
    "weight_gap = 0.01\n",
-    "benchmark_code = 300\n",
+    "benchmark_code = 905\n",
    "universe_name = ['zz800']\n",
    "universe = Universe('custom', universe_name)\n",
    "ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')\n",
@@ -53,7 +53,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -62,7 +62,7 @@
    "\"\"\"\n",
    "\n",
    "industry_names = industry_list(industry_name, industry_level)\n",
-    "constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names\n",
+    "constraint_risk = ['SIZE', 'SIZENL', 'BETA']\n",
    "total_risk_names = constraint_risk + ['benchmark', 'total']\n",
    "\n",
    "b_type = []\n",
@@ -88,33 +88,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
-    "\"\"\"\n",
-    "Shared data\n",
-    "\"\"\"\n",
-    "\n",
-    "index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,\n",
+    "def factor_analysis(engine, factor_name, universe, benchmark_code, positive):\n",
+    "    \n",
+    "    \"\"\"\n",
+    "    Data phase\n",
+    "    \"\"\"\n",
+    "    index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,\n",
    "                                                  offset=1).set_index('trade_date')\n",
    "\n",
-    "codes_return = engine.fetch_dx_return_range(universe,\n",
-    "                                            dates=ref_dates,\n",
-    "                                            horizon=horizon,\n",
-    "                                            offset=1)\n",
-    "\n",
-    "return_groups = codes_return.groupby('trade_date')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def factor_analysis(engine, factor_name, universe, benchmark_code, positive):\n",
+    "    codes_return = engine.fetch_dx_return_range(universe,\n",
+    "                                                dates=ref_dates,\n",
+    "                                                horizon=horizon,\n",
+    "                                                offset=1)\n",
    "\n",
+    "    return_groups = codes_return.groupby('trade_date')\n",
+    "    \n",
    "    \"\"\"\n",
    "    Model phase: we need 1 constant linear model and one linear regression model\n",
    "    \"\"\"\n",
@@ -122,7 +114,7 @@
    "    industry_groups = industry_total.groupby('trade_date')\n",
    "    \n",
    "    alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]\n",
-    "    simple_expression = LAST(factor_name) if positive else -LAST(factor_name)\n",
+    "    simple_expression = CSRes(LAST(factor_name), 'roe_q') if positive else -CSRes(LAST(factor_name), 'roe_q')\n",
    "\n",
    "    const_features = {alpha_name[0]: simple_expression}\n",
    "    const_weights = {alpha_name[0]: 1.}\n",
@@ -168,7 +160,7 @@
    "        factor_values = factor_processing(total_data[alpha_name].values,\n",
    "                                          pre_process=[winsorize_normal, standardize],\n",
    "                                          risk_factors=risk_exp,\n",
-    "                                          post_process=[winsorize_normal, standardize])\n",
+    "                                          post_process=[winsorize_normal, standardize, rank])\n",
    "\n",
    "        # const linear model\n",
    "        er = const_model.predict(pd.DataFrame(data={alpha_name[0]: factor_values.flatten()}))\n",
@@ -230,7 +222,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -244,14 +236,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Wall time: 39min 15s\n"
+      "Wall time: 1h 25min 5s\n"
     ]
    }
   ],
@@ -260,7 +252,7 @@
    "\n",
    "from dask.distributed import Client\n",
    "\n",
-    "client = Client('10.63.6.13:8786')\n",
+    "client = Client('10.63.6.176:8786')\n",
    "\n",
    "tasks = client.map(worker_func_positive, df.index.tolist(), pure=False)\n",
    "res1 = client.gather(tasks)\n",
@@ -282,14 +274,37 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "factor_res = factor_df.agg(['mean', 'std']).T\n",
+    "factor_res['t.'] = factor_res['mean'] / factor_res['std'] * np.sqrt(len(factor_df))\n",
+    "\n",
+    "ic_res = ic_df.agg(['mean', 'std']).T\n",
+    "ic_res['t.'] = ic_res['mean'] / ic_res['std'] * np.sqrt(len(ic_df))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with pd.ExcelWriter(f'{universe_name[0]}_{benchmark_code}.xlsx', engine='xlsxwriter') as writer:\n",
+    "    factor_df.to_excel(writer, sheet_name='ret')\n",
+    "    ic_df.to_excel(writer, sheet_name='ic')  # raw ic_df, previously never exported\n",
+    "    factor_res.to_excel(writer, sheet_name='ret_stat')  # mean/std/t. summary of factor_df\n",
+    "    ic_res.to_excel(writer, sheet_name='ic_stat')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
-    "writer = pd.ExcelWriter(f'{universe_name[0]}_{benchmark_code}.xlsx', engine='xlsxwriter')\n",
-    "factor_df.to_excel(writer, sheet_name='returns')\n",
-    "ic_df.to_excel(writer, sheet_name='ics')\n",
-    "writer.close()"
+    "client.close()"
   ]
  },
  {

--- a/notebooks/Example 2 - Strategy Analysis.ipynb
+++ b/notebooks/Example 2 - Strategy Analysis.ipynb
--- a/notebooks/Example 3 - Multi Weight Gap Comparison.ipynb
+++ b/notebooks/Example 3 - Multi Weight Gap Comparison.ipynb
--- a/notebooks/Example 4 - Single Factor Analysis.ipynb
+++ b/notebooks/Example 4 - Single Factor Analysis.ipynb