Commit c461aee5 authored by Dr.李

update example

parent 4b379e58
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -28,20 +28,20 @@ ...@@ -28,20 +28,20 @@
"\"\"\"\n", "\"\"\"\n",
"\n", "\n",
"start_date = '2010-01-01'\n", "start_date = '2010-01-01'\n",
"end_date = '2018-02-14'\n", "end_date = '2018-02-24'\n",
"\n", "\n",
"frequency = '10b'\n", "frequency = '10b'\n",
"industry_lower = 1.0\n", "industry_lower = 1.0\n",
"industry_upper = 1.0\n", "industry_upper = 1.0\n",
"method = 'risk_neutral'\n", "method = 'risk_neutral'\n",
"neutralize_risk = industry_styles\n", "neutralize_risk = ['SIZE'] + industry_styles\n",
"industry_name = 'sw_adj'\n", "industry_name = 'sw_adj'\n",
"industry_level = 1\n", "industry_level = 1\n",
"benchmark_total_lower = 0.8\n", "benchmark_total_lower = 0.8\n",
"benchmark_total_upper = 1.0\n", "benchmark_total_upper = 1.0\n",
"horizon = map_freq(frequency)\n", "horizon = map_freq(frequency)\n",
"weight_gap = 0.01\n", "weight_gap = 0.01\n",
"benchmark_code = 300\n", "benchmark_code = 905\n",
"universe_name = ['zz800']\n", "universe_name = ['zz800']\n",
"universe = Universe('custom', universe_name)\n", "universe = Universe('custom', universe_name)\n",
"ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')\n", "ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')\n",
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -62,7 +62,7 @@ ...@@ -62,7 +62,7 @@
"\"\"\"\n", "\"\"\"\n",
"\n", "\n",
"industry_names = industry_list(industry_name, industry_level)\n", "industry_names = industry_list(industry_name, industry_level)\n",
"constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names\n", "constraint_risk = ['SIZE', 'SIZENL', 'BETA']\n",
"total_risk_names = constraint_risk + ['benchmark', 'total']\n", "total_risk_names = constraint_risk + ['benchmark', 'total']\n",
"\n", "\n",
"b_type = []\n", "b_type = []\n",
...@@ -88,33 +88,25 @@ ...@@ -88,33 +88,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"\"\"\"\n", "def factor_analysis(engine, factor_name, universe, benchmark_code, positive):\n",
"Shared data\n", " \n",
"\"\"\"\n", " \"\"\"\n",
"\n", " Data phase\n",
"index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,\n", " \"\"\"\n",
" index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,\n",
" offset=1).set_index('trade_date')\n", " offset=1).set_index('trade_date')\n",
"\n", "\n",
"codes_return = engine.fetch_dx_return_range(universe,\n", " codes_return = engine.fetch_dx_return_range(universe,\n",
" dates=ref_dates,\n", " dates=ref_dates,\n",
" horizon=horizon,\n", " horizon=horizon,\n",
" offset=1)\n", " offset=1)\n",
"\n",
"return_groups = codes_return.groupby('trade_date')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def factor_analysis(engine, factor_name, universe, benchmark_code, positive):\n",
"\n", "\n",
" return_groups = codes_return.groupby('trade_date')\n",
" \n",
" \"\"\"\n", " \"\"\"\n",
" Model phase: we need 1 constant linear model and one linear regression model\n", " Model phase: we need 1 constant linear model and one linear regression model\n",
" \"\"\"\n", " \"\"\"\n",
...@@ -122,7 +114,7 @@ ...@@ -122,7 +114,7 @@
" industry_groups = industry_total.groupby('trade_date')\n", " industry_groups = industry_total.groupby('trade_date')\n",
" \n", " \n",
" alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]\n", " alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]\n",
" simple_expression = LAST(factor_name) if positive else -LAST(factor_name)\n", " simple_expression = CSRes(LAST(factor_name), 'roe_q') if positive else -CSRes(LAST(factor_name), 'roe_q')\n",
"\n", "\n",
" const_features = {alpha_name[0]: simple_expression}\n", " const_features = {alpha_name[0]: simple_expression}\n",
" const_weights = {alpha_name[0]: 1.}\n", " const_weights = {alpha_name[0]: 1.}\n",
...@@ -168,7 +160,7 @@ ...@@ -168,7 +160,7 @@
" factor_values = factor_processing(total_data[alpha_name].values,\n", " factor_values = factor_processing(total_data[alpha_name].values,\n",
" pre_process=[winsorize_normal, standardize],\n", " pre_process=[winsorize_normal, standardize],\n",
" risk_factors=risk_exp,\n", " risk_factors=risk_exp,\n",
" post_process=[winsorize_normal, standardize])\n", " post_process=[winsorize_normal, standardize, rank])\n",
"\n", "\n",
" # const linear model\n", " # const linear model\n",
" er = const_model.predict(pd.DataFrame(data={alpha_name[0]: factor_values.flatten()}))\n", " er = const_model.predict(pd.DataFrame(data={alpha_name[0]: factor_values.flatten()}))\n",
...@@ -230,7 +222,7 @@ ...@@ -230,7 +222,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
...@@ -244,14 +236,14 @@ ...@@ -244,14 +236,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Wall time: 39min 15s\n" "Wall time: 1h 25min 5s\n"
] ]
} }
], ],
...@@ -260,7 +252,7 @@ ...@@ -260,7 +252,7 @@
"\n", "\n",
"from dask.distributed import Client\n", "from dask.distributed import Client\n",
"\n", "\n",
"client = Client('10.63.6.13:8786')\n", "client = Client('10.63.6.176:8786')\n",
"\n", "\n",
"tasks = client.map(worker_func_positive, df.index.tolist(), pure=False)\n", "tasks = client.map(worker_func_positive, df.index.tolist(), pure=False)\n",
"res1 = client.gather(tasks)\n", "res1 = client.gather(tasks)\n",
...@@ -282,14 +274,37 @@ ...@@ -282,14 +274,37 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"factor_res = factor_df.agg(['mean', 'std']).T\n",
"factor_res['t.'] = factor_res['mean'] / factor_res['std'] * np.sqrt(len(factor_df))\n",
"\n",
"ic_res = ic_df.agg(['mean', 'std']).T\n",
"ic_res['t.'] = ic_res['mean'] / ic_res['std'] * np.sqrt(len(ic_df))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"with pd.ExcelWriter(f'{universe_name[0]}_{benchmark_code}.xlsx', engine='xlsxwriter') as writer:\n",
" factor_df.to_excel(writer, sheet_name='ret')\n",
" factor_res.to_excel(writer, sheet_name='ic')\n",
" factor_df.to_excel(writer, sheet_name='ret_stat')\n",
" ic_res.to_excel(writer, sheet_name='ic_stat')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"writer = pd.ExcelWriter(f'{universe_name[0]}_{benchmark_code}.xlsx', engine='xlsxwriter')\n", "client.close()"
"factor_df.to_excel(writer, sheet_name='returns')\n",
"ic_df.to_excel(writer, sheet_name='ics')\n",
"writer.close()"
] ]
}, },
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment