Commit c461aee5 authored by Dr.李's avatar Dr.李

update example

parent 4b379e58
......@@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -28,20 +28,20 @@
"\"\"\"\n",
"\n",
"start_date = '2010-01-01'\n",
"end_date = '2018-02-14'\n",
"end_date = '2018-02-24'\n",
"\n",
"frequency = '10b'\n",
"industry_lower = 1.0\n",
"industry_upper = 1.0\n",
"method = 'risk_neutral'\n",
"neutralize_risk = industry_styles\n",
"neutralize_risk = ['SIZE'] + industry_styles\n",
"industry_name = 'sw_adj'\n",
"industry_level = 1\n",
"benchmark_total_lower = 0.8\n",
"benchmark_total_upper = 1.0\n",
"horizon = map_freq(frequency)\n",
"weight_gap = 0.01\n",
"benchmark_code = 300\n",
"benchmark_code = 905\n",
"universe_name = ['zz800']\n",
"universe = Universe('custom', universe_name)\n",
"ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')\n",
......@@ -53,7 +53,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
......@@ -62,7 +62,7 @@
"\"\"\"\n",
"\n",
"industry_names = industry_list(industry_name, industry_level)\n",
"constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names\n",
"constraint_risk = ['SIZE', 'SIZENL', 'BETA']\n",
"total_risk_names = constraint_risk + ['benchmark', 'total']\n",
"\n",
"b_type = []\n",
......@@ -88,33 +88,25 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Shared data\n",
"\"\"\"\n",
"\n",
"index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,\n",
"def factor_analysis(engine, factor_name, universe, benchmark_code, positive):\n",
" \n",
" \"\"\"\n",
" Data phase\n",
" \"\"\"\n",
" index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,\n",
" offset=1).set_index('trade_date')\n",
"\n",
"codes_return = engine.fetch_dx_return_range(universe,\n",
" dates=ref_dates,\n",
" horizon=horizon,\n",
" offset=1)\n",
"\n",
"return_groups = codes_return.groupby('trade_date')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def factor_analysis(engine, factor_name, universe, benchmark_code, positive):\n",
" codes_return = engine.fetch_dx_return_range(universe,\n",
" dates=ref_dates,\n",
" horizon=horizon,\n",
" offset=1)\n",
"\n",
" return_groups = codes_return.groupby('trade_date')\n",
" \n",
" \"\"\"\n",
" Model phase: we need 1 constant linear model and one linear regression model\n",
" \"\"\"\n",
......@@ -122,7 +114,7 @@
" industry_groups = industry_total.groupby('trade_date')\n",
" \n",
" alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]\n",
" simple_expression = LAST(factor_name) if positive else -LAST(factor_name)\n",
" simple_expression = CSRes(LAST(factor_name), 'roe_q') if positive else -CSRes(LAST(factor_name), 'roe_q')\n",
"\n",
" const_features = {alpha_name[0]: simple_expression}\n",
" const_weights = {alpha_name[0]: 1.}\n",
......@@ -168,7 +160,7 @@
" factor_values = factor_processing(total_data[alpha_name].values,\n",
" pre_process=[winsorize_normal, standardize],\n",
" risk_factors=risk_exp,\n",
" post_process=[winsorize_normal, standardize])\n",
" post_process=[winsorize_normal, standardize, rank])\n",
"\n",
" # const linear model\n",
" er = const_model.predict(pd.DataFrame(data={alpha_name[0]: factor_values.flatten()}))\n",
......@@ -230,7 +222,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
......@@ -244,14 +236,14 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wall time: 39min 15s\n"
"Wall time: 1h 25min 5s\n"
]
}
],
......@@ -260,7 +252,7 @@
"\n",
"from dask.distributed import Client\n",
"\n",
"client = Client('10.63.6.13:8786')\n",
"client = Client('10.63.6.176:8786')\n",
"\n",
"tasks = client.map(worker_func_positive, df.index.tolist(), pure=False)\n",
"res1 = client.gather(tasks)\n",
......@@ -282,14 +274,37 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Summary table for the returns frame: 'mean' and 'std' from agg, transposed so they\n",
"# become columns, plus t = mean / std * sqrt(number of rows). Assumes factor_df is a DataFrame.\n",
"factor_res = factor_df.agg(['mean', 'std']).T\n",
"factor_res['t.'] = factor_res['mean'].div(factor_res['std']).mul(np.sqrt(len(factor_df)))\n",
"\n",
"# Same summary for the IC frame.\n",
"ic_res = ic_df.agg(['mean', 'std']).T\n",
"ic_res['t.'] = ic_res['mean'].div(ic_res['std']).mul(np.sqrt(len(ic_df)))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Export the raw per-period frames and their summary statistics, one sheet each.\n",
"# 'ret'/'ic' hold the raw frames (factor_df / ic_df); 'ret_stat'/'ic_stat' hold the\n",
"# mean/std/t summaries (factor_res / ic_res). The context manager saves the workbook on exit.\n",
"with pd.ExcelWriter(f'{universe_name[0]}_{benchmark_code}.xlsx', engine='xlsxwriter') as writer:\n",
"    factor_df.to_excel(writer, sheet_name='ret')\n",
"    ic_df.to_excel(writer, sheet_name='ic')\n",
"    factor_res.to_excel(writer, sheet_name='ret_stat')\n",
"    ic_res.to_excel(writer, sheet_name='ic_stat')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"writer = pd.ExcelWriter(f'{universe_name[0]}_{benchmark_code}.xlsx', engine='xlsxwriter')\n",
"factor_df.to_excel(writer, sheet_name='returns')\n",
"ic_df.to_excel(writer, sheet_name='ics')\n",
"writer.close()"
"client.close()"
]
},
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment