Commit 64c42d49 authored by Dr.李's avatar Dr.李

update v2

parent 3bb27b59
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 22,
"metadata": { "metadata": {
"collapsed": true "collapsed": true
}, },
...@@ -16,9 +16,394 @@ ...@@ -16,9 +16,394 @@
"from alphamind.api import *\n", "from alphamind.api import *\n",
"from PyFin.api import *\n", "from PyFin.api import *\n",
"import xgboost as xgb\n", "import xgboost as xgb\n",
"from sklearn.model_selection import train_test_split\n",
"\n", "\n",
"plt.style.use('fivethirtyeight')\n", "plt.style.use('fivethirtyeight')\n",
"engine = SqlEngine('postgres+psycopg2://postgres:we083826@192.168.0.102/alpha')" "engine = SqlEngine('postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"u_name = 'zz500'\n",
"benchmark = 905\n",
"universe = Universe(u_name, [u_name])\n",
"factor_coverage = engine.fetch_factor_coverage()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"flitered_coverage = factor_coverage[((factor_coverage.source == 'uqer'))\n",
" & (factor_coverage.universe == u_name) \n",
" & (factor_coverage.trade_date >= '2012-01-01')]\n",
"coverage_report = flitered_coverage.groupby(['factor'])['coverage'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"alpha_factors = coverage_report[coverage_report >= 0.99].index.tolist()\n",
"\n",
"alpha_factors = {\n",
" f: DIFF(f) / (ABS(f) + 1e-4) / (DIFF('closePrice') / (LAST('closePrice') + 1e-4))\n",
" for f in alpha_factors if f != 'SIZE'\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"202"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"alpha_factors.__len__()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"frequency = '2w'\n",
"batch = 8\n",
"start_date = '2012-01-01'\n",
"end_date = '2017-10-25'\n",
"method = 'risk_neutral'\n",
"portfolio_risk_neutralize = ['SIZE']\n",
"neutralize_risk = industry_styles + portfolio_risk_neutralize\n",
"industry_lower = 1.\n",
"industry_upper = 1."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2017-10-30 17:03:41,507 - ALPHA_MIND - INFO - Starting data package fetching ...\n",
"D:\\ProgramData\\IntelPython3_2018\\lib\\site-packages\\alpha_mind-0.1.0-py3.6-win-amd64.egg\\alphamind\\data\\transformer.py:78: RuntimeWarning: invalid value encountered in true_divide\n",
" dropna=False)\n",
"D:\\ProgramData\\IntelPython3_2018\\lib\\site-packages\\alpha_mind-0.1.0-py3.6-win-amd64.egg\\alphamind\\data\\transformer.py:78: RuntimeWarning: divide by zero encountered in true_divide\n",
" dropna=False)\n",
"2017-10-30 17:06:05,198 - ALPHA_MIND - INFO - Loading data is finished\n",
"2017-10-30 17:06:59,024 - ALPHA_MIND - INFO - Data processing is finished\n"
]
}
],
"source": [
"data_package = fetch_data_package(engine,\n",
" alpha_factors=alpha_factors,\n",
" start_date=start_date,\n",
" end_date=end_date,\n",
" frequency=frequency,\n",
" universe=universe,\n",
" benchmark=benchmark,\n",
" batch=batch,\n",
" neutralized_risk=neutralize_risk,\n",
" pre_process=[winsorize_normal],\n",
" post_process=[winsorize_normal],\n",
" warm_start=batch)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_x = data_package['train']['x']\n",
"train_y = data_package['train']['y']\n",
"\n",
"predict_x = data_package['predict']['x']\n",
"predict_y = data_package['predict']['y']\n",
"\n",
"features = data_package['x_names']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def plot_model_importance(model, features):\n",
" features = np.array(features)\n",
" n_features = len(features)\n",
" features_importance = model.feature_importances_\n",
" order = features_importance.argsort().argsort()\n",
" features = features[order >= n_features - 10]\n",
" features_importance = features_importance[order >= n_features - 10]\n",
" n_features = len(features)\n",
" plt.figure(figsize=(12, 6))\n",
" plt.barh(range(n_features), features_importance, align='center')\n",
" plt.yticks(np.arange(n_features), features)\n",
" plt.xlabel('Feature importance')\n",
" plt.ylabel('Feature')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 0. Train Score on a specific date\n",
"------------------------------------"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ref_date = list(train_x.keys())[-1]\n",
"sample_train_x = train_x[ref_date]\n",
"sample_train_y = train_y[ref_date].flatten()\n",
"\n",
"sample_test_x = predict_x[ref_date]\n",
"sample_test_y = predict_y[ref_date].flatten()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0]\tval-rmse:0.358247\n",
"Will train until val-rmse hasn't improved in 20 rounds.\n",
"[1]\tval-rmse:0.255458\n",
"[2]\tval-rmse:0.184941\n",
"[3]\tval-rmse:0.137607\n",
"[4]\tval-rmse:0.106497\n",
"[5]\tval-rmse:0.086864\n",
"[6]\tval-rmse:0.07489\n",
"[7]\tval-rmse:0.068031\n",
"[8]\tval-rmse:0.064358\n",
"[9]\tval-rmse:0.062044\n",
"[10]\tval-rmse:0.0612\n",
"[11]\tval-rmse:0.060778\n",
"[12]\tval-rmse:0.060721\n",
"[13]\tval-rmse:0.060811\n",
"[14]\tval-rmse:0.060485\n",
"[15]\tval-rmse:0.060291\n",
"[16]\tval-rmse:0.060126\n",
"[17]\tval-rmse:0.060461\n",
"[18]\tval-rmse:0.060616\n",
"[19]\tval-rmse:0.060645\n",
"[20]\tval-rmse:0.060813\n",
"[21]\tval-rmse:0.06065\n",
"[22]\tval-rmse:0.06056\n",
"[23]\tval-rmse:0.060573\n",
"[24]\tval-rmse:0.060879\n",
"[25]\tval-rmse:0.06104\n",
"[26]\tval-rmse:0.061136\n",
"[27]\tval-rmse:0.0612\n",
"[28]\tval-rmse:0.061146\n",
"[29]\tval-rmse:0.061263\n",
"[30]\tval-rmse:0.061236\n",
"[31]\tval-rmse:0.061272\n",
"[32]\tval-rmse:0.061228\n",
"[33]\tval-rmse:0.061271\n",
"[34]\tval-rmse:0.061277\n",
"[35]\tval-rmse:0.061287\n",
"[36]\tval-rmse:0.061349\n",
"Stopping. Best iteration:\n",
"[16]\tval-rmse:0.060126\n",
"\n",
"Wall time: 50.6 s\n"
]
}
],
"source": [
"%%time\n",
"\n",
"param = {'objective': 'reg:linear',\n",
" 'tree_method': 'hist'}\n",
"\n",
"X_train, X_val, y_train, y_val = train_test_split(sample_train_x, sample_train_y, test_size=0.25, random_state=42)\n",
"\n",
"dtrain = xgb.DMatrix(X_train, label=y_train)\n",
"dval = xgb.DMatrix(X_val, label=y_val)\n",
"num_round = 500\n",
"model = xgb.train(param, dtrain, num_round, evals=[(dval, 'val')], early_stopping_rounds=10)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-0.01240957, -0.01542193, 0.003759 , -0.0370059 , -0.01399595,\n",
" -0.02125567, -0.03098136, -0.02801037, 0.00610146, 0.00834453,\n",
" 0.00155056, -0.01593572, -0.05426192, -0.05738616, -0.03181559,\n",
" 0.02134934, 0.00575542, -0.0022133 , -0.02373421, -0.04187602,\n",
" -0.01939499, 0.01149094, -0.00942582, -0.01294637, 0.03033528,\n",
" 0.0812315 , -0.03541517, -0.02579325, -0.02382213, -0.03661305,\n",
" -0.00801933, -0.00755411, -0.01823515, -0.0351119 , -0.03827125,\n",
" -0.06906605, -0.00657129, -0.02492863, -0.00713164, -0.02199388,\n",
" -0.01912141, 0.0357779 , -0.02171987, -0.00683856, -0.01550424,\n",
" 0.0036301 , -0.00993472, 0.00213516, 0.05597222, -0.03993398,\n",
" -0.00298005, -0.02027029, -0.02534062, -0.02602386, -0.00823247,\n",
" 0.04025003, -0.03899878, -0.00370234, -0.01509917, -0.02637047,\n",
" -0.02993429, 0.03641999, -0.00127167, 0.02978992, 0.0345099 ,\n",
" -0.01929975, -0.009942 , 0.02140123, -0.0099209 , 0.01546139,\n",
" -0.01639247, -0.03365564, -0.04523635, -0.04774266, 0.08302578,\n",
" 0.00277618, -0.05502254, 0.03744963, 0.02932486, -0.00341415,\n",
" 0.02102587, -0.02130085, -0.01064438, -0.00281268, -0.0393275 ,\n",
" -0.00551182, 0.02216128, 0.00336897, -0.02149767, 0.00432244,\n",
" -0.01085776, -0.02662468, -0.03377903, -0.01227343, 0.00042588,\n",
" -0.02542597, -0.05954915, -0.03823936, -0.00928009, -0.04934055,\n",
" 0.01497418, -0.01202613, -0.05176103, -0.01371014, -0.01520729,\n",
" -0.0345453 , -0.00981134, 0.05190268, -0.02993858, -0.00521314,\n",
" 0.05042419, 0.0068022 , 0.03850815, 0.0054971 , -0.01335073,\n",
" 0.03709635, -0.02084929, -0.04489976, -0.01437265, 0.03551191,\n",
" -0.01103634, -0.02191484, -0.00293249, -0.05109465, 0.0148676 ,\n",
" -0.0535078 , 0.06882447, -0.01370162, 0.00520322, -0.03902239,\n",
" 0.11518031, 0.0146822 , -0.0239194 , 0.00133988, -0.02818328,\n",
" -0.02285409, -0.00797224, -0.013053 , -0.0150677 , -0.01403576,\n",
" -0.01198524, -0.01477695, -0.01634622, -0.04748571, 0.02015179,\n",
" -0.0155257 , -0.00101227, 0.00044355, -0.03880119, 0.02849859,\n",
" -0.02591705, -0.02559918, -0.01068062, -0.03854823, -0.03522623,\n",
" 0.00016773, -0.0215205 , 0.00274503, 0.02998054, 0.05022603,\n",
" -0.00204605, -0.0037967 , -0.03397596, 0.00248042, -0.01018918,\n",
" -0.02307081, 0.03564245, -0.0192104 , -0.0188635 , -0.00876307,\n",
" -0.01475793, 0.03806627, 0.00400147, -0.05255032, -0.00407612,\n",
" -0.05250466, 0.04599616, -0.01867467, -0.0290935 , -0.00397712,\n",
" 0.03026605, -0.04073852, -0.00887918, -0.00898904, 0.00711107,\n",
" -0.02673113, -0.00063246, 0.00690308, 0.01755196, -0.01112407,\n",
" 0.01613739, -0.01626182, -0.01776564, -0.04049528, -0.02316129,\n",
" -0.03404057, 0.05844504, 0.04776457, -0.00024652, -0.02793056,\n",
" -0.00566602, 0.0254831 , 0.01520956, -0.023395 , -0.0574829 ,\n",
" -0.01788056, -0.02763474, 0.00411063, 0.00771302, 0.00225639,\n",
" -0.03258193, 0.01864567, 0.00359404, -0.02853906, -0.02357769,\n",
" -0.01055729, 0.02848527, -0.01142102, -0.01484263, 0.01096508,\n",
" -0.0058884 , -0.03850198, -0.01169294, -0.02016002, -0.00858271,\n",
" -0.01737535, -0.01132357, -0.01537168, 0.01681358, -0.01885867,\n",
" 0.00720814, -0.00807756, -0.01558983, -0.04140353, -0.00119007,\n",
" 0.00362846, -0.00405478, 0.01587 , -0.00223809, -0.02734447,\n",
" 0.0603801 , -0.00600296, -0.00761539, -0.03125739, -0.0182476 ,\n",
" -0.02051514, -0.06340379, -0.0132798 , -0.00706446, -0.0051083 ,\n",
" -0.06194752, -0.02370489, -0.02759361, -0.07792807, -0.01646417,\n",
" -0.00862998, 0.01760298, -0.02154881, -0.02350742, 0.00072682,\n",
" -0.01519167, -0.01736462, -0.01713592, -0.02934706, -0.01179403,\n",
" -0.01066124, -0.02096367, -0.01437968, -0.01116806, 0.02301767,\n",
" -0.02210826, -0.01846623, 0.00143921, 0.02955455, -0.00900269,\n",
" -0.00846171, -0.02978265, -0.03844941, -0.07085401, -0.0257141 ,\n",
" 0.08375698, -0.01819605, 0.00556257, -0.0071398 , -0.02632672,\n",
" 0.01958469, -0.03904194, 0.00415304, 0.02383152, 0.06204295,\n",
" -0.01177365, -0.02184021, -0.00405794, -0.00583315, 0.01149747,\n",
" -0.02126378, -0.01744574, -0.00426322, -0.02150643, -0.00104535,\n",
" 0.02841917, 0.02070701, -0.00639737, 0.0098393 , -0.04142082,\n",
" -0.03396833, -0.00297564, 0.08319226, -0.00794584, -0.05390674,\n",
" -0.00478739, 0.06837583, -0.00536132, -0.03343207, -0.03414571,\n",
" 0.02997369, -0.00545079, -0.00386769, 0.00255051, 0.01306653,\n",
" 0.0048641 , 0.03433934, -0.02378434, 0.04656109, 0.0096634 ,\n",
" 0.0046961 , -0.00100988, -0.01268989, 0.02552027, -0.03888869,\n",
" 0.01128921, -0.01726711, -0.01825362, 0.00361755, -0.01530814,\n",
" -0.01985651, -0.01562345, -0.0304051 , -0.01799905, -0.00982022,\n",
" -0.01764894, 0.00533196, -0.02617782, 0.005638 , -0.03825927,\n",
" -0.02882785, -0.01596993, 0.03713408, -0.02809715, -0.0173474 ,\n",
" -0.00776845, -0.02215266, -0.00102746, -0.00827265, -0.01905811,\n",
" -0.02848411, 0.03215998, -0.02207923, 0.00932205, -0.01126087,\n",
" -0.00565606, 0.02935371, -0.00059187, -0.01205027, -0.05444193,\n",
" -0.02233607, -0.03185856, -0.01197594, 0.01181379, 0.02150306,\n",
" -0.02015579, -0.00327814, -0.05243367, 0.01090935, -0.01362717,\n",
" -0.00649858, -0.04995716, 0.00111824, -0.00596726, 0.00199249,\n",
" -0.01414698, -0.02048892, -0.02757788, 0.00264362, -0.01716697,\n",
" 0.03053036, -0.01781428, -0.01769686, 0.00382763, -0.02163672,\n",
" -0.02261263, -0.00740075, -0.04856652, 0.00468129, -0.0031448 ,\n",
" -0.01368171, -0.00363648, -0.02272427, 0.02934048, -0.01362318,\n",
" -0.00830847, -0.01344341, 0.01296824, -0.01586258, -0.01011288,\n",
" -0.0408842 , -0.02888894, 0.135299 , -0.01826584, 0.00179467,\n",
" -0.02017748, 0.00013959, -0.00703186, -0.02337551, -0.00994235,\n",
" -0.00222278, -0.03552574, 0.00424978, -0.01230735, -0.03085071,\n",
" 0.00290561, -0.0943107 , -0.01236612, -0.03675449, -0.01225901,\n",
" -0.00345892, -0.01004827, -0.04368258, 0.00239447, 0.02305984,\n",
" -0.01924253, -0.0200364 , -0.0267784 , 0.00150818, 0.00038585,\n",
" -0.00622386, 0.01090187, 0.00142562, -0.00982726, -0.00414795,\n",
" 0.00195593, -0.00311685, 0.01556793, -0.0243153 , -0.00266767,\n",
" 0.01835045, -0.03958005, -0.02351582, -0.02302212, -0.01367074,\n",
" -0.01429409, -0.00776011, -0.01834613, 0.01772127, -0.00750965,\n",
" -0.03049898, -0.02398819, 0.04377386, -0.01814556, 0.02430058,\n",
" -0.00129294, -0.02336222, 0.0133948 , -0.00757259, 0.00265574,\n",
" 0.0106228 , -0.00414163, 0.03351626, 0.03367957, 0.05120924,\n",
" 0.04967251], dtype=float32)"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dpredict = xgb.DMatrix(sample_test_x, label=sample_test_y)\n",
"model.predict(dpredict)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'[0]\\teval-rmse:0.034598'"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.eval(dpredict)"
] ]
}, },
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment