update models

fc7ea7d4 · Dr.李 · 56273087
Commit fc7ea7d4 authored Sep 08, 2017 by Dr.李
Showing with 47 additions and 22 deletions

models.py alphamind/data/dbmodel/models.py +2 -3

factor_analysis_example.py alphamind/examples/factor_analysis_example.py +23 -8

model_training.py alphamind/examples/model_training.py +22 -11

No files found.
--- a/alphamind/data/dbmodel/models.py
+++ b/alphamind/data/dbmodel/models.py
@@ -1687,13 +1687,12 @@ class FullFactorView(Base):
 class Models(Base):
    __tablename__ = 'models'
    __table_args__ = (
-        Index('model_pk', 'trade_date', 'portfolio_name', 'model_type', 'version', unique=True),
+        Index('model_pk', 'trade_date', 'model_type', 'model_version', unique=True),
    )
    trade_date = Column(DateTime, primary_key=True, nullable=False)
-    portfolio_name = Column(String(30), primary_key=True, nullable=False)
    model_type = Column(String(30), primary_key=True, nullable=False)
-    version = Column(BigInteger, primary_key=True, nullable=False)
+    model_version = Column(BigInteger, primary_key=True, nullable=False)
    update_time = Column(DateTime, nullable=False)
    model_desc = Column(JSON, nullable=False)

--- a/alphamind/examples/factor_analysis_example.py
+++ b/alphamind/examples/factor_analysis_example.py
@@ -16,8 +16,18 @@ strategies = {
    'prod': {
        # 'factors': ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'],
        # 'weights': [0.05, 0.3, 0.35, 0.075, 0.15, 0.05]
-        'factors': ['CHV'],
+        #'factors':  ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'],
-        'weights': [1.]
+        #'weights': [0.05, 0.3, 0.35, 0.075, 0.15, 0.05]
+        'factors': ['VAL', 'RVOL', 'ROEDiluted', 'GREV', 'EPS', 'CHV', 'CFinc1', 'BDTO'],
+        'weights': [0.034129344,
+0.015881607,
+0.048765746,
+0.042747382,
+-0.015900173,
+0.019044573,
+-0.001792638,
+0.014277867,
+]
    },
    # 'candidate': {
    #     'factors': ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'GREV', 'ROEDiluted'],
@@ -31,19 +41,24 @@ universe = Universe('custom', ['zz500'])
 benchmark_code = 905
 neutralize_risk = industry_styles
 constraint_risk = industry_styles
-freq = '1w'
+freq = '2w'
 if freq == '1m':
    horizon = 21
 elif freq == '1w':
    horizon = 4
+elif freq == '2w':
+    horizon = 8
+elif freq == '3w':
+    horizon = 12
 elif freq == '1d':
    horizon = 0
 dates = makeSchedule('2017-01-01',
-                     '2017-08-20',
+                     '2017-09-05',
                     tenor=freq,
-                     calendar='china.sse')
+                     calendar='china.sse',
+                     dateGenerationRule=DateGeneration.Forward)
 total_data_dict = {}
@@ -107,10 +122,10 @@ for strategy in strategies:
 ret_df = pd.DataFrame(total_data_dict, index=dates)
+ret_df.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
+ret_df = ret_df.shift(1)
-start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
+ret_df.iloc[0] = 0.
-ret_df.loc[start_date] = 0.
-ret_df.sort_index(inplace=True)
 ret_df.cumsum().plot(figsize=(12, 6))
 plt.savefig("backtest_big_universe_20170814.png")

--- a/alphamind/examples/model_training.py
+++ b/alphamind/examples/model_training.py
@@ -9,6 +9,9 @@ import numpy as np
 import pandas as pd
 import copy
 from sklearn.linear_model import *
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import AdaBoostRegressor
+from sklearn.svm import NuSVR
 from alphamind.api import *
 from PyFin.api import *
 from matplotlib import pyplot as plt
@@ -34,14 +37,14 @@ universe = Universe('zz500', ['zz500'])
 neutralize_risk = ['SIZE'] + industry_styles
 portfolio_risk_neutralize = ['SIZE']
 portfolio_industry_neutralize = True
-alpha_factors = ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted']  # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
+alpha_factors = ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'] # ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted']  # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
 benchmark = 905
 n_bins = 5
-frequency = '1w'
+frequency = '2w'
-batch = 8
+batch = 1
-start_date = '2017-01-01'
+start_date = '2017-01-05'
-end_date = '2017-09-03'
+end_date = '2017-09-05'
-method = 'rank'
+method = 'risk_neutral'
 use_rank = 100
 '''
@@ -58,7 +61,7 @@ data_package = fetch_data_package(engine,
                                  batch=batch,
                                  neutralized_risk=neutralize_risk,
                                  pre_process=[winsorize_normal, standardize],
-                                  post_process=[standardize],
+                                  post_process=[winsorize_normal, standardize],
                                  warm_start=8)
 '''
@@ -70,15 +73,20 @@ train_y = data_package['train']['y']
 dates = sorted(train_x.keys())
-model = LinearRegression(fit_intercept=False)
 model_df = pd.Series()
 for train_date in dates:
+    #model = LinearRegression(alpha_factors, fit_intercept=False)
+    #model = LassoCV(fit_intercept=False)
+    #model = AdaBoostRegressor(n_estimators=100)
+    #model = RandomForestRegressor(n_estimators=100, n_jobs=4)
+    #model = NuSVR(kernel='rbf', C=1e-3, gamma=0.1)
+    model = ConstLinearModel(alpha_factors, np.array([0.05, 0.3, 0.35, 0.075, 0.15, 0.05]))
    x = train_x[train_date]
    y = train_y[train_date]
    model.fit(x, y)
-    model_df.loc[train_date] = copy.deepcopy(model)
+    model_df.loc[train_date] = model
    alpha_logger.info('trade_date: {0} training finished'.format(train_date))
 '''
@@ -118,7 +126,7 @@ settlement = data_package['settlement']
 predicting phase: using trained model on the re-balance dates (optimizing with risk neutral)
 '''
-industry_dummies = pd.get_dummies(settlement['industry_code'].values)
+industry_dummies = pd.get_dummies(settlement['industry'].values)
 risk_styles = settlement[portfolio_risk_neutralize].values
 final_res = np.zeros(len(dates))
@@ -133,6 +141,9 @@ for i, predict_date in enumerate(dates):
    industry_names = settlement[index]['industry'].values
    is_tradable = settlement[index]['isOpen'].values
+    cons.add_exposure(['total'], np.ones((len(is_tradable), 1)))
+    cons.set_constraints('total', benchmark_w.sum(), benchmark_w.sum())
    if portfolio_industry_neutralize:
        ind_exp = industry_dummies[index]
@@ -177,6 +188,6 @@ last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
 df = pd.Series(final_res, index=dates[1:] + [last_date])
 df.sort_index(inplace=True)
 df.cumsum().plot()
-plt.title('Prod factors model Linear Regression (rank 100)')
+plt.title('Prod factors model {1} ({0})'.format(method, model.__class__.__name__))
 plt.show()