Commit fc7ea7d4 authored by Dr.李

update models

parent 56273087
......@@ -1687,13 +1687,12 @@ class FullFactorView(Base):
class Models(Base):
__tablename__ = 'models'
__table_args__ = (
Index('model_pk', 'trade_date', 'portfolio_name', 'model_type', 'version', unique=True),
Index('model_pk', 'trade_date', 'model_type', 'model_version', unique=True),
)
trade_date = Column(DateTime, primary_key=True, nullable=False)
portfolio_name = Column(String(30), primary_key=True, nullable=False)
model_type = Column(String(30), primary_key=True, nullable=False)
version = Column(BigInteger, primary_key=True, nullable=False)
model_version = Column(BigInteger, primary_key=True, nullable=False)
update_time = Column(DateTime, nullable=False)
model_desc = Column(JSON, nullable=False)
......
......@@ -16,8 +16,18 @@ strategies = {
'prod': {
# 'factors': ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'],
# 'weights': [0.05, 0.3, 0.35, 0.075, 0.15, 0.05]
'factors': ['CHV'],
'weights': [1.]
#'factors': ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'],
#'weights': [0.05, 0.3, 0.35, 0.075, 0.15, 0.05]
'factors': ['VAL', 'RVOL', 'ROEDiluted', 'GREV', 'EPS', 'CHV', 'CFinc1', 'BDTO'],
'weights': [0.034129344,
0.015881607,
0.048765746,
0.042747382,
-0.015900173,
0.019044573,
-0.001792638,
0.014277867,
]
},
# 'candidate': {
# 'factors': ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'GREV', 'ROEDiluted'],
......@@ -31,19 +41,24 @@ universe = Universe('custom', ['zz500'])
benchmark_code = 905
neutralize_risk = industry_styles
constraint_risk = industry_styles
freq = '1w'
freq = '2w'
if freq == '1m':
horizon = 21
elif freq == '1w':
horizon = 4
elif freq == '2w':
horizon = 8
elif freq == '3w':
horizon = 12
elif freq == '1d':
horizon = 0
dates = makeSchedule('2017-01-01',
'2017-08-20',
'2017-09-05',
tenor=freq,
calendar='china.sse')
calendar='china.sse',
dateGenerationRule=DateGeneration.Forward)
total_data_dict = {}
......@@ -107,10 +122,10 @@ for strategy in strategies:
ret_df = pd.DataFrame(total_data_dict, index=dates)
ret_df.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
ret_df = ret_df.shift(1)
start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
ret_df.loc[start_date] = 0.
ret_df.sort_index(inplace=True)
ret_df.iloc[0] = 0.
ret_df.cumsum().plot(figsize=(12, 6))
plt.savefig("backtest_big_universe_20170814.png")
......
......@@ -9,6 +9,9 @@ import numpy as np
import pandas as pd
import copy
from sklearn.linear_model import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.svm import NuSVR
from alphamind.api import *
from PyFin.api import *
from matplotlib import pyplot as plt
......@@ -34,14 +37,14 @@ universe = Universe('zz500', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles
portfolio_risk_neutralize = ['SIZE']
portfolio_industry_neutralize = True
alpha_factors = ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted'] # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
alpha_factors = ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'] # ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted'] # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
benchmark = 905
n_bins = 5
frequency = '1w'
batch = 8
start_date = '2017-01-01'
end_date = '2017-09-03'
method = 'rank'
frequency = '2w'
batch = 1
start_date = '2017-01-05'
end_date = '2017-09-05'
method = 'risk_neutral'
use_rank = 100
'''
......@@ -58,7 +61,7 @@ data_package = fetch_data_package(engine,
batch=batch,
neutralized_risk=neutralize_risk,
pre_process=[winsorize_normal, standardize],
post_process=[standardize],
post_process=[winsorize_normal, standardize],
warm_start=8)
'''
......@@ -70,15 +73,20 @@ train_y = data_package['train']['y']
dates = sorted(train_x.keys())
model = LinearRegression(fit_intercept=False)
model_df = pd.Series()
for train_date in dates:
#model = LinearRegression(alpha_factors, fit_intercept=False)
#model = LassoCV(fit_intercept=False)
#model = AdaBoostRegressor(n_estimators=100)
#model = RandomForestRegressor(n_estimators=100, n_jobs=4)
#model = NuSVR(kernel='rbf', C=1e-3, gamma=0.1)
model = ConstLinearModel(alpha_factors, np.array([0.05, 0.3, 0.35, 0.075, 0.15, 0.05]))
x = train_x[train_date]
y = train_y[train_date]
model.fit(x, y)
model_df.loc[train_date] = copy.deepcopy(model)
model_df.loc[train_date] = model
alpha_logger.info('trade_date: {0} training finished'.format(train_date))
'''
......@@ -118,7 +126,7 @@ settlement = data_package['settlement']
predicting phase: using trained model on the re-balance dates (optimizing with risk neutral)
'''
industry_dummies = pd.get_dummies(settlement['industry_code'].values)
industry_dummies = pd.get_dummies(settlement['industry'].values)
risk_styles = settlement[portfolio_risk_neutralize].values
final_res = np.zeros(len(dates))
......@@ -133,6 +141,9 @@ for i, predict_date in enumerate(dates):
industry_names = settlement[index]['industry'].values
is_tradable = settlement[index]['isOpen'].values
cons.add_exposure(['total'], np.ones((len(is_tradable), 1)))
cons.set_constraints('total', benchmark_w.sum(), benchmark_w.sum())
if portfolio_industry_neutralize:
ind_exp = industry_dummies[index]
......@@ -177,6 +188,6 @@ last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
df = pd.Series(final_res, index=dates[1:] + [last_date])
df.sort_index(inplace=True)
df.cumsum().plot()
plt.title('Prod factors model Linear Regression (rank 100)')
plt.title('Prod factors model {1} ({0})'.format(method, model.__class__.__name__))
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment