Commit 7b4198be authored by Dr.李

reorder the priority

parent 0c7657eb
......@@ -36,6 +36,9 @@ class Experimental(Base):
DROE = Column(Float(53))
IVR = Column(Float(53))
ROEAfterNonRecurring = Column(Float(53))
EPAfterNonRecurring = Column(Float(53))
DROEAfterNonRecurring = Column(Float(53))
CFinc1 = Column(Float(53))
class FactorMaster(Base):
......
......@@ -84,7 +84,7 @@ macro_styles = ['COUNTRY']
total_risk_factors = risk_styles + industry_styles + macro_styles
factor_tables = [Uqer, Tiny, LegacyFactor, Experimental, RiskExposure, Market]
factor_tables = [Uqer, LegacyFactor, Tiny, Experimental, RiskExposure, Market]
def append_industry_info(df):
......@@ -543,7 +543,7 @@ if __name__ == '__main__':
data2 = engine.fetch_industry(ref_date=ref_date,
codes=codes)
data2 = engine.fetch_data_range(universe,
factors=['EPS'],
factors=None,
start_date=start_date,
end_date=end_date)
print(codes)
......
......@@ -32,7 +32,8 @@ training - every 4 week
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('zz500', ['zz500'])
neutralize_risk = industry_styles
alpha_factors = ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
alpha_factors = ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'GREV',
'ROEDiluted'] # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
benchmark = 905
n_bins = 5
frequency = '1w'
......@@ -54,7 +55,8 @@ data_package = fetch_data_package(engine,
batch=batch,
neutralized_risk=neutralize_risk,
pre_process=[winsorize_normal, standardize],
post_process=[standardize])
post_process=[standardize],
warm_start=20)
'''
training phase: using Linear - regression from scikit-learn
......@@ -83,31 +85,73 @@ predicting phase: using trained model on the re-balance dates
predict_x = data_package['predict']['x']
settlement = data_package['settlement']
final_res = np.zeros((len(dates), n_bins))
# final_res = np.zeros((len(dates), n_bins))
#
# for i, predict_date in enumerate(dates):
# model = model_df[predict_date]
# x = predict_x[predict_date]
# benchmark_w = settlement[settlement.trade_date == predict_date]['weight'].values
# realized_r = settlement[settlement.trade_date == predict_date]['dx'].values
#
# predict_y = model.predict(x)
#
# res = er_quantile_analysis(predict_y,
# n_bins,
# dx_return=realized_r,
# benchmark=benchmark_w)
#
# final_res[i] = res / benchmark_w.sum()
# print('trade_date: {0} predicting finished'.format(train_date))
#
# last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
#
# df = pd.DataFrame(final_res, index=dates[1:] + [last_date])
# df.sort_index(inplace=True)
# df.cumsum().plot()
# plt.title('Risk style factors model training with Linear Regression from 2012 - 2017')
# plt.show()
'''
predicting phase: using trained model on the re-balance dates (optimizing with risk neutral)
'''
industry_dummies = pd.get_dummies(settlement['industry_code'].values)
final_res = np.zeros(len(dates))
for i, predict_date in enumerate(dates):
model = model_df[predict_date]
x = predict_x[predict_date]
benchmark_w = settlement[settlement.trade_date == predict_date]['weight'].values
realized_r = settlement[settlement.trade_date == predict_date]['dx'].values
cons = Constraints()
index = settlement.trade_date == predict_date
benchmark_w = settlement[index]['weight'].values
realized_r = settlement[index]['dx'].values
industry_names = settlement[index]['industry'].values
is_tradable = settlement[index]['isOpen'].values
ind_exp = industry_dummies[index]
predict_y = model.predict(x)
risk_tags = ind_exp.columns
cons.add_exposure(risk_tags, ind_exp.values)
benchmark_exp = benchmark_w @ ind_exp.values
res = er_quantile_analysis(predict_y,
n_bins,
dx_return=realized_r,
benchmark=benchmark_w)
for k, name in enumerate(risk_tags):
cons.set_constraints(name, benchmark_exp[k], benchmark_exp[k])
final_res[i] = res / benchmark_w.sum()
print('trade_date: {0} predicting finished'.format(train_date))
predict_y = model.predict(x)
weights, analysis = er_portfolio_analysis(predict_y,
industry_names,
realized_r,
constraints=cons,
detail_analysis=True,
benchmark=benchmark_w,
is_tradable=is_tradable)
final_res[i] = analysis['er']['total'] / benchmark_w.sum()
last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
df = pd.DataFrame(final_res, index=dates[1:] + [last_date])
df = pd.Series(final_res, index=dates[1:] + [last_date])
df.sort_index(inplace=True)
df.cumsum().plot()
plt.title('Risk style factors model training with Linear Regression from 2012 - 2017')
plt.show()
df = df.cumsum()
df.to_csv('d:/20120101_20170823_bt.csv')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment