Commit 8c8d4550 authored by Dr.李

update strategy

parent 4c100448
......@@ -5,41 +5,100 @@ Created on 2017-9-5
@author: cheng.li
"""
import sqlalchemy as sa
import arrow
import numpy as np
import math
import pandas as pd
import numpy as np
from PyFin.api import *
from alphamind.api import *
from alphamind.data.dbmodel.models import Models
from alphamind.model.linearmodel import LinearRegression
# NOTE(review): this file is a scraped commit-diff page; indentation is lost and
# old/new lines are interleaved. Code below is left byte-identical; only
# comments are added.
#
# --- Backtest / data-fetch configuration (module-level globals used by
# factor_analysis below) ---
# SECURITY NOTE(review): database credentials are hard-coded in the DSN below;
# they should come from config/env, not source control.
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
# Default factor name (the __main__ section later maps over a longer list).
factor = 'ROE'
# Stock universe: CSI 800 ('zz800').
universe = Universe('custom', ['zz800'])
start_date = '2010-01-01'
end_date = '2018-04-26'
# Rebalance frequency: every 10 business days ('10b').
freq = '10b'
# Industry classification: adjusted ShenWan, level 1.
category = 'sw_adj'
level = 1
# Forward-return horizon implied by the rebalance frequency.
horizon = map_freq(freq)
# Rebalance dates on the Shanghai Stock Exchange calendar.
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
# Compute per-rebalance-date quantile-portfolio returns and ICs for one factor
# name, in three variants:
#   'f1' -- cross-sectional quantiles of the raw factor,
#   'f2' -- quantiles within adjusted SW level-1 industry groups,
#   'f3' -- raw value, industry-neutralized inside the loop below.
# Returns a dict with ('mean', 'std-error') pairs for IC and return series.
#
# NOTE(review): several lines in this body are residue of the *removed* side of
# the diff (random x/y data and a LinearRegression fit, plus everything after
# the `return`, which is unreachable) -- they are marked below and kept
# byte-identical.
def factor_analysis(factor):
# A fresh no-arg engine per call -- presumably so the function is
# self-contained when shipped to a dask worker via client.map (see the
# __main__ section); TODO confirm SqlEngine() picks up a default DSN.
engine = SqlEngine()
factors = {
'f1': CSQuantiles(factor),
'f2': CSQuantiles(factor, groups='sw1_adj'),
'f3': LAST(factor)
}
# Pull factor values, risk-model exposures, industry labels and forward
# returns for all dates in the module-level ref_dates schedule.
total_factor = engine.fetch_factor_range(universe, factors, dates=ref_dates)
_, risk_exp = engine.fetch_risk_model_range(universe, dates=ref_dates)
industry = engine.fetch_industry_range(universe, dates=ref_dates, category=category, level=level)
rets = engine.fetch_dx_return_range(universe, horizon=horizon, offset=1, dates=ref_dates)
# Join everything on (trade_date, code); drop rows with any missing values.
total_factor = pd.merge(total_factor, industry[['trade_date', 'code', 'industry']], on=['trade_date', 'code'])
total_factor = pd.merge(total_factor, risk_exp, on=['trade_date', 'code'])
total_factor = pd.merge(total_factor, rets, on=['trade_date', 'code']).dropna()
# One row per date, one column per factor variant.
df_ret = pd.DataFrame(columns=['f1', 'f2', 'f3'])
df_ic = pd.DataFrame(columns=['f1', 'f2', 'f3'])
total_factor_groups = total_factor.groupby('trade_date')
for date, this_factors in total_factor_groups:
# Neutralize the raw variant 'f3' against industry + COUNTRY exposures,
# then map to cross-sectional percentiles.
raw_factors = this_factors['f3'].values
industry_exp = this_factors[industry_styles + ['COUNTRY']].values.astype(float)
processed_values = factor_processing(raw_factors, pre_process=[], risk_factors=industry_exp,
post_process=[percentile])
# NOTE(review): assigning into a groupby slice -- pandas SettingWithCopy
# territory; works here only because the slice is used immediately.
this_factors['f3'] = processed_values
factor_values = this_factors[['f1', 'f2', 'f3']].values
# Long the top quintile (percentile >= 0.8), short the bottom
# (<= 0.2), then scale each column to unit gross exposure.
positions = (factor_values >= 0.8) * 1.
positions[factor_values <= 0.2] = -1
positions /= np.abs(positions).sum(axis=0)
# Portfolio return per variant: forward returns dotted with weights.
ret_values = this_factors.dx.values @ positions
df_ret.loc[date] = ret_values
# IC per variant: first row of corr([dx, f1, f2, f3]) excluding dx itself,
# i.e. corr(dx, f*) for each variant.
ic_values = this_factors[['dx', 'f1', 'f2', 'f3']].corr().values[0, 1:]
df_ic.loc[date] = ic_values
# NOTE(review): the next two lines (random training data) belong to the
# removed version of this file -- diff-interleaving artifact.
x = np.random.randn(1000, 3)
y = np.random.randn(1000)
print(f"{factor} is finished")
# NOTE(review): removed-version residue (toy LinearRegression fit).
model = LinearRegression(['a', 'b', 'c'])
model.fit(x, y)
# Mean and standard error (std / sqrt(N)) per variant.
# NOTE(review): the 'ret' standard error divides by len(df_ic) -- same length
# as df_ret here (both indexed by date), but it reads like a copy-paste;
# confirm intent.
return {'ic': (df_ic.mean(axis=0), df_ic.std(axis=0) / math.sqrt(len(df_ic))),
'ret': (df_ret.mean(axis=0), df_ret.std(axis=0) / math.sqrt(len(df_ic))),
'factor': factor}
# NOTE(review): this __main__ section is two scripts interleaved by the diff
# view: the REMOVED version persisted a fitted model description to the Models
# table via sqlalchemy, and the ADDED version fans factor_analysis out over a
# dask cluster and tabulates the results. Lines are annotated but kept
# byte-identical; as pasted, this would not run (e.g. `model` / `model_desc`
# are unreachable-code residue from the old version).
model_desc = model.save()
df = pd.DataFrame()
if __name__ == '__main__':
# Old version: one metadata row describing the saved model.
new_row = dict(trade_date='2017-09-05',
portfolio_name='test',
model_type='LinearRegression',
version=1,
model_desc=model_desc,
update_time=arrow.now().format())
# New version: connect to the dask scheduler for distributed execution.
from dask.distributed import Client
df = df.append([new_row])
try:
client = Client("10.63.6.176:8786")
# Result tables: (mean, std) x (raw, peer, neutralized) per factor --
# matching the f1/f2/f3 variants produced by factor_analysis.
cols = pd.MultiIndex.from_product([['mean', 'std'], ['raw', 'peer', 'neutralized']])
factors_ret = pd.DataFrame(columns=cols)
factors_ic = pd.DataFrame(columns=cols)
# Old version: persist the model row, storing model_desc as JSON.
df.to_sql(Models.__table__.name, engine.engine,
if_exists='append',
index=False,
dtype={'model_desc': sa.types.JSON})
# Factor names to analyze in parallel, one task per factor.
factors = ['ep_q',
'roe_q',
'SGRO',
'GREV',
'IVR',
'ILLIQUIDITY',
'con_target_price',
'con_pe_rolling_order',
'DividendPaidRatio']
l = client.map(factor_analysis, factors)
results = client.gather(l)
# Old version: round-trip check that the model row was stored.
model_in_db = engine.fetch_model('2017-09-05')
# Collect per-factor mean / standard-error rows into the summary tables.
for res in results:
factor = res['factor']
factors_ret.loc[factor, 'mean'] = res['ret'][0].values
factors_ret.loc[factor, 'std'] = res['ret'][1].values
print(model_in_db)
factors_ic.loc[factor, 'mean'] = res['ic'][0].values
factors_ic.loc[factor, 'std'] = res['ic'][1].values
print(factors_ret)
finally:
# Always release the dask connection, even on failure.
client.close()
......@@ -104,6 +104,7 @@ class Strategy(object):
total_data = pd.merge(total_data, total_benchmark, on=['trade_date', 'code'], how='left')
total_data.fillna({'weight': 0.}, inplace=True)
total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code'])
total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code'])
is_in_benchmark = (total_data.weight > 0.).astype(float).reshape((-1, 1))
total_data.loc[:, 'benchmark'] = is_in_benchmark
......@@ -134,7 +135,7 @@ class Strategy(object):
for ref_date, this_data in total_data_groups:
new_model = models[ref_date]
this_data.fillna(total_data.median(), inplace=True)
this_data = this_data.fillna(this_data[new_model.features].median())
codes = this_data.code.values.tolist()
if self.running_setting.rebalance_method == 'tv':
......@@ -251,35 +252,34 @@ if __name__ == '__main__':
start_date = '2011-01-01'
end_date = '2018-05-04'
freq = '5b'
freq = '20b'
neutralized_risk = None
universe = Universe("custom", ['zz800', 'cyb', 'zz1000'])
universe = Universe("custom", ['zz800'])
dask_client = Client('10.63.6.176:8786')
factor = CSQuantiles(LAST('NetProfitRatio'),
groups='sw1_adj')
alpha_factors = {
'f01': CSQuantiles(LAST('ep_q'), groups='sw1_adj'),
'f02': CSQuantiles(LAST('roe_q'), groups='sw1_adj'),
'f03': CSQuantiles(LAST('SGRO'), groups='sw1_adj'),
'f04': CSQuantiles(LAST('GREV'), groups='sw1_adj'),
'f05': CSQuantiles(LAST('con_peg_rolling'), groups='sw1_adj'),
'f06': CSQuantiles(LAST('con_pe_rolling_order'), groups='sw1_adj'),
'f07': CSQuantiles(LAST('IVR'), groups='sw1_adj'),
'f08': CSQuantiles(LAST('ILLIQUIDITY'), groups='sw1_adj'),
'f09': CSQuantiles(LAST('DividendPaidRatio'), groups='sw1_adj'),
str(factor): factor,
}
alpha_model = XGBTrainer(objective='reg:linear',
booster='gbtree',
n_estimators=300,
eval_sample=0.25,
features=alpha_factors)
weights = {str(factor): 1.}
# alpha_model = XGBTrainer(objective='reg:linear',
# booster='gbtree',
# n_estimators=300,
# eval_sample=0.25,
# features=alpha_factors)
alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
data_meta = DataMeta(freq=freq,
universe=universe,
batch=32,
neutralized_risk=None, # industry_styles,
pre_process=None, # [winsorize_normal, standardize],
post_process=None) # [standardize])
post_process=None,
warm_start=12) # [standardize])
industries = industry_list('sw_adj', 1)
......@@ -301,7 +301,7 @@ if __name__ == '__main__':
start_date,
end_date,
freq,
benchmark=905,
benchmark=906,
weights_bandwidth=0.01,
rebalance_method='tv',
bounds=bounds,
......@@ -311,5 +311,5 @@ if __name__ == '__main__':
strategy = Strategy(alpha_model, data_meta, running_setting, dask_client=dask_client)
ret_df, positions = strategy.run()
ret_df[['excess_return', 'turn_over']].cumsum().plot(secondary_y='turn_over')
plt.title(f"{alpha_factors.keys()}")
plt.title(f"{str(factor)[20:40]}")
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment