Commit 69471996 authored by Dr.李

update strategy

parent 8c79003a
...@@ -230,7 +230,7 @@ class SqlEngine(object): ...@@ -230,7 +230,7 @@ class SqlEngine(object):
risk_factors=df[neutralized_risks].values, risk_factors=df[neutralized_risks].values,
post_process=post_process) post_process=post_process)
return df[['code', 'dx']] return df[['code', 'dx']].drop_duplicates(['code'])
def fetch_dx_return_range(self, def fetch_dx_return_range(self,
universe, universe,
...@@ -271,7 +271,7 @@ class SqlEngine(object): ...@@ -271,7 +271,7 @@ class SqlEngine(object):
if dates: if dates:
df = df[df.trade_date.isin(dates)] df = df[df.trade_date.isin(dates)]
return df.sort_values(['trade_date', 'code']) return df.sort_values(['trade_date', 'code']).drop_duplicates(['trade_date', 'code'])
def fetch_dx_return_index(self, def fetch_dx_return_index(self,
ref_date: str, ref_date: str,
...@@ -373,14 +373,10 @@ class SqlEngine(object): ...@@ -373,14 +373,10 @@ class SqlEngine(object):
.set_index('trade_date') .set_index('trade_date')
res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan) res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)
for col in res.columns: res['isOpen'] = df.isOpen.astype(bool)
if col not in set(['code', 'isOpen']) and col not in df.columns: res = res.loc[ref_date]
df[col] = res[col].values res.index = list(range(len(res)))
return df.drop_duplicates(['trade_date', 'code'])
df['isOpen'] = df.isOpen.astype(bool)
df = df.loc[ref_date]
df.index = list(range(len(df)))
return df
def fetch_factor_range(self, def fetch_factor_range(self,
universe: Universe, universe: Universe,
...@@ -441,13 +437,9 @@ class SqlEngine(object): ...@@ -441,13 +437,9 @@ class SqlEngine(object):
df.set_index('trade_date', inplace=True) df.set_index('trade_date', inplace=True)
res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan) res = transformer.transform('code', df).replace([-np.inf, np.inf], np.nan)
for col in res.columns: res['isOpen'] = df.isOpen.astype(bool)
if col not in set(['code', 'isOpen']) and col not in df.columns: res = res.reset_index()
df[col] = res[col].values return pd.merge(res, universe_df[['trade_date', 'code']], how='inner').drop_duplicates(['trade_date', 'code'])
df['isOpen'] = df.isOpen.astype(bool)
df = df.reset_index()
return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')
def fetch_factor_range_forward(self, def fetch_factor_range_forward(self,
universe: Universe, universe: Universe,
...@@ -497,7 +489,7 @@ class SqlEngine(object): ...@@ -497,7 +489,7 @@ class SqlEngine(object):
df = pd.read_sql(query, self.engine) \ df = pd.read_sql(query, self.engine) \
.replace([-np.inf, np.inf], np.nan) \ .replace([-np.inf, np.inf], np.nan) \
.sort_values(['trade_date', 'code']) .sort_values(['trade_date', 'code'])
return pd.merge(df, codes[['trade_date', 'code']], how='inner') return pd.merge(df, codes[['trade_date', 'code']], how='inner').drop_duplicates(['trade_date', 'code'])
def fetch_benchmark(self, def fetch_benchmark(self,
ref_date: str, ref_date: str,
...@@ -571,7 +563,7 @@ class SqlEngine(object): ...@@ -571,7 +563,7 @@ class SqlEngine(object):
risk_exp = pd.read_sql(query, self.engine).dropna() risk_exp = pd.read_sql(query, self.engine).dropna()
return risk_cov, risk_exp return risk_cov, risk_exp.drop_duplicates(['code'])
def fetch_risk_model_range(self, def fetch_risk_model_range(self,
universe: Universe, universe: Universe,
...@@ -631,7 +623,7 @@ class SqlEngine(object): ...@@ -631,7 +623,7 @@ class SqlEngine(object):
risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values( risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(
['trade_date', 'code']) ['trade_date', 'code'])
return risk_cov, risk_exp return risk_cov, risk_exp.drop_duplicates(['trade_date', 'code'])
def fetch_industry(self, def fetch_industry(self,
ref_date: str, ref_date: str,
...@@ -653,7 +645,7 @@ class SqlEngine(object): ...@@ -653,7 +645,7 @@ class SqlEngine(object):
) )
).distinct() ).distinct()
return pd.read_sql(query, self.engine).dropna() return pd.read_sql(query, self.engine).dropna().drop_duplicates(['code'])
def fetch_industry_matrix(self, def fetch_industry_matrix(self,
ref_date: str, ref_date: str,
...@@ -695,7 +687,7 @@ class SqlEngine(object): ...@@ -695,7 +687,7 @@ class SqlEngine(object):
if universe.is_filtered: if universe.is_filtered:
codes = universe.query(self, start_date, end_date, dates) codes = universe.query(self, start_date, end_date, dates)
df = pd.merge(df, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code']) df = pd.merge(df, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
return df return df.drop_duplicates(['trade_date', 'code'])
def fetch_industry_matrix_range(self, def fetch_industry_matrix_range(self,
universe: Universe, universe: Universe,
...@@ -708,7 +700,7 @@ class SqlEngine(object): ...@@ -708,7 +700,7 @@ class SqlEngine(object):
df = self.fetch_industry_range(universe, start_date, end_date, dates, category, level) df = self.fetch_industry_range(universe, start_date, end_date, dates, category, level)
df['industry_name'] = df['industry'] df['industry_name'] = df['industry']
df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="") df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
return df.drop('industry_code', axis=1) return df.drop('industry_code', axis=1).drop_duplicates(['trade_date', 'code'])
def fetch_trade_status(self, def fetch_trade_status(self,
ref_date: str, ref_date: str,
...@@ -935,31 +927,6 @@ class SqlEngine(object): ...@@ -935,31 +927,6 @@ class SqlEngine(object):
self.engine.execute(query) self.engine.execute(query)
df.to_sql(Performance.__table__.name, self.engine, if_exists='append', index=False) df.to_sql(Performance.__table__.name, self.engine, if_exists='append', index=False)
def upsert_positions(self, ref_date, df):
    """Replace the stored positions for ``ref_date`` with the rows of ``df``.

    Existing ``Positions`` rows dated ``ref_date`` are deleted when their
    type, universe, benchmark, source and portfolio each appear among the
    values present in ``df``; afterwards ``df`` is appended to the table.

    :param ref_date: trade date whose rows are replaced.
    :param df: frame with at least ``universe``, ``benchmark``, ``type``,
        ``source`` and ``portfolio`` columns; it is written as-is, with the
        ``weight`` column stored using the SQLAlchemy JSON type.
    """

    def distinct(column):
        # Distinct values of one column as a plain Python list.
        return column.unique().tolist()

    universe_keys = distinct(df.universe)
    benchmark_keys = distinct(df.benchmark)
    type_keys = distinct(df.type)
    source_keys = distinct(df.source)
    portfolio_keys = distinct(df.portfolio)

    # Each column is filtered independently, so the delete covers every
    # combination of the listed values, not only the exact tuples in df.
    stale_rows = and_(
        Positions.trade_date == ref_date,
        Positions.type.in_(type_keys),
        Positions.universe.in_(universe_keys),
        Positions.benchmark.in_(benchmark_keys),
        Positions.source.in_(source_keys),
        Positions.portfolio.in_(portfolio_keys),
    )
    self.engine.execute(delete(Positions).where(stale_rows))

    # The delete above and this insert are issued as two separate calls.
    df.to_sql(
        Positions.__table__.name,
        self.engine,
        if_exists='append',
        index=False,
        dtype={'weight': sa.types.JSON},
    )
def fetch_outright_status(self, ref_date: str, is_open=True, ignore_internal_borrow=False): def fetch_outright_status(self, ref_date: str, is_open=True, ignore_internal_borrow=False):
table = Outright table = Outright
if is_open: if is_open:
......
...@@ -106,8 +106,6 @@ class LinearConstraints(object): ...@@ -106,8 +106,6 @@ class LinearConstraints(object):
bounds: Dict[str, BoxBoundary], bounds: Dict[str, BoxBoundary],
cons_mat: pd.DataFrame, cons_mat: pd.DataFrame,
backbone: np.ndarray=None): backbone: np.ndarray=None):
pyFinAssert(len(bounds) == cons_mat.shape[1], "Number of bounds should be same as number of col of cons_mat")
self.names = list(set(bounds.keys()).intersection(set(cons_mat.columns))) self.names = list(set(bounds.keys()).intersection(set(cons_mat.columns)))
self.bounds = bounds self.bounds = bounds
self.cons_mat = cons_mat self.cons_mat = cons_mat
......
...@@ -35,7 +35,7 @@ def linear_builder(er: np.ndarray, ...@@ -35,7 +35,7 @@ def linear_builder(er: np.ndarray,
if isinstance(ubound, float): if isinstance(ubound, float):
ubound = np.ones(n) * ubound ubound = np.ones(n) * ubound
if not turn_over_target: if not turn_over_target or current_position is None:
cons_matrix = np.concatenate((risk_constraints.T, risk_lbound, risk_ubound), axis=1) cons_matrix = np.concatenate((risk_constraints.T, risk_lbound, risk_ubound), axis=1)
opt = LPOptimizer(cons_matrix, lbound, ubound, -er, method) opt = LPOptimizer(cons_matrix, lbound, ubound, -er, method)
......
...@@ -24,24 +24,6 @@ from alphamind.analysis.factoranalysis import er_portfolio_analysis ...@@ -24,24 +24,6 @@ from alphamind.analysis.factoranalysis import er_portfolio_analysis
all_styles = risk_styles + industry_styles + macro_styles all_styles = risk_styles + industry_styles + macro_styles
# Module-level box bounds for the two portfolio-level constraints.
total_risk_names = ['benchmark', 'total']
b_type = []
l_val = []
u_val = []

for name in total_risk_names:
    # Every constraint is RELATIVE with an upper value of 1.0; only the
    # 'benchmark' lower value differs (0.8 instead of 1.0).
    b_type.append(BoundaryType.RELATIVE)
    l_val.append(0.8 if name == 'benchmark' else 1.0)
    u_val.append(1.0)

bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
class RunningSetting(object): class RunningSetting(object):
...@@ -51,19 +33,23 @@ class RunningSetting(object): ...@@ -51,19 +33,23 @@ class RunningSetting(object):
end_date, end_date,
freq, freq,
benchmark=905, benchmark=905,
weights_bandwidth=0.02,
industry_cat='sw_adj', industry_cat='sw_adj',
industry_level=1, industry_level=1,
rebalance_method='risk_neutral', rebalance_method='risk_neutral',
bounds=None,
**kwargs): **kwargs):
self.universe = universe self.universe = universe
self.dates = makeSchedule(start_date, end_date, freq, 'china.sse') self.dates = makeSchedule(start_date, end_date, freq, 'china.sse')
self.dates = [d.strftime('%Y-%m-%d') for d in self.dates] self.dates = [d.strftime('%Y-%m-%d') for d in self.dates]
self.benchmark = benchmark self.benchmark = benchmark
self.weights_bandwidth = weights_bandwidth
self.horizon = map_freq(freq) self.horizon = map_freq(freq)
self.executor = NaiveExecutor() self.executor = NaiveExecutor()
self.industry_cat = industry_cat self.industry_cat = industry_cat
self.industry_level = industry_level self.industry_level = industry_level
self.rebalance_method = rebalance_method self.rebalance_method = rebalance_method
self.bounds = bounds
self.more_opts = kwargs self.more_opts = kwargs
...@@ -79,7 +65,6 @@ class Strategy(object): ...@@ -79,7 +65,6 @@ class Strategy(object):
self.engine = self.data_meta.engine self.engine = self.data_meta.engine
def run(self): def run(self):
alpha_logger.info("starting backting ...") alpha_logger.info("starting backting ...")
total_factors = self.engine.fetch_factor_range(self.running_setting.universe, total_factors = self.engine.fetch_factor_range(self.running_setting.universe,
...@@ -116,10 +101,16 @@ class Strategy(object): ...@@ -116,10 +101,16 @@ class Strategy(object):
total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code']) total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code'])
total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code']).fillna(total_data.median()) total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code']).fillna(total_data.median())
is_in_benchmark = (total_data.weight > 0.).astype(float).reshape((-1, 1))
total_data.loc[:, 'benchmark'] = is_in_benchmark
total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
total_data.sort_values(['trade_date', 'code'], inplace=True)
total_data_groups = total_data.groupby('trade_date') total_data_groups = total_data.groupby('trade_date')
rets = [] rets = []
turn_overs = [] turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
executor = copy.deepcopy(self.running_setting.executor) executor = copy.deepcopy(self.running_setting.executor)
positions = pd.DataFrame() positions = pd.DataFrame()
...@@ -135,53 +126,61 @@ class Strategy(object): ...@@ -135,53 +126,61 @@ class Strategy(object):
sec_cov = None sec_cov = None
benchmark_w = this_data.weight.values benchmark_w = this_data.weight.values
is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1)) constraints = LinearConstraints(self.running_setting.bounds,
constraints_exp = np.concatenate([is_in_benchmark, this_data,
np.ones_like(is_in_benchmark)], benchmark_w)
axis=1)
constraints_exp = pd.DataFrame(constraints_exp, columns=['benchmark', 'total'])
constraints = LinearConstraints(bounds, constraints_exp, benchmark_w)
lbound = np.maximum(0., benchmark_w - 0.02) lbound = np.maximum(0., benchmark_w - self.running_setting.weights_bandwidth)
ubound = 0.02 + benchmark_w ubound = self.running_setting.weights_bandwidth + benchmark_w
if previous_pos.empty:
current_position = None
else:
previous_pos.set_index('code', inplace=True)
remained_pos = previous_pos.loc[codes]
remained_pos.fillna(0., inplace=True)
current_position = remained_pos.weight.values
features = new_model.features features = new_model.features
raw_factors = this_data[features].values raw_factors = this_data[features].values
new_factors = factor_processing(raw_factors, new_factors = factor_processing(raw_factors,
pre_process=self.data_meta.pre_process, pre_process=self.data_meta.pre_process,
risk_factors=self.data_meta.neutralized_risk, risk_factors=this_data[self.data_meta.neutralized_risk].values.astype(float) if self.data_meta.neutralized_risk else None,
post_process=self.data_meta.post_process) post_process=self.data_meta.post_process)
er = new_model.predict(pd.DataFrame(new_factors, columns=features)) er = new_model.predict(pd.DataFrame(new_factors, columns=features))
alpha_logger.info('{0} re-balance: {1} codes'.format(ref_date, len(er))) alpha_logger.info('{0} re-balance: {1} codes'.format(ref_date, len(er)))
target_pos, _ = er_portfolio_analysis(er, target_pos = self._calculate_pos(er,
this_data.industry_name.values, this_data,
None, constraints,
constraints, benchmark_w,
False, lbound,
benchmark_w, ubound,
method=self.running_setting.rebalance_method, sec_cov=sec_cov,
lbound=lbound, current_position=current_position,
ubound=ubound, **self.running_setting.more_opts)
target_vol=0.05,
cov=sec_cov)
target_pos['code'] = codes target_pos['code'] = codes
target_pos['trade_date'] = ref_date target_pos['trade_date'] = ref_date
target_pos['benchmark_weight'] = benchmark_w
target_pos['dx'] = this_data.dx.values
turn_over, executed_pos = executor.execute(target_pos=target_pos) turn_over, executed_pos = executor.execute(target_pos=target_pos)
leverage = executed_pos.weight.abs().sum()
ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.) ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
rets.append(np.log(1. + ret)) rets.append(np.log(1. + ret))
executor.set_current(executed_pos) executor.set_current(executed_pos)
turn_overs.append(turn_over) turn_overs.append(turn_over)
positions = positions.append(target_pos) leverags.append(leverage)
positions = positions.append(executed_pos)
previous_pos = executed_pos
positions['benchmark_weight'] = total_data['weight'].values
positions['dx'] = total_data.dx.values
trade_dates = positions.trade_date.unique() trade_dates = positions.trade_date.unique()
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs}, index=trade_dates) ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=trade_dates)
index_return = self.engine.fetch_dx_return_index_range(self.running_setting.benchmark, index_return = self.engine.fetch_dx_return_index_range(self.running_setting.benchmark,
dates=self.running_setting.dates, dates=self.running_setting.dates,
...@@ -191,7 +190,7 @@ class Strategy(object): ...@@ -191,7 +190,7 @@ class Strategy(object):
ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1], freq)] = 0. ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1], freq)] = 0.
ret_df = ret_df.shift(1) ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0. ret_df.iloc[0] = 0.
ret_df['excess_return'] = ret_df['returns'] - ret_df['benchmark_returns'] ret_df['excess_return'] = ret_df['returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
return ret_df, positions return ret_df, positions
...@@ -203,6 +202,22 @@ class Strategy(object): ...@@ -203,6 +202,22 @@ class Strategy(object):
sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000 sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
return sec_cov return sec_cov
def _calculate_pos(self, er, data, constraints, benchmark_w, lbound, ubound, **kwargs):
    """Build the target position by delegating to ``er_portfolio_analysis``.

    :param er: expected returns passed straight through to the optimizer.
    :param data: frame for the current date; only ``industry_name`` is read.
    :param constraints: linear constraints forwarded unchanged.
    :param benchmark_w: benchmark weights, forwarded as ``benchmark``.
    :param lbound: lower weight bounds.
    :param ubound: upper weight bounds.
    :param kwargs: optional settings. Only ``current_position``,
        ``target_vol``, ``sec_cov`` (forwarded as ``cov``) and
        ``turn_over_target`` are used; any that is absent is passed as
        ``None`` and every other key is ignored.
    :return: the first element of the pair returned by
        ``er_portfolio_analysis``.
    """
    # Collect the optional settings first; a missing key becomes None.
    optional = {
        'current_position': kwargs.get('current_position'),
        'target_vol': kwargs.get('target_vol'),
        'cov': kwargs.get('sec_cov'),
        'turn_over_target': kwargs.get('turn_over_target'),
    }

    # detail_analysis is switched off and no realised returns are supplied,
    # so the second element of the result is discarded.
    weights, _ = er_portfolio_analysis(
        er,
        industry=data.industry_name.values,
        dx_return=None,
        constraints=constraints,
        detail_analysis=False,
        benchmark=benchmark_w,
        method=self.running_setting.rebalance_method,
        lbound=lbound,
        ubound=ubound,
        **optional
    )
    return weights
if __name__ == '__main__': if __name__ == '__main__':
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
...@@ -210,30 +225,127 @@ if __name__ == '__main__': ...@@ -210,30 +225,127 @@ if __name__ == '__main__':
from alphamind.api import Universe from alphamind.api import Universe
from alphamind.api import ConstLinearModel from alphamind.api import ConstLinearModel
from alphamind.api import DataMeta from alphamind.api import DataMeta
from alphamind.api import industry_list
from alphamind.api import winsorize_normal
from alphamind.api import standardize
start_date = '2010-01-01' start_date = '2010-01-01'
end_date = '2018-04-19' end_date = '2018-04-27'
freq = '10b' freq = '10b'
neutralized_risk = None neutralized_risk = None
universe = Universe("custom", ['zz800']) universe = Universe("custom", ['zz800', 'cyb'])
alpha_factors = {
'f01': CSQuantiles(LAST('ep_q'), groups='sw1_adj'),
'f02': CSQuantiles(LAST('roe_q'), groups='sw1_adj'),
'f03': CSQuantiles(LAST('SGRO'), groups='sw1_adj'),
'f04': CSQuantiles(LAST('GREV'), groups='sw1_adj'),
'f05': CSQuantiles(LAST('con_peg_rolling'), groups='sw1_adj'),
'f06': CSQuantiles(LAST('con_pe_rolling_order'), groups='sw1_adj'),
'f07': CSQuantiles(LAST('IVR'), groups='sw1_adj'),
'f08': CSQuantiles(LAST('ILLIQUIDITY'), groups='sw1_adj'),
'f09': CSQuantiles(LAST('DividendPaidRatio'), groups='sw1_adj'),
}
# alpha_factors = {
# 'f01': LAST('ep_q'),
# 'f02': LAST('roe_q'),
# 'f03': LAST('SGRO'),
# 'f04': LAST('GREV'),
# 'f05': LAST('con_target_price'),
# 'f06': LAST('con_pe_rolling_order'),
# 'f07': LAST('IVR'),
# 'f08': LAST('ILLIQUIDITY'),
# }
weights = dict(
f01=0.5,
f02=1.,
f03=1.,
f04=1.,
f05=-1.,
f06=-0.5,
f07=0.5,
f08=0.5,
f09=0.5
)
factor = 'RVOL'
alpha_factors = {'f01': CSQuantiles(LAST(factor), groups='sw1_adj')}
weights = {'f01': 1.}
alpha_model = ConstLinearModel(features=alpha_factors, weights=weights) alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
data_meta = DataMeta(freq=freq, data_meta = DataMeta(freq=freq,
universe=universe, universe=universe,
batch=1) batch=1,
neutralized_risk=None, # industry_styles,
pre_process=None, # [winsorize_normal, standardize],
post_process=None) # [standardize])
industries = industry_list('sw_adj', 1)
total_risk_names = ['benchmark', 'total'] + \
['EARNYILD', 'LIQUIDTY', 'GROWTH', 'SIZE', 'BETA', 'MOMENTUM'] + \
industry_list('sw_adj', 1)
b_type = []
l_val = []
u_val = []
for name in total_risk_names:
if name == 'benchmark':
b_type.append(BoundaryType.RELATIVE)
l_val.append(0.8)
u_val.append(1.0)
elif name == 'total':
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(.0)
u_val.append(.0)
elif name == 'EARNYILD':
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(0.00)
u_val.append(0.20)
elif name == 'GROWTH':
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(0.00)
u_val.append(0.20)
elif name == 'MOMENTUM':
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(0.20)
u_val.append(0.20)
elif name == 'SIZE':
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(-0.2)
u_val.append(0.0)
elif name == 'LIQUIDTY':
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(-0.40)
u_val.append(-0.20)
elif name in ["银行", "保险", "证券", "多元金融"]:
b_type.append(BoundaryType.RELATIVE)
l_val.append(0.8)
u_val.append(0.8)
elif name in ["计算机", "医药生物", "国防军工", "信息服务", "机械设备"]:
b_type.append(BoundaryType.RELATIVE)
l_val.append(1.0)
u_val.append(2.0)
else:
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(0)
u_val.append(0)
bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
running_setting = RunningSetting(universe, running_setting = RunningSetting(universe,
start_date, start_date,
end_date, end_date,
freq, freq,
rebalance_method='tv') benchmark=905,
weights_bandwidth=0.01,
rebalance_method='risk_neutral',
bounds=bounds,
target_vol=0.045,
turn_over_target=0.4)
strategy = Strategy(alpha_model, data_meta, running_setting) strategy = Strategy(alpha_model, data_meta, running_setting)
ret_df, positions = strategy.run() ret_df, positions = strategy.run()
ret_df['excess_return'].cumsum().plot() ret_df[['excess_return', 'turn_over']].cumsum().plot(secondary_y='turn_over')
plt.title(f"{factor}") plt.title(f"{alpha_factors.keys()}")
plt.show() plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment