Commit 093c1659 authored by Dr.李

update schedule setting

parent b9b8fdd2
...@@ -50,6 +50,8 @@ def map_freq(freq): ...@@ -50,6 +50,8 @@ def map_freq(freq):
horizon = 19 horizon = 19
elif freq == '1d': elif freq == '1d':
horizon = 0 horizon = 0
elif freq[-1] == "b":
horizon = int(freq[:-1]) - 1
else: else:
raise ValueError("Unrecognized freq: {0}".format(freq)) raise ValueError("Unrecognized freq: {0}".format(freq))
return horizon return horizon
......
...@@ -184,18 +184,19 @@ class SqlEngine(object): ...@@ -184,18 +184,19 @@ class SqlEngine(object):
ref_date: str, ref_date: str,
codes: Iterable[int], codes: Iterable[int],
expiry_date: str = None, expiry_date: str = None,
horizon: int = 0) -> pd.DataFrame: horizon: int = 0,
offset: int = 0) -> pd.DataFrame:
start_date = ref_date start_date = ref_date
if not expiry_date: if not expiry_date:
end_date = advanceDateByCalendar('china.sse', ref_date, str(horizon + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d') end_date = advanceDateByCalendar('china.sse', ref_date, str(horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
else: else:
end_date = expiry_date end_date = expiry_date
stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over( stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
partition_by=DailyReturn.code, partition_by=DailyReturn.code,
order_by=DailyReturn.trade_date, order_by=DailyReturn.trade_date,
rows=(DAILY_RETURN_OFFSET, horizon + DAILY_RETURN_OFFSET)).label('dx') rows=(DAILY_RETURN_OFFSET + offset, horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
query = select([DailyReturn.trade_date, DailyReturn.code, stats]).where( query = select([DailyReturn.trade_date, DailyReturn.code, stats]).where(
and_( and_(
...@@ -213,13 +214,14 @@ class SqlEngine(object): ...@@ -213,13 +214,14 @@ class SqlEngine(object):
start_date: str = None, start_date: str = None,
end_date: str = None, end_date: str = None,
dates: Iterable[str] = None, dates: Iterable[str] = None,
horizon: int = 0) -> pd.DataFrame: horizon: int = 0,
offset: int = 0) -> pd.DataFrame:
if dates: if dates:
start_date = dates[0] start_date = dates[0]
end_date = dates[-1] end_date = dates[-1]
end_date = advanceDateByCalendar('china.sse', end_date, str(horizon + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d') end_date = advanceDateByCalendar('china.sse', end_date, str(horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
cond = universe.query_range(start_date, end_date) cond = universe.query_range(start_date, end_date)
big_table = join(DailyReturn, UniverseTable, big_table = join(DailyReturn, UniverseTable,
...@@ -230,7 +232,7 @@ class SqlEngine(object): ...@@ -230,7 +232,7 @@ class SqlEngine(object):
stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over( stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
partition_by=DailyReturn.code, partition_by=DailyReturn.code,
order_by=DailyReturn.trade_date, order_by=DailyReturn.trade_date,
rows=(DAILY_RETURN_OFFSET, horizon + DAILY_RETURN_OFFSET)).label('dx') rows=(offset + DAILY_RETURN_OFFSET, horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
query = select([DailyReturn.trade_date, DailyReturn.code, stats]) \ query = select([DailyReturn.trade_date, DailyReturn.code, stats]) \
.select_from(big_table) .select_from(big_table)
......
...@@ -25,18 +25,18 @@ end_date = '2017-11-15' ...@@ -25,18 +25,18 @@ end_date = '2017-11-15'
benchmark_code = 300 benchmark_code = 300
universe_name = ['zz500', 'hs300'] universe_name = ['zz500', 'hs300']
universe = Universe(universe_name, universe_name) universe = Universe(universe_name, universe_name)
frequency = '2w' frequency = '2b'
batch = 8 batch = 8
method = 'risk_neutral' method = 'risk_neutral'
use_rank = 100 use_rank = 100
industry_lower = 1. industry_lower = 1.
industry_upper = 1. industry_upper = 1.
neutralize_risk = ['SIZE'] + industry_styles neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = industry_styles constraint_risk = ['SIZE'] + industry_styles
size_risk_lower = 0 size_risk_lower = 0
size_risk_upper = 0 size_risk_upper = 0
turn_over_target_base = 0.2 turn_over_target_base = 0.1
weight_gap = 0.03 weight_gaps = [0.01, 0.02, 0.03, 0.04]
benchmark_total_lower = 0.8 benchmark_total_lower = 0.8
benchmark_total_upper = 1. benchmark_total_upper = 1.
horizon = map_freq(frequency) horizon = map_freq(frequency)
...@@ -101,7 +101,7 @@ for ref_date in ref_dates: ...@@ -101,7 +101,7 @@ for ref_date in ref_dates:
alpha_logger.info('trade_date: {0} training finished'.format(ref_date)) alpha_logger.info('trade_date: {0} training finished'.format(ref_date))
frequency = '1w' frequency = '2b'
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse') ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe, const_model_factor_data = engine.fetch_data_range(universe,
...@@ -117,187 +117,190 @@ Predicting and re-balance phase ...@@ -117,187 +117,190 @@ Predicting and re-balance phase
factor_groups = const_model_factor_data.groupby('trade_date') factor_groups = const_model_factor_data.groupby('trade_date')
rets = [] for weight_gap in weight_gaps:
turn_overs = [] print("start {0} weight gap simulation ...".format(weight_gap))
leverags = []
previous_pos = pd.DataFrame() rets = []
turn_overs = []
index_dates = [] leverags = []
previous_pos = pd.DataFrame()
for i, value in enumerate(factor_groups):
date = value[0] index_dates = []
data = value[1]
ref_date = date.strftime('%Y-%m-%d') for i, value in enumerate(factor_groups):
date = value[0]
total_data = data.fillna(data[total_features].median()) data = value[1]
alpha_logger.info('{0}: {1}'.format(date, len(total_data))) ref_date = date.strftime('%Y-%m-%d')
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values total_data = data.fillna(data[total_features].median())
benchmark_w = total_data.weight.values alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
constraint_exp = total_data[constraint_risk].values industry = total_data.industry_code.values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float) benchmark_w = total_data.weight.values
risk_names = constraint_risk + ['total'] constraint_exp = total_data[constraint_risk].values
risk_target = risk_exp_expand.T @ benchmark_w risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
lbound = np.maximum(0., hedging_ratio * benchmark_w - weight_gap) # np.zeros(len(total_data)) risk_names = constraint_risk + ['total']
ubound = weight_gap + hedging_ratio * benchmark_w risk_target = risk_exp_expand.T @ benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float) lbound = np.maximum(0., hedging_ratio * benchmark_w - weight_gap) # np.zeros(len(total_data))
ubound = weight_gap + hedging_ratio * benchmark_w
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total') is_in_benchmark = (benchmark_w > 0.).astype(float)
constraint = Constraints(risk_exp_expand, risk_names) risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
for i, name in enumerate(risk_names):
if name == 'total': constraint = Constraints(risk_exp_expand, risk_names)
constraint.set_constraints(name,
lower_bound=risk_target[i], for i, name in enumerate(risk_names):
upper_bound=risk_target[i]) if name == 'total':
elif name == 'SIZE': constraint.set_constraints(name,
base_target = abs(risk_target[i]) lower_bound=risk_target[i],
constraint.set_constraints(name, upper_bound=risk_target[i])
lower_bound=risk_target[i] + base_target * size_risk_lower, elif name == 'SIZE':
upper_bound=risk_target[i] + base_target * size_risk_upper) base_target = abs(risk_target[i])
elif name == 'benchmark_total': constraint.set_constraints(name,
base_target = benchmark_w.sum() lower_bound=risk_target[i] + base_target * size_risk_lower,
constraint.set_constraints(name, upper_bound=risk_target[i] + base_target * size_risk_upper)
lower_bound=benchmark_total_lower * base_target, elif name == 'benchmark_total':
upper_bound=benchmark_total_upper * base_target) base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
else:
constraint.set_constraints(name,
lower_bound=risk_target[i] * industry_lower,
upper_bound=risk_target[i] * industry_upper)
factor_values = factor_processing(total_data[const_features].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er1 = const_model.predict(factor_values)
# linear regression model
models = models_series[models_series.index <= date]
if models.empty:
continue
index_dates.append(date)
model = models[-1]
# x = predict_x[date]
x = factor_processing(total_data[linear_model_features].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
er2 = model.predict(x)
# combine model
er1_table = pd.DataFrame({'er1': er1 / er1.std(), 'code': total_data.code.values})
er2_table = pd.DataFrame({'er2': er2 / er2.std(), 'code': total_data.code.values})
er_table = pd.merge(er1_table, er2_table, on=['code'], how='left').fillna(0)
er = (er_table.er1 + er_table.er2).values
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
else: else:
constraint.set_constraints(name, previous_pos.set_index('code', inplace=True)
lower_bound=risk_target[i] * industry_lower, remained_pos = previous_pos.loc[codes]
upper_bound=risk_target[i] * industry_upper)
remained_pos.fillna(0., inplace=True)
factor_values = factor_processing(total_data[const_features].values, turn_over_target = turn_over_target_base
pre_process=[winsorize_normal, standardize], current_position = remained_pos.weight.values
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize]) try:
target_pos, _ = er_portfolio_analysis(er,
# const linear model industry,
er1 = const_model.predict(factor_values) None,
constraint,
# linear regression model False,
models = models_series[models_series.index <= date] benchmark_w,
if models.empty: method=method,
continue use_rank=use_rank,
turn_over_target=turn_over_target,
index_dates.append(date) current_position=current_position,
model = models[-1] lbound=lbound,
ubound=ubound)
# x = predict_x[date] except ValueError:
x = factor_processing(total_data[linear_model_features].values, alpha_logger.info('{0} full re-balance'.format(date))
pre_process=[winsorize_normal, standardize], target_pos, _ = er_portfolio_analysis(er,
risk_factors=risk_exp, industry,
post_process=[winsorize_normal, standardize]) None,
er2 = model.predict(x) constraint,
False,
# combine model benchmark_w,
er1_table = pd.DataFrame({'er1': er1 / er1.std(), 'code': total_data.code.values}) method=method,
er2_table = pd.DataFrame({'er2': er2 / er2.std(), 'code': total_data.code.values}) use_rank=use_rank,
er_table = pd.merge(er1_table, er2_table, on=['code'], how='left').fillna(0) lbound=lbound,
ubound=ubound)
er = (er_table.er1 + er_table.er2).values
target_pos['code'] = total_data['code'].values
codes = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
if previous_pos.empty:
current_position = None executed_codes = executed_pos.code.tolist()
turn_over_target = None dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
else:
previous_pos.set_index('code', inplace=True) result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
remained_pos = previous_pos.loc[codes] result = pd.merge(result, dx_returns, on=['code'])
remained_pos.fillna(0., inplace=True) leverage = result.weight_x.abs().sum()
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values ret = (result.weight_x - hedging_ratio * result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
rets.append(ret)
try: executor.set_current(executed_pos)
target_pos, _ = er_portfolio_analysis(er, turn_overs.append(turn_over)
industry, leverags.append(leverage)
None,
constraint, previous_pos = executed_pos
False, alpha_logger.info('{0} is finished'.format(date))
benchmark_w,
method=method, ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates)
use_rank=use_rank, ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
turn_over_target=turn_over_target, ret_df = ret_df.shift(1)
current_position=current_position, ret_df.iloc[0] = 0.
lbound=lbound, ret_df['tc_cost'] = ret_df.turn_over * 0.002
ubound=ubound)
except ValueError: ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
alpha_logger.info('{0} full re-balance'.format(date)) title='Fixed frequency rebalanced: {0}'.format(frequency),
target_pos, _ = er_portfolio_analysis(er, secondary_y='tc_cost')
industry,
None, ret_df['ret_after_tc'] = ret_df['returns'] - ret_df['tc_cost']
constraint,
False, sharp_calc = MovingSharp(49)
benchmark_w, drawdown_calc = MovingMaxDrawdown(49)
method=method, max_drawdown_calc = MovingMaxDrawdown(len(ret_df))
use_rank=use_rank,
lbound=lbound, res_df = pd.DataFrame(columns=['daily_return', 'cum_ret', 'sharp', 'drawdown', 'max_drawn'])
ubound=ubound)
total_returns = 0.
target_pos['code'] = total_data['code'].values
for i, ret in enumerate(ret_df['ret_after_tc']):
turn_over, executed_pos = executor.execute(target_pos=target_pos) date = ret_df.index[i]
total_returns += ret
executed_codes = executed_pos.code.tolist() sharp_calc.push({'ret': ret, 'riskFree': 0.})
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon) drawdown_calc.push({'ret': ret})
max_drawdown_calc.push({'ret': ret})
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code']) res_df.loc[date, 'daily_return'] = ret
res_df.loc[date, 'cum_ret'] = total_returns
leverage = result.weight_x.abs().sum() res_df.loc[date, 'drawdown'] = drawdown_calc.result()[0]
res_df.loc[date, 'max_drawn'] = max_drawdown_calc.result()[0]
ret = (result.weight_x - hedging_ratio * result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
rets.append(ret) if i < 10:
executor.set_current(executed_pos) res_df.loc[date, 'sharp'] = 0.
turn_overs.append(turn_over) else:
leverags.append(leverage) res_df.loc[date, 'sharp'] = sharp_calc.result() * np.sqrt(49)
previous_pos = executed_pos res_df.to_csv('hs300_{0}.csv'.format(int(weight_gap * 100)))
alpha_logger.info('{0} is finished'.format(date))
# plt.show()
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates)
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0}'.format(frequency),
secondary_y='tc_cost')
ret_df['ret_after_tc'] = ret_df['returns'] - ret_df['tc_cost']
sharp_calc = MovingSharp(52)
drawdown_calc = MovingMaxDrawdown(52)
max_drawdown_calc = MovingMaxDrawdown(len(ret_df))
res_df = pd.DataFrame(columns=['daily_return', 'cum_ret', 'sharp', 'drawdown', 'max_drawn'])
total_returns = 0.
for i, ret in enumerate(ret_df['ret_after_tc']):
date = ret_df.index[i]
total_returns += ret
sharp_calc.push({'ret': ret, 'riskFree': 0.})
drawdown_calc.push({'ret': ret})
max_drawdown_calc.push({'ret': ret})
res_df.loc[date, 'daily_return'] = ret
res_df.loc[date, 'cum_ret'] = total_returns
res_df.loc[date, 'drawdown'] = drawdown_calc.result()[0]
res_df.loc[date, 'max_drawn'] = max_drawdown_calc.result()[0]
if i < 10:
res_df.loc[date, 'sharp'] = 0.
else:
res_df.loc[date, 'sharp'] = sharp_calc.result() * np.sqrt(52)
res_df.to_csv('hs300_{0}.csv'.format(int(weight_gap * 100)))
#plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment