Commit 910ae0a4 authored by Dr.李

update example

parent 57c969bd
......@@ -20,12 +20,10 @@ plt.style.use('ggplot')
Back test parameter settings
"""
# Back-test parameter settings (diff reconstruction: the scraped commit view
# carried both the old and new values of several settings; only the updated
# values are kept here).
start_date = '2010-01-01'          # back-test window start (updated from 2011-01-01)
end_date = '2018-01-26'            # back-test window end (updated from 2018-01-11)
benchmark_code = 300               # benchmark index code used for weights and index returns
universe_name = ['zz800']          # stock universe label(s)
universe = Universe('custom', universe_name)
frequency = '10b'                  # rebalance frequency in business days (updated from '5b')
method = 'risk_neutral'            # portfolio construction method passed to er_portfolio_analysis
industry_lower = 1.                # industry-exposure band, as a multiple of the benchmark target
industry_upper = 1.
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
constraint_risk = ['SIZE', 'LEVERAGE'] + industry_styles
size_risk_lower = 0                # SIZE-exposure band, relative to |benchmark target|
size_risk_upper = 0
turn_over_target_base = 0.30       # per-rebalance turnover budget (updated from 0.25)
benchmark_total_lower = 0.8        # bounds on total weight held inside the benchmark
benchmark_total_upper = 1.0
horizon = map_freq(frequency)      # forward-return horizon matching the rebalance frequency
executor = NaiveExecutor()
# SECURITY NOTE(review): database credentials are hard-coded in the connection
# string — consider moving them to an environment variable or config file.
engine = SqlEngine('postgres+psycopg2://postgres:we083826@192.168.0.102/alpha')
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = ['alpha_factor']
factor_name = 'SalesCostRatio'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
simple_expression = DIFF(CSRes(CSRes(LAST(factor_name), base1), base2))
const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = []
factor_groups = const_model_factor_data.groupby('trade_date')
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
ref_date = date.strftime('%Y-%m-%d')
index_dates.append(date)
total_data = data.fillna(data[alpha_name].median())
alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values
benchmark_w = total_data.weight.values
constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data))
ubound = 0.02 + benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
constraint = Constraints(risk_exp_expand, risk_names)
for j, name in enumerate(risk_names):
if name == 'total':
constraint.set_constraints(name,
lower_bound=risk_target[j],
upper_bound=risk_target[j])
elif name == 'SIZE':
base_target = abs(risk_target[j])
constraint.set_constraints(name,
lower_bound=risk_target[j] + base_target * size_risk_lower,
upper_bound=risk_target[j] + base_target * size_risk_upper)
elif name == 'benchmark_total':
base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True):
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = [factor_name + '_' + ('pos' if positive else 'neg')]
base1 = LAST('Alpha60')
base2 = CSRes('roe_q', base1)
base3 = CSRes(CSRes('ep_q', base1), base2)
simple_expression = CSRes(CSRes(CSRes(LAST(factor_name), base1), base2), base3)
if not positive:
simple_expression = -simple_expression
const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = []
factor_groups = const_model_factor_data.groupby('trade_date')
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
index_dates.append(date)
total_data = data.fillna(data[alpha_name].median())
alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values
benchmark_w = total_data.weight.values
constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data))
ubound = 0.02 + benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
constraint = Constraints(risk_exp_expand, risk_names)
for j, name in enumerate(risk_names):
if name == 'total':
constraint.set_constraints(name,
lower_bound=risk_target[j],
upper_bound=risk_target[j])
elif name == 'SIZE':
base_target = abs(risk_target[j])
constraint.set_constraints(name,
lower_bound=risk_target[j] + base_target * size_risk_lower,
upper_bound=risk_target[j] + base_target * size_risk_upper)
elif name == 'benchmark_total':
base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
else:
constraint.set_constraints(name,
lower_bound=risk_target[j] * industry_lower,
upper_bound=risk_target[j] * industry_upper)
factor_values = factor_processing(total_data[alpha_name].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er = const_model.predict(factor_values)
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
else:
constraint.set_constraints(name,
lower_bound=risk_target[j] * industry_lower,
upper_bound=risk_target[j] * industry_upper)
factor_values = factor_processing(total_data[alpha_name].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er = const_model.predict(factor_values)
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
else:
previous_pos.set_index('code', inplace=True)
remained_pos = previous_pos.loc[codes]
remained_pos.fillna(0., inplace=True)
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values
try:
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
turn_over_target=turn_over_target,
current_position=current_position,
lbound=lbound,
ubound=ubound)
except ValueError:
alpha_logger.info('{0} full re-balance'.format(date))
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
lbound=lbound,
ubound=ubound)
target_pos['code'] = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code'])
leverage = result.weight_x.abs().sum()
ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
rets.append(np.log(1. + ret))
executor.set_current(executed_pos)
turn_overs.append(turn_over)
leverags.append(leverage)
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates)
# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0} for {1}'.format(frequency, factor_name),
secondary_y='tc_cost')
plt.show()
previous_pos.set_index('code', inplace=True)
remained_pos = previous_pos.loc[codes]
remained_pos.fillna(0., inplace=True)
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values
try:
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
turn_over_target=turn_over_target,
current_position=current_position,
lbound=lbound,
ubound=ubound)
except ValueError:
alpha_logger.info('{0} full re-balance'.format(date))
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
lbound=lbound,
ubound=ubound)
target_pos['code'] = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code'])
leverage = result.weight_x.abs().sum()
ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
rets.append(np.log(1. + ret))
executor.set_current(executed_pos)
turn_overs.append(turn_over)
leverags.append(leverage)
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=index_dates)
# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
return alpha_name[0], ret_df
def worker_func_positive(factor_name):
    """Dask worker: back-test *factor_name* long (positive) on zz800 vs benchmark 300."""
    from alphamind.api import SqlEngine, Universe

    return factor_analysis(SqlEngine(),
                           factor_name,
                           Universe('custom', ['zz800']),
                           300,
                           positive=True)
def worker_func_negative(factor_name):
    """Dask worker: back-test *factor_name* flipped (negative) on zz800 vs benchmark 300."""
    from alphamind.api import SqlEngine, Universe

    return factor_analysis(SqlEngine(),
                           factor_name,
                           Universe('custom', ['zz800']),
                           300,
                           positive=False)
if __name__ == '__main__':
    from dask.distributed import Client

    # Distributed run: fan the per-factor back-tests out to a dask cluster.
    client = Client('10.63.6.176:8786')
    engine = SqlEngine()

    # Keep only factors with >= 98% coverage on the zz800 universe.
    df = engine.fetch_factor_coverage()
    df = df[df.universe == 'zz800'].groupby('factor').mean()
    df = df[df.coverage >= 0.98]
    factor_names = df.index.tolist()

    tasks = client.map(worker_func_positive, factor_names)
    res1 = client.gather(tasks)
    tasks = client.map(worker_func_negative, factor_names)
    res2 = client.gather(tasks)

    # Collect the per-period return series of every (factor, direction) run.
    # Loop variable renamed from 'df' to avoid shadowing the coverage frame.
    factor_df = pd.DataFrame()
    for f_name, ret_df in res1:
        factor_df[f_name] = ret_df['returns']
    for f_name, ret_df in res2:
        factor_df[f_name] = ret_df['returns']
......@@ -13,6 +13,7 @@ from alphamind.api import *
def factor_residue_analysis(start_date,
end_date,
factor_name,
factor,
freq,
universe,
......@@ -26,10 +27,8 @@ def factor_residue_analysis(start_date,
tenor=freq,
calendar='china.sse')
alpha_factor_name = factor + '_res'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
alpha_factor = {alpha_factor_name: CSRes(CSRes(LAST(factor), base1), base2)}
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: factor}
factor_all_data = engine.fetch_data_range(universe,
alpha_factor,
dates=dates)['factor']
......@@ -74,19 +73,54 @@ def factor_residue_analysis(start_date,
return df
def factor_analysis(f_name):
    """Dask worker: residue-analyse one factor on the zz800 universe.

    Residualizes *f_name* against the 'Alpha60' / 'roe_q' / 'ep_q' base chain
    and runs factor_residue_analysis over 2010-01-01 .. 2018-01-26 at a '10b'
    rebalance frequency.

    Returns
    -------
    tuple[str, pd.DataFrame]
        The factor name and the analysis result frame.
    """
    from alphamind.api import SqlEngine, Universe, alpha_logger

    engine = SqlEngine()
    universe = Universe('custom', ['zz800'])
    # Each base is residualized against the ones before it, then the raw
    # factor is residualized against all three.
    base1 = LAST('Alpha60')
    base2 = CSRes('roe_q', base1)
    base3 = CSRes(CSRes('ep_q', base1), base2)
    factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
    res = factor_residue_analysis('2010-01-01',
                                  '2018-01-26',
                                  f_name,
                                  factor,
                                  '10b',
                                  universe,
                                  engine)
    alpha_logger.info('{0} is done'.format(f_name))
    return f_name, res
if __name__ == '__main__':
    from dask.distributed import Client

    # Distributed run: map the residue analysis over all well-covered factors.
    client = Client('10.63.6.176:8786')
    engine = SqlEngine()

    # Keep only factors with >= 98% coverage on the zz800 universe.
    df = engine.fetch_factor_coverage()
    df = df[df.universe == 'zz800'].groupby('factor').mean()
    df = df[df.coverage >= 0.98]
    universe = Universe('custom', ['zz800'])

    tasks = client.map(factor_analysis, df.index.tolist())
    res = client.gather(tasks)

    # Collect each factor's top-minus-bottom quantile spread.
    # Loop variable renamed from 'df' to avoid shadowing the coverage frame;
    # the previous sequential (non-dask) driver was dead commented-out code
    # and has been removed.
    factor_df = pd.DataFrame()
    for f_name, res_df in res:
        factor_df[f_name] = res_df['$top1 - bottom1$']
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment