Commit 910ae0a4 authored by Dr.李's avatar Dr.李

update example

parent 57c969bd
...@@ -20,12 +20,10 @@ plt.style.use('ggplot') ...@@ -20,12 +20,10 @@ plt.style.use('ggplot')
Back test parameter settings Back test parameter settings
""" """
start_date = '2011-01-01' start_date = '2010-01-01'
end_date = '2018-01-11' end_date = '2018-01-26'
benchmark_code = 300
universe_name = ['zz800'] frequency = '10b'
universe = Universe('custom', universe_name)
frequency = '5b'
method = 'risk_neutral' method = 'risk_neutral'
industry_lower = 1. industry_lower = 1.
industry_upper = 1. industry_upper = 1.
...@@ -33,179 +31,222 @@ neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles ...@@ -33,179 +31,222 @@ neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
constraint_risk = ['SIZE', 'LEVERAGE'] + industry_styles constraint_risk = ['SIZE', 'LEVERAGE'] + industry_styles
size_risk_lower = 0 size_risk_lower = 0
size_risk_upper = 0 size_risk_upper = 0
turn_over_target_base = 0.25 turn_over_target_base = 0.30
benchmark_total_lower = 0.8 benchmark_total_lower = 0.8
benchmark_total_upper = 1.0 benchmark_total_upper = 1.0
horizon = map_freq(frequency) horizon = map_freq(frequency)
executor = NaiveExecutor() executor = NaiveExecutor()
engine = SqlEngine('postgres+psycopg2://postgres:we083826@192.168.0.102/alpha')
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = ['alpha_factor']
factor_name = 'SalesCostRatio'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
simple_expression = DIFF(CSRes(CSRes(LAST(factor_name), base1), base2))
const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = [] def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True):
"""
factor_groups = const_model_factor_data.groupby('trade_date') Model phase: we need 1 constant linear model and one linear regression model
"""
for i, value in enumerate(factor_groups): alpha_name = [factor_name + '_' + ('pos' if positive else 'neg')]
date = value[0] base1 = LAST('Alpha60')
data = value[1] base2 = CSRes('roe_q', base1)
ref_date = date.strftime('%Y-%m-%d') base3 = CSRes(CSRes('ep_q', base1), base2)
index_dates.append(date) simple_expression = CSRes(CSRes(CSRes(LAST(factor_name), base1), base2), base3)
total_data = data.fillna(data[alpha_name].median()) if not positive:
alpha_logger.info('{0}: {1}'.format(date, len(total_data))) simple_expression = -simple_expression
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values const_features = {alpha_name[0]: simple_expression}
benchmark_w = total_data.weight.values const_weights = np.array([1.])
constraint_exp = total_data[constraint_risk].values const_model = ConstLinearModel(features=alpha_name,
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float) weights=const_weights)
risk_names = constraint_risk + ['total'] ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
risk_target = risk_exp_expand.T @ benchmark_w
const_model_factor_data = engine.fetch_data_range(universe,
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data)) const_features,
ubound = 0.02 + benchmark_w dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
is_in_benchmark = (benchmark_w > 0.).astype(float)
horizon = map_freq(frequency)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total') rets = []
turn_overs = []
constraint = Constraints(risk_exp_expand, risk_names) leverags = []
previous_pos = pd.DataFrame()
for j, name in enumerate(risk_names): index_dates = []
if name == 'total': factor_groups = const_model_factor_data.groupby('trade_date')
constraint.set_constraints(name,
lower_bound=risk_target[j], for i, value in enumerate(factor_groups):
upper_bound=risk_target[j]) date = value[0]
elif name == 'SIZE': data = value[1]
base_target = abs(risk_target[j]) index_dates.append(date)
constraint.set_constraints(name,
lower_bound=risk_target[j] + base_target * size_risk_lower, total_data = data.fillna(data[alpha_name].median())
upper_bound=risk_target[j] + base_target * size_risk_upper) alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
elif name == 'benchmark_total': risk_exp = total_data[neutralize_risk].values.astype(float)
base_target = benchmark_w.sum() industry = total_data.industry_code.values
constraint.set_constraints(name, benchmark_w = total_data.weight.values
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target) constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data))
ubound = 0.02 + benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
constraint = Constraints(risk_exp_expand, risk_names)
for j, name in enumerate(risk_names):
if name == 'total':
constraint.set_constraints(name,
lower_bound=risk_target[j],
upper_bound=risk_target[j])
elif name == 'SIZE':
base_target = abs(risk_target[j])
constraint.set_constraints(name,
lower_bound=risk_target[j] + base_target * size_risk_lower,
upper_bound=risk_target[j] + base_target * size_risk_upper)
elif name == 'benchmark_total':
base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
else:
constraint.set_constraints(name,
lower_bound=risk_target[j] * industry_lower,
upper_bound=risk_target[j] * industry_upper)
factor_values = factor_processing(total_data[alpha_name].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er = const_model.predict(factor_values)
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
else: else:
constraint.set_constraints(name, previous_pos.set_index('code', inplace=True)
lower_bound=risk_target[j] * industry_lower, remained_pos = previous_pos.loc[codes]
upper_bound=risk_target[j] * industry_upper)
remained_pos.fillna(0., inplace=True)
factor_values = factor_processing(total_data[alpha_name].values, turn_over_target = turn_over_target_base
pre_process=[winsorize_normal, standardize], current_position = remained_pos.weight.values
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize]) try:
target_pos, _ = er_portfolio_analysis(er,
# const linear model industry,
er = const_model.predict(factor_values) None,
constraint,
codes = total_data['code'].values False,
benchmark_w,
if previous_pos.empty: method=method,
current_position = None turn_over_target=turn_over_target,
turn_over_target = None current_position=current_position,
else: lbound=lbound,
previous_pos.set_index('code', inplace=True) ubound=ubound)
remained_pos = previous_pos.loc[codes] except ValueError:
alpha_logger.info('{0} full re-balance'.format(date))
remained_pos.fillna(0., inplace=True) target_pos, _ = er_portfolio_analysis(er,
turn_over_target = turn_over_target_base industry,
current_position = remained_pos.weight.values None,
constraint,
try: False,
target_pos, _ = er_portfolio_analysis(er, benchmark_w,
industry, method=method,
None, lbound=lbound,
constraint, ubound=ubound)
False,
benchmark_w, target_pos['code'] = total_data['code'].values
method=method,
turn_over_target=turn_over_target, turn_over, executed_pos = executor.execute(target_pos=target_pos)
current_position=current_position,
lbound=lbound, executed_codes = executed_pos.code.tolist()
ubound=ubound) dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
except ValueError:
alpha_logger.info('{0} full re-balance'.format(date)) result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
target_pos, _ = er_portfolio_analysis(er, result = pd.merge(result, dx_returns, on=['code'])
industry,
None, leverage = result.weight_x.abs().sum()
constraint,
False, ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
benchmark_w, rets.append(np.log(1. + ret))
method=method, executor.set_current(executed_pos)
lbound=lbound, turn_overs.append(turn_over)
ubound=ubound) leverags.append(leverage)
target_pos['code'] = total_data['code'].values previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date))
turn_over, executed_pos = executor.execute(target_pos=target_pos)
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=index_dates)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1) # index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner') offset=1).set_index('trade_date')
result = pd.merge(result, dx_returns, on=['code']) ret_df['index'] = index_return['dx']
leverage = result.weight_x.abs().sum() ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.) ret_df.iloc[0] = 0.
rets.append(np.log(1. + ret)) ret_df['tc_cost'] = ret_df.turn_over * 0.002
executor.set_current(executed_pos) ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
turn_overs.append(turn_over)
leverags.append(leverage) return alpha_name[0], ret_df
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date)) def worker_func_positive(factor_name):
from alphamind.api import SqlEngine, Universe
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates) engine = SqlEngine()
benchmark_code = 300
# index return universe_name = ['zz800']
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon, universe = Universe('custom', universe_name)
offset=1).set_index('trade_date') return factor_analysis(engine, factor_name, universe, benchmark_code, positive=True)
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0. def worker_func_negative(factor_name):
ret_df = ret_df.shift(1) from alphamind.api import SqlEngine, Universe
ret_df.iloc[0] = 0. engine = SqlEngine()
ret_df['tc_cost'] = ret_df.turn_over * 0.002 benchmark_code = 300
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage'] universe_name = ['zz800']
universe = Universe('custom', universe_name)
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6), return factor_analysis(engine, factor_name, universe, benchmark_code, positive=False)
title='Fixed frequency rebalanced: {0} for {1}'.format(frequency, factor_name),
secondary_y='tc_cost')
if __name__ == '__main__':
plt.show() from dask.distributed import Client
client = Client('10.63.6.176:8786')
engine = SqlEngine()
df = engine.fetch_factor_coverage()
df = df[df.universe == 'zz800'].groupby('factor').mean()
df = df[df.coverage >= 0.98]
tasks = client.map(worker_func_positive, df.index.tolist())
res1 = client.gather(tasks)
tasks = client.map(worker_func_negative, df.index.tolist())
res2 = client.gather(tasks)
factor_df = pd.DataFrame()
for f_name, df in res1:
factor_df[f_name] = df['returns']
for f_name, df in res2:
factor_df[f_name] = df['returns']
# ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
# title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
# frequency, factor_name, benchmark_code),
# secondary_y='tc_cost')
...@@ -13,6 +13,7 @@ from alphamind.api import * ...@@ -13,6 +13,7 @@ from alphamind.api import *
def factor_residue_analysis(start_date, def factor_residue_analysis(start_date,
end_date, end_date,
factor_name,
factor, factor,
freq, freq,
universe, universe,
...@@ -26,10 +27,8 @@ def factor_residue_analysis(start_date, ...@@ -26,10 +27,8 @@ def factor_residue_analysis(start_date,
tenor=freq, tenor=freq,
calendar='china.sse') calendar='china.sse')
alpha_factor_name = factor + '_res' alpha_factor_name = factor_name + '_res'
base1 = LAST('roe_q') alpha_factor = {alpha_factor_name: factor}
base2 = CSRes(LAST('ep_q'), 'roe_q')
alpha_factor = {alpha_factor_name: CSRes(CSRes(LAST(factor), base1), base2)}
factor_all_data = engine.fetch_data_range(universe, factor_all_data = engine.fetch_data_range(universe,
alpha_factor, alpha_factor,
dates=dates)['factor'] dates=dates)['factor']
...@@ -74,19 +73,54 @@ def factor_residue_analysis(start_date, ...@@ -74,19 +73,54 @@ def factor_residue_analysis(start_date,
return df return df
engine = SqlEngine() def factor_analysis(f_name):
df = engine.fetch_factor_coverage().groupby('factor').mean() from alphamind.api import SqlEngine, Universe, alpha_logger
df = df[df.coverage >= 0.98] engine = SqlEngine()
universe = Universe('custom', ['zz800']) universe = Universe('custom', ['zz800'])
base1 = LAST('Alpha60')
factor_df = pd.DataFrame() base2 = CSRes('roe_q', base1)
base3 = CSRes(CSRes('ep_q', base1), base2)
for i, factor in enumerate(df.index): factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
res = factor_residue_analysis('2011-01-01', res = factor_residue_analysis('2010-01-01',
'2018-01-05', '2018-01-26',
f_name,
factor, factor,
'5b', '10b',
universe, universe,
engine) engine)
factor_df[factor] = res['$top1 - bottom1$'] alpha_logger.info('{0} is done'.format(f_name))
alpha_logger.info('{0}: {1} is done'.format(i + 1, factor)) return f_name, res
if __name__ == '__main__':
from dask.distributed import Client
client = Client('10.63.6.176:8786')
engine = SqlEngine()
df = engine.fetch_factor_coverage()
df = df[df.universe == 'zz800'].groupby('factor').mean()
df = df[df.coverage >= 0.98]
universe = Universe('custom', ['zz800'])
factor_df = pd.DataFrame()
tasks = client.map(factor_analysis, df.index.tolist())
res = client.gather(tasks)
for f_name, df in res:
factor_df[f_name] = df['$top1 - bottom1$']
# for i, f_name in enumerate(df.index):
# base1 = LAST('Alpha60')
# base2 = CSRes('roe_q', base1)
# base3 = CSRes(CSRes('ep_q', base1), base2)
# factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
# res = factor_residue_analysis('2010-01-01',
# '2018-01-22',
# f_name,
# factor,
# '10b',
# universe,
# engine)
# factor_df[f_name] = res['$top1 - bottom1$']
# alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment