Commit 910ae0a4 authored by Dr.李

update example

parent 57c969bd
...@@ -20,12 +20,10 @@ plt.style.use('ggplot') ...@@ -20,12 +20,10 @@ plt.style.use('ggplot')
Back test parameter settings Back test parameter settings
""" """
start_date = '2011-01-01' start_date = '2010-01-01'
end_date = '2018-01-11' end_date = '2018-01-26'
benchmark_code = 300
universe_name = ['zz800'] frequency = '10b'
universe = Universe('custom', universe_name)
frequency = '5b'
method = 'risk_neutral' method = 'risk_neutral'
industry_lower = 1. industry_lower = 1.
industry_upper = 1. industry_upper = 1.
...@@ -33,54 +31,53 @@ neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles ...@@ -33,54 +31,53 @@ neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
constraint_risk = ['SIZE', 'LEVERAGE'] + industry_styles constraint_risk = ['SIZE', 'LEVERAGE'] + industry_styles
size_risk_lower = 0 size_risk_lower = 0
size_risk_upper = 0 size_risk_upper = 0
turn_over_target_base = 0.25 turn_over_target_base = 0.30
benchmark_total_lower = 0.8 benchmark_total_lower = 0.8
benchmark_total_upper = 1.0 benchmark_total_upper = 1.0
horizon = map_freq(frequency) horizon = map_freq(frequency)
executor = NaiveExecutor() executor = NaiveExecutor()
engine = SqlEngine('postgres+psycopg2://postgres:we083826@192.168.0.102/alpha')
""" def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True):
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = ['alpha_factor'] """
factor_name = 'SalesCostRatio' Model phase: we need 1 constant linear model and one linear regression model
base1 = LAST('roe_q') """
base2 = CSRes(LAST('ep_q'), 'roe_q') alpha_name = [factor_name + '_' + ('pos' if positive else 'neg')]
simple_expression = DIFF(CSRes(CSRes(LAST(factor_name), base1), base2)) base1 = LAST('Alpha60')
base2 = CSRes('roe_q', base1)
base3 = CSRes(CSRes('ep_q', base1), base2)
simple_expression = CSRes(CSRes(CSRes(LAST(factor_name), base1), base2), base3)
const_features = {alpha_name[0]: simple_expression} if not positive:
const_weights = np.array([1.]) simple_expression = -simple_expression
const_model = ConstLinearModel(features=alpha_name, const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights) weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse') ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe, const_model_factor_data = engine.fetch_data_range(universe,
const_features, const_features,
dates=ref_dates, dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna() benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency) horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = [] rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = []
factor_groups = const_model_factor_data.groupby('trade_date')
for i, value in enumerate(factor_groups):
factor_groups = const_model_factor_data.groupby('trade_date')
for i, value in enumerate(factor_groups):
date = value[0] date = value[0]
data = value[1] data = value[1]
ref_date = date.strftime('%Y-%m-%d')
index_dates.append(date) index_dates.append(date)
total_data = data.fillna(data[alpha_name].median()) total_data = data.fillna(data[alpha_name].median())
...@@ -191,21 +188,65 @@ for i, value in enumerate(factor_groups): ...@@ -191,21 +188,65 @@ for i, value in enumerate(factor_groups):
previous_pos = executed_pos previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date)) alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates) ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=index_dates)
# index return # index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon, index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=1).set_index('trade_date') offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx'] ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
return alpha_name[0], ret_df
def worker_func_positive(factor_name):
    """Run ``factor_analysis`` for one factor with ``positive=True``.

    Used as the task function handed to ``client.map``: it receives only the
    factor name and creates its own ``SqlEngine`` and universe before
    delegating to ``factor_analysis``.

    :param factor_name: name of the factor to analyse.
    :return: the value returned by ``factor_analysis``.
    """
    # Imported inside the function body, exactly as the original does.
    from alphamind.api import SqlEngine, Universe

    # Engine is constructed before the universe, matching the original order.
    sql_engine = SqlEngine()
    zz800 = Universe('custom', ['zz800'])
    # 300 is the benchmark code passed positionally to factor_analysis.
    return factor_analysis(sql_engine, factor_name, zz800, 300, positive=True)
def worker_func_negative(factor_name):
    """Run ``factor_analysis`` for one factor with ``positive=False``.

    Used as the task function handed to ``client.map``: it receives only the
    factor name and creates its own ``SqlEngine`` and universe before
    delegating to ``factor_analysis``.

    :param factor_name: name of the factor to analyse.
    :return: the value returned by ``factor_analysis``.
    """
    # Imported inside the function body, exactly as the original does.
    from alphamind.api import SqlEngine, Universe

    # Engine is constructed before the universe, matching the original order.
    sql_engine = SqlEngine()
    zz800 = Universe('custom', ['zz800'])
    # 300 is the benchmark code passed positionally to factor_analysis.
    return factor_analysis(sql_engine, factor_name, zz800, 300, positive=False)
if __name__ == '__main__':
from dask.distributed import Client
client = Client('10.63.6.176:8786')
engine = SqlEngine()
df = engine.fetch_factor_coverage()
df = df[df.universe == 'zz800'].groupby('factor').mean()
df = df[df.coverage >= 0.98]
tasks = client.map(worker_func_positive, df.index.tolist())
res1 = client.gather(tasks)
tasks = client.map(worker_func_negative, df.index.tolist())
res2 = client.gather(tasks)
factor_df = pd.DataFrame()
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0. for f_name, df in res1:
ret_df = ret_df.shift(1) factor_df[f_name] = df['returns']
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6), for f_name, df in res2:
title='Fixed frequency rebalanced: {0} for {1}'.format(frequency, factor_name), factor_df[f_name] = df['returns']
secondary_y='tc_cost')
plt.show() # ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
# title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
# frequency, factor_name, benchmark_code),
# secondary_y='tc_cost')
...@@ -13,6 +13,7 @@ from alphamind.api import * ...@@ -13,6 +13,7 @@ from alphamind.api import *
def factor_residue_analysis(start_date, def factor_residue_analysis(start_date,
end_date, end_date,
factor_name,
factor, factor,
freq, freq,
universe, universe,
...@@ -26,10 +27,8 @@ def factor_residue_analysis(start_date, ...@@ -26,10 +27,8 @@ def factor_residue_analysis(start_date,
tenor=freq, tenor=freq,
calendar='china.sse') calendar='china.sse')
alpha_factor_name = factor + '_res' alpha_factor_name = factor_name + '_res'
base1 = LAST('roe_q') alpha_factor = {alpha_factor_name: factor}
base2 = CSRes(LAST('ep_q'), 'roe_q')
alpha_factor = {alpha_factor_name: CSRes(CSRes(LAST(factor), base1), base2)}
factor_all_data = engine.fetch_data_range(universe, factor_all_data = engine.fetch_data_range(universe,
alpha_factor, alpha_factor,
dates=dates)['factor'] dates=dates)['factor']
...@@ -74,19 +73,54 @@ def factor_residue_analysis(start_date, ...@@ -74,19 +73,54 @@ def factor_residue_analysis(start_date,
return df return df
engine = SqlEngine() def factor_analysis(f_name):
df = engine.fetch_factor_coverage().groupby('factor').mean() from alphamind.api import SqlEngine, Universe, alpha_logger
df = df[df.coverage >= 0.98] engine = SqlEngine()
universe = Universe('custom', ['zz800']) universe = Universe('custom', ['zz800'])
base1 = LAST('Alpha60')
factor_df = pd.DataFrame() base2 = CSRes('roe_q', base1)
base3 = CSRes(CSRes('ep_q', base1), base2)
for i, factor in enumerate(df.index): factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
res = factor_residue_analysis('2011-01-01', res = factor_residue_analysis('2010-01-01',
'2018-01-05', '2018-01-26',
f_name,
factor, factor,
'5b', '10b',
universe, universe,
engine) engine)
factor_df[factor] = res['$top1 - bottom1$'] alpha_logger.info('{0} is done'.format(f_name))
alpha_logger.info('{0}: {1} is done'.format(i + 1, factor)) return f_name, res
if __name__ == '__main__':
    # Driver: submit one factor_analysis task per well-covered factor to a
    # dask scheduler, then collect every factor's '$top1 - bottom1$' series
    # into a single frame keyed by factor name.
    from dask.distributed import Client

    # The context manager closes the client connection once the results have
    # been gathered, instead of leaving it open until interpreter exit.
    with Client('10.63.6.176:8786') as client:
        engine = SqlEngine()

        # Keep only factors whose mean coverage on zz800 is at least 98%.
        df = engine.fetch_factor_coverage()
        df = df[df.universe == 'zz800'].groupby('factor').mean()
        df = df[df.coverage >= 0.98]

        # Not passed to the distributed tasks (client.map only sends factor
        # names); kept because the commented-out serial loop below uses it.
        universe = Universe('custom', ['zz800'])

        tasks = client.map(factor_analysis, df.index.tolist())
        res = client.gather(tasks)

    factor_df = pd.DataFrame()
    # Each gathered item is a (factor name, result frame) pair. The loop
    # variable is named res_df so it does not rebind the coverage frame `df`.
    for f_name, res_df in res:
        factor_df[f_name] = res_df['$top1 - bottom1$']
# for i, f_name in enumerate(df.index):
# base1 = LAST('Alpha60')
# base2 = CSRes('roe_q', base1)
# base3 = CSRes(CSRes('ep_q', base1), base2)
# factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
# res = factor_residue_analysis('2010-01-01',
# '2018-01-22',
# f_name,
# factor,
# '10b',
# universe,
# engine)
# factor_df[f_name] = res['$top1 - bottom1$']
# alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment