Commit e007f467 authored by Dr.李

removed unmaintained examples

parent f10450af
# -*- coding: utf-8 -*-
"""
Created on 2017-11-8
@author: cheng.li
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *
plt.style.use('ggplot')
"""
Back-test parameter settings
"""
start_date = '2017-01-01'
end_date = '2017-11-06'
benchmark_code = 905
universe_name = 'zz500'
universe = Universe(universe_name, [universe_name])
frequency = '2w'
batch = 4
method = 'risk_neutral'
use_rank = 100
industry_lower = 1.
industry_upper = 1.
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles
horizon = map_freq(frequency)
executor = NaiveExecutor()
"""
Model phase: we need one constant linear model and one linear regression model
"""
const_features = ["IVR", "eps_q", "DivP", "CFinc1", "BDTO"]
const_weights = np.array([0.05, 0.2, 0.075, 0.15, 0.05])
const_model = ConstLinearModel(features=const_features,
weights=const_weights)
linear_model_features = {
'eps': LAST('eps_q'),
'roe': LAST('roe_q'),
'bdto': LAST('BDTO'),
'cfinc1': LAST('CFinc1'),
'chv': LAST('CHV'),
'ivr': LAST('IVR'),
'val': LAST('VAL'),
'grev': LAST('GREV')
}
"""
Data phase
"""
engine = SqlEngine()
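# fetch_data_package splits the factor data into 'train' and 'predict' x/y panels keyed by
# date, plus a 'settlement' frame (codes, weights, realized returns) and the final 'x_names'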
linear_model_factor_data = fetch_data_package(engine,
alpha_factors=linear_model_features,
start_date=start_date,
end_date=end_date,
frequency=frequency,
universe=universe,
benchmark=benchmark_code,
batch=batch,
neutralized_risk=neutralize_risk,
pre_process=[winsorize_normal, standardize],
post_process=[winsorize_normal, standardize],
warm_start=batch)
train_x = linear_model_factor_data['train']['x']
train_y = linear_model_factor_data['train']['y']
ref_dates = sorted(train_x.keys())
predict_x = linear_model_factor_data['predict']['x']
predict_y = linear_model_factor_data['predict']['y']
settlement = linear_model_factor_data['settlement']
linear_model_features = linear_model_factor_data['x_names']
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor']
const_return_data = engine.fetch_dx_return_range(universe, dates=ref_dates, horizon=horizon)
"""
Training phase
"""
models_series = pd.Series()
for ref_date in ref_dates:
x = train_x[ref_date]
y = train_y[ref_date].flatten()
model = LinearRegression(linear_model_features, fit_intercept=False)
model.fit(x, y)
models_series.loc[ref_date] = model
alpha_logger.info('trade_date: {0} training finished'.format(ref_date))
"""
Prediction and rebalancing phase
"""
frequency = '1d'
horizon = map_freq(frequency)
dates = makeSchedule(start_date,
end_date,
tenor=frequency,
calendar='china.sse',
dateGenerationRule=DateGeneration.Forward)
total_factors = {
'eps': LAST('eps_q'),
'roe': LAST('roe_q'),
'bdto': LAST('BDTO'),
'cfinc1': LAST('CFinc1'),
'chv': LAST('CHV'),
'ivr': LAST('IVR'),
'val': LAST('VAL'),
'grev': LAST('GREV'),
'divp': LAST('DivP')
}
all_data = engine.fetch_data_range(universe, total_factors, dates=dates, benchmark=benchmark_code)
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_all_data = all_data['factor']
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
rets = []
turn_overs = []
leverages = []
index_dates = []
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
codes = data.code.tolist()
    ref_date = date.strftime('%Y-%m-%d')
    returns = return_groups.get_group(date)
    total_data = pd.merge(data, returns, on=['code']).dropna()
    index_dates.append(date)
alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values
dx_return = total_data.dx.values
benchmark_w = total_data.weight.values
constraint_exp = total_data[constraint_risk].values
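    # append a column of ones so total portfolio weight can be constrained
    # alongside the style and industry exposures ('total' below)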
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.zeros(len(total_data))
ubound = 0.01 + benchmark_w
constraint = Constraints(risk_exp_expand, risk_names)
    for j, name in enumerate(risk_names):
        if name == 'total' or name == 'SIZE':
            constraint.set_constraints(name,
                                       lower_bound=risk_target[j],
                                       upper_bound=risk_target[j])
        else:
            constraint.set_constraints(name,
                                       lower_bound=risk_target[j] * industry_lower,
                                       upper_bound=risk_target[j] * industry_upper)
    # columns carry the keys of total_factors, so reference the const model
    # features by their lower-case aliases (order matches const_weights)
    factor_values = factor_processing(total_data[['ivr', 'eps', 'divp', 'cfinc1', 'bdto']].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er1 = const_model.predict(factor_values)
# linear regression model
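    # use the most recent model trained on or before the current trade date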
    models = models_series[models_series.index <= date]
    model = models.iloc[-1]
x = predict_x[date]
er2 = model.predict(x)
# combine model
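    # scale each signal to unit cross-sectional std so both models contribute comparably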
er1_table = pd.DataFrame({'er1': er1 / er1.std(), 'code': total_data.code.values})
er2_table = pd.DataFrame({'er2': er2 / er2.std(), 'code': settlement.loc[settlement.trade_date == date, 'code'].values})
er_table = pd.merge(er1_table, er2_table, on=['code'], how='left').fillna(0)
er = (er_table.er1 + er_table.er2).values
target_pos, _ = er_portfolio_analysis(er,
industry,
dx_return,
constraint,
False,
benchmark_w,
method=method,
use_rank=use_rank)
target_pos['code'] = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code'])
leverage = result.weight_x.abs().sum()
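    # active return vs. the benchmark re-scaled to the portfolio's leverage;
    # dx is a log return, hence np.exp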
ret = (result.weight_x - result.weight_y * leverage / result.weight_y.sum()).values @ np.exp(result.dx.values)
rets.append(ret)
executor.set_current(executed_pos)
turn_overs.append(turn_over)
    leverages.append(leverage)
alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverages}, index=index_dates)
ret_df.loc[advanceDateByCalendar('china.sse', index_dates[-1], frequency)] = 0.
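# shift by one period so the return earned over [t, t+1) is booked at t+1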
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0}'.format(frequency),
secondary_y='tc_cost')
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2017-12-30
@author: cheng.li
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PyFin.api import *
from alphamind.api import *
engine = SqlEngine()
start_date = '2017-01-01'
end_date = '2017-12-25'
universe = Universe('custom', ['zz800'])
neutralize_risk = ['SIZE'] + industry_styles
factors = [CSRes(LAST('closePrice') / LAST('openPrice'), LAST('turnoverVol')),
LAST('lowestPrice')]
benchmark = 300
build_type = 'risk_neutral'
freq = '5b'
horizon = map_freq(freq)
factors_data = fetch_data_package(engine,
alpha_factors=factors,
start_date=start_date,
end_date=end_date,
frequency=freq,
universe=universe,
benchmark=benchmark,
batch=1,
neutralized_risk=neutralize_risk,
pre_process=[winsorize_normal, standardize],
post_process=[winsorize_normal, standardize])
x_names = factors_data['x_names']
train_x = factors_data['train']['x']
train_y = factors_data['train']['y']
ref_dates = sorted(train_x.keys())
predict_x = factors_data['predict']['x']
settlement = factors_data['settlement']
benchmark_w = settlement['weight'].values
industry_names = settlement['industry'].values
realized_r = settlement['dx'].values
risk_exp = settlement[neutralize_risk].values
"""
Training phase
"""
models_series = pd.Series()
for date in ref_dates:
x = train_x[date]
y = train_y[date].flatten()
model = LinearRegression(fit_intercept=False, features=x_names)
model.fit(x, y)
models_series.loc[date] = model
alpha_logger.info('trade_date: {0} training finished'.format(date))
"""
Prediction and rebalancing phase
"""
final_res = np.zeros(len(ref_dates))
for i, date in enumerate(ref_dates):
this_date_x = predict_x[date]
index = settlement.trade_date == date
this_benchmark_w = benchmark_w[index]
this_industry_names = industry_names[index]
this_realized_r = realized_r[index]
# linear regression model
model = models_series[date]
predict_y = model.predict(this_date_x)
# set constraint
this_risk_exp = risk_exp[index]
lbound = np.zeros(len(this_date_x))
ubound = 0.02 * np.ones(len(this_date_x))
cons = Constraints()
cons.add_exposure(neutralize_risk, this_risk_exp)
risk_target = this_risk_exp.T @ this_benchmark_w
for k, name in enumerate(neutralize_risk):
cons.set_constraints(name, risk_target[k], risk_target[k])
weights, analysis = er_portfolio_analysis(predict_y,
this_industry_names,
this_realized_r,
constraints=cons,
detail_analysis=True,
benchmark=this_benchmark_w,
method=build_type)
final_res[i] = analysis['er']['total']
alpha_logger.info('trade_date: {0} predicting finished'.format(date))
# Plot the cumulative returns
df = pd.Series(final_res, index=ref_dates)
df.sort_index(inplace=True)
df.cumsum().plot()
plt.title('Factors model {1} ({0})'.format(build_type, models_series.iloc[0].__class__.__name__))
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2017-11-8
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
plt.style.use('ggplot')
"""
Back-test parameter settings
"""
start_date = '2010-01-01'
end_date = '2018-02-27'
freq = '10b'
industry_lower = 1.
industry_upper = 1.
neutralized_risk = industry_styles
industry_name = 'sw'
industry_level = 1
turn_over_target_base = 2.0
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
batch = 0
horizon = map_freq(freq)
weight_gap = 0.01
universe = Universe("custom", ['zz800'])
data_source = 'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
benchmark_code = 905
offset = 1
executor = NaiveExecutor()
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
engine = SqlEngine(data_source)
alpha_factors = {
'f01': LAST('ep_q'),
'f02': LAST('roe_q'),
'f03': LAST('market_confidence_25d'),
'f04': LAST('ILLIQUIDITY'),
'f05': LAST('cfinc1_q'),
'f06': LAST('CFO2EV'),
'f07': LAST('IVR'),
'f08': LAST('con_pe_rolling_order'),
'f09': LAST('con_pb_rolling_order'),
}
weights = dict(f01=1.,
f02=1.,
f03=0.25,
f04=0.25,
f05=0.25,
f06=0.25,
f07=0.25,
f08=-0.25,
f09=-0.25)
alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
def train_worker(ref_date):
data_meta = DataMeta(freq=freq,
universe=universe,
batch=batch,
neutralized_risk=neutralized_risk,
risk_model='short',
pre_process=[winsorize_normal, standardize],
post_process=[winsorize_normal, standardize, rank],
warm_start=0,
data_source=data_source)
return train_model(ref_date, alpha_model, data_meta)
def predict_worker(params):
data_meta = DataMeta(freq=freq,
universe=universe,
batch=batch,
neutralized_risk=neutralized_risk,
risk_model='short',
pre_process=[winsorize_normal, standardize],
post_process=[winsorize_normal, standardize, rank],
warm_start=0,
data_source=data_source)
ref_date, model = params
er = predict_by_model(ref_date, model, data_meta)
return er
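# ConstLinearModel needs no fitting, so only the predict step runs here;
# train_worker above shows the equivalent path for a model that must be trained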
predicts = [predict_worker((d.strftime('%Y-%m-%d'), alpha_model)) for d in ref_dates]
# rebalance
industry_names = industry_list(industry_name, industry_level)
constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names
total_risk_names = constraint_risk + ['benchmark', 'total']
b_type = []
l_val = []
u_val = []
previous_pos = pd.DataFrame()
rets = []
turn_overs = []
leverages = []
for name in total_risk_names:
if name == 'benchmark':
b_type.append(BoundaryType.RELATIVE)
l_val.append(benchmark_total_lower)
u_val.append(benchmark_total_upper)
elif name in {'SIZE', 'SIZENL', 'BETA'}:
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(0.0)
u_val.append(0.0)
else:
b_type.append(BoundaryType.RELATIVE)
l_val.append(industry_lower)
u_val.append(industry_upper)
bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
industry_total = engine.fetch_industry_matrix_range(universe, dates=ref_dates, category=industry_name,
level=industry_level)
benchmark_total = engine.fetch_benchmark_range(dates=ref_dates, benchmark=benchmark_code)
risk_total = engine.fetch_risk_model_range(universe, dates=ref_dates)[1]
for i, ref_date in enumerate(ref_dates):
ref_date = ref_date.strftime('%Y-%m-%d')
industry_matrix = industry_total[industry_total.trade_date == ref_date]
benchmark_w = benchmark_total[benchmark_total.trade_date == ref_date]
risk_matrix = risk_total[risk_total.trade_date == ref_date]
res = pd.merge(industry_matrix, benchmark_w, on=['code'], how='left').fillna(0.)
res = pd.merge(res, risk_matrix, on=['code'])
res = res.dropna()
codes = res.code.values.tolist()
benchmark_w = res.weight.values
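    # a benchmark-membership flag plus an all-ones column supply the exposures
    # for the 'benchmark' and 'total' constraints defined above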
is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1))
total_risk_exp = np.concatenate([res[constraint_risk].values.astype(float),
is_in_benchmark,
np.ones_like(is_in_benchmark)],
axis=1)
total_risk_exp = pd.DataFrame(total_risk_exp, columns=total_risk_names)
constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)
    lbound = np.maximum(0., benchmark_w - weight_gap)
ubound = weight_gap + benchmark_w
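    # warm-start from the previous position so a turnover target can be enforced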
if previous_pos.empty:
current_position = None
turn_over_target = None
else:
        previous_pos.set_index('code', inplace=True)
        remained_pos = previous_pos.reindex(codes)
        remained_pos.fillna(0., inplace=True)
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values
er = predicts[i].loc[codes].values
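    # try a turnover-constrained rebalance first and fall back to a full
    # rebalance below if the constrained problem is infeasible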
try:
alpha_logger.info('{0} partial re-balance: {1}'.format(ref_date, len(er)))
target_pos, _ = er_portfolio_analysis(er,
industry_matrix.industry_name.values,
None,
constraints,
False,
benchmark_w,
method='risk_neutral',
turn_over_target=turn_over_target,
current_position=current_position,
lbound=lbound,
ubound=ubound)
except ValueError:
alpha_logger.info('{0} full re-balance: {1}'.format(ref_date, len(er)))
target_pos, _ = er_portfolio_analysis(er,
industry_matrix.industry_name.values,
None,
constraints,
False,
benchmark_w,
method='risk_neutral',
lbound=lbound,
ubound=ubound)
target_pos['code'] = codes
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(ref_date, executed_codes, horizon=horizon, offset=offset)
result = pd.merge(executed_pos, dx_returns, on=['code'])
leverage = result.weight.abs().sum()
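    # dx is a log return: convert to simple returns for the weighted sum,
    # then book the result back in log space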
ret = result.weight.values @ (np.exp(result.dx.values) - 1.)
rets.append(np.log(1. + ret))
executor.set_current(executed_pos)
turn_overs.append(turn_over)
    leverages.append(leverage)
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(ref_date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverages}, index=ref_dates)
# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=offset).set_index('trade_date')
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], freq)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
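# subtract the leverage-scaled index return to report benchmark-relative performance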
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
                                             title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, benchmark_code),
                                             secondary_y='tc_cost')
ret_df[['returns', 'tc_cost']][-30:].cumsum().plot(figsize=(12, 6),
                                                   title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, benchmark_code),
                                                   secondary_y='tc_cost')
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
def factor_residue_analysis(start_date,
end_date,
factor_name,
factor,
freq,
universe,
engine):
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
n_bins = 5
horizon = map_freq(freq)
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: factor}
factor_all_data = engine.fetch_data_range(universe,
alpha_factor,
dates=dates)['factor']
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
final_res = np.zeros((len(factor_groups.groups), n_bins))
index_dates = []
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1][['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
returns = return_groups.get_group(date)
total_data = pd.merge(data, returns, on=['code']).dropna()
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
index_dates.append(date)
try:
er = factor_processing(total_data[[alpha_factor_name]].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
res = er_quantile_analysis(er,
n_bins=n_bins,
dx_return=dx_return)
except Exception as e:
print(e)
res = np.zeros(n_bins)
final_res[i] = res
df = pd.DataFrame(final_res, index=index_dates)
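    # prepend a zero row one business day before the first date so cumulative plots start at zero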
start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
df.loc[start_date] = 0.
df.sort_index(inplace=True)
df['$top1 - bottom1$'] = df[4] - df[0]
return df
def factor_analysis(f_name):
from alphamind.api import SqlEngine, Universe, alpha_logger
engine = SqlEngine()
universe = Universe('custom', ['zz800'])
base1 = LAST('Alpha60')
base2 = CSRes('roe_q', base1)
base3 = CSRes(CSRes('ep_q', base1), base2)
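    # residualize the candidate factor against Alpha60 and the roe_q / ep_q
    # residues so only its incremental information is evaluated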
factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
res = factor_residue_analysis('2010-01-01',
'2018-01-26',
f_name,
factor,
'10b',
universe,
engine)
alpha_logger.info('{0} is done'.format(f_name))
return f_name, res
if __name__ == '__main__':
from dask.distributed import Client
client = Client('10.63.6.13:8786')
engine = SqlEngine()
df = engine.fetch_factor_coverage()
df = df[df.universe == 'zz800'].groupby('factor').mean()
df = df[df.coverage >= 0.98]
universe = Universe('custom', ['zz800'])
factor_df = pd.DataFrame()
tasks = client.map(factor_analysis, df.index.tolist())
res = client.gather(tasks)
for f_name, df in res:
factor_df[f_name] = df['$top1 - bottom1$']
# for i, f_name in enumerate(df.index):
# base1 = LAST('Alpha60')
# base2 = CSRes('roe_q', base1)
# base3 = CSRes(CSRes('ep_q', base1), base2)
# factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
# res = factor_residue_analysis('2010-01-01',
# '2018-01-22',
# f_name,
# factor,
# '10b',
# universe,
# engine)
# factor_df[f_name] = res['$top1 - bottom1$']
# alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
# -*- coding: utf-8 -*-
"""
Created on 2017-9-5
@author: cheng.li
"""
import pandas as pd
import numpy as np
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
plt.style.use('ggplot')
sentiment_df = pd.read_csv('d:/xueqiu.csv', parse_dates=['trade_date']).sort_values(['trade_date', 'code']).set_index('trade_date')
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
index_name = 'zz500'
benchmark = 905
universe = Universe(index_name, [index_name])
neutralized_risk = ['SIZE'] + industry_styles
expression = MA(5, ['post'])
n_bins = 5
frequency = '1w'
new_factor_df = expression.transform(sentiment_df, name='xueqiu', category_field='code').reset_index()
factors = ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted']
weights = np.array([0.015881607, -0.015900173, -0.001792638,
0.014277867, 0.034129344, 0.019044573,
0.042747382, 0.048765746])
start_date = '2016-01-01'
end_date = '2017-09-03'
dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
total_data = engine.fetch_data_range(universe,
factors,
dates=dates,
benchmark=benchmark)
return_data = engine.fetch_dx_return_range(universe,
dates=dates,
horizon=4)
settle_df = total_data['factor']
settle_df = pd.merge(settle_df, new_factor_df, on=['trade_date', 'code'])
settle_df = pd.merge(settle_df, return_data, on=['trade_date', 'code'])
settle_df.dropna(inplace=True)
settle_df.set_index('trade_date', inplace=True)
dates = settle_df.index.unique()
final_res = np.zeros(len(dates))
for i, date in enumerate(dates):
risk_exp = settle_df.loc[date, neutralized_risk].values
raw_factor = settle_df.loc[date, factors].values @ weights
dx_return = settle_df.loc[date, 'dx'].values
benchmark_w = settle_df.loc[date, 'weight'].values
neutralized_factor = factor_processing(raw_factor.reshape((-1, 1)),
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[standardize])
is_tradable = settle_df.loc[date, 'isOpen'].values.copy()
xueqiu_values = settle_df.loc[date, 'xueqiu'].values
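    # exclude the top 5% of names by xueqiu sentiment from trading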
top_p = np.percentile(xueqiu_values, 95)
is_tradable[xueqiu_values > top_p] = False
industry = settle_df.loc[date, 'industry'].values
constraints = Constraints(np.ones((len(is_tradable), 1)), ['total'])
constraints.set_constraints('total', benchmark_w.sum(), benchmark_w.sum())
res = er_portfolio_analysis(neutralized_factor,
industry,
dx_return=dx_return,
method='risk_neutral',
constraints=constraints,
is_tradable=is_tradable,
benchmark=benchmark_w)
final_res[i] = res[1]['er']['total']
print('{0} is finished'.format(date))
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
plt.style.use('ggplot')
import datetime as dt
start = dt.datetime.now()
universe = Universe('custom', ['zz800'])
factor_name = 'Beta20'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
simple_expression = CSRes(CSRes(LAST(factor_name), base1), base2)
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: simple_expression}
# end of formula definition
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
freq = '5b'
n_bins = 5
horizon = map_freq(freq)
start_date = '2012-01-01'
end_date = '2018-01-05'
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
factor_all_data = engine.fetch_data_range(universe,
alpha_factor,
dates=dates)['factor']
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
final_res = np.zeros((len(factor_groups.groups), n_bins))
index_dates = []
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1][['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
codes = data.code.tolist()
ref_date = value[0].strftime('%Y-%m-%d')
returns = return_groups.get_group(date)
total_data = pd.merge(data, returns, on=['code']).dropna()
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
index_dates.append(date)
f_data = total_data[[alpha_factor_name]]
try:
er = factor_processing(total_data[[alpha_factor_name]].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
res = er_quantile_analysis(er,
n_bins=n_bins,
dx_return=dx_return)
except Exception as e:
print(e)
res = np.zeros(n_bins)
final_res[i] = res
df = pd.DataFrame(final_res, index=index_dates)
# use a distinct name so start_date is not clobbered before the second schedule below
plot_start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
df.loc[plot_start_date] = 0.
df.sort_index(inplace=True)
fig, axes = plt.subplots(1, 2, figsize=(18, 6))
df.cumsum().plot(ax=axes[0], title='Quantile Analysis for {0}'.format(alpha_factor_name))
# =================================================================== #
alpha_factor_name = alpha_factor_name + '_1w_diff'
alpha_factor = {alpha_factor_name: DIFF(simple_expression)}
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
factor_all_data = engine.fetch_data_range(universe,
alpha_factor,
dates=dates)['factor']
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
final_res = np.zeros((len(factor_groups.groups), n_bins))
index_dates = []
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1][['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
codes = data.code.tolist()
ref_date = value[0].strftime('%Y-%m-%d')
returns = return_groups.get_group(date)
total_data = pd.merge(data, returns, on=['code']).dropna()
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
index_dates.append(date)
f_data = total_data[[alpha_factor_name]]
try:
er = factor_processing(total_data[[alpha_factor_name]].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
res = er_quantile_analysis(er,
n_bins=n_bins,
dx_return=dx_return)
except Exception as e:
print(e)
res = np.zeros(n_bins)
final_res[i] = res
df = pd.DataFrame(final_res, index=index_dates)
plot_start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
df.loc[plot_start_date] = 0.
df.sort_index(inplace=True)
df.cumsum().plot(ax=axes[1], title='Quantile Analysis for {0}'.format(alpha_factor_name))
plt.show()
print(dt.datetime.now() - start)
# -*- coding: utf-8 -*-
"""
Created on 2017-8-24
@author: cheng.li
"""
import numpy as np
import pandas as pd
import copy
from sklearn.linear_model import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.svm import NuSVR
from alphamind.api import *
from PyFin.api import *
from matplotlib import pyplot as plt
plt.style.use('ggplot')
'''
Settings:
universe - zz500
neutralize - all industries
benchmark - zz500
base factors - all the risk styles
quantiles - 5
start_date - 2012-01-01
end_date - 2017-08-01
re-balance - every 2 weeks
training - every 8 weeks
'''
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('zz500', ['zz500'])
neutralize_risk = industry_styles
portfolio_risk_neutralize = []
portfolio_industry_neutralize = True
alpha_factors = {
'eps': LAST('eps_q'),
'roe': LAST('roe_q'),
'bdto': LAST('BDTO'),
'cfinc1': LAST('CFinc1'),
'chv': LAST('CHV'),
'rvol': LAST('RVOL'),
'val': LAST('VAL'),
'grev': LAST('GREV'),
'droeafternonorecurring': LAST('DROEAfterNonRecurring')}
benchmark = 905
n_bins = 5
frequency = '2w'
batch = 8
start_date = '2012-01-01'
end_date = '2017-11-05'
method = 'risk_neutral'
use_rank = 100
'''
fetch data from the target database and do the corresponding data processing
'''
data_package = fetch_data_package(engine,
alpha_factors=alpha_factors,
start_date=start_date,
end_date=end_date,
frequency=frequency,
universe=universe,
benchmark=benchmark,
batch=batch,
neutralized_risk=neutralize_risk,
pre_process=[winsorize_normal, standardize],
post_process=[winsorize_normal, standardize],
warm_start=batch)
'''
training phase: fit a linear regression model (scikit-learn style interface) on every training date
'''
train_x = data_package['train']['x']
train_y = data_package['train']['y']
dates = sorted(train_x.keys())
model_df = pd.Series()
features = data_package['x_names']
for train_date in dates:
model = LinearRegression(features, fit_intercept=False)
x = train_x[train_date]
y = train_y[train_date]
model.fit(x, y)
model_df.loc[train_date] = model
alpha_logger.info('trade_date: {0} training finished'.format(train_date))
'''
predicting phase: apply the trained models on the re-balance dates (optimizing with the risk-neutral method)
'''
predict_x = data_package['predict']['x']
settlement = data_package['settlement']
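# pre-compute industry dummies once; per-date rows are selected by boolean index inside the loop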
industry_dummies = pd.get_dummies(settlement['industry'].values)
risk_styles = settlement[portfolio_risk_neutralize].values
final_res = np.zeros(len(dates))
for i, predict_date in enumerate(dates):
model = model_df[predict_date]
x = predict_x[predict_date]
cons = Constraints()
index = settlement.trade_date == predict_date
benchmark_w = settlement[index]['weight'].values
realized_r = settlement[index]['dx'].values
industry_names = settlement[index]['industry'].values
is_tradable = settlement[index]['isOpen'].values
cons.add_exposure(['total'], np.ones((len(is_tradable), 1)))
cons.set_constraints('total', benchmark_w.sum(), benchmark_w.sum())
if portfolio_industry_neutralize:
ind_exp = industry_dummies[index]
risk_tags = ind_exp.columns
cons.add_exposure(risk_tags, ind_exp.values)
benchmark_exp = benchmark_w @ ind_exp.values
for k, name in enumerate(risk_tags):
cons.set_constraints(name, benchmark_exp[k], benchmark_exp[k])
if portfolio_risk_neutralize:
risk_exp = risk_styles[index]
risk_tags = np.array(portfolio_risk_neutralize)
cons.add_exposure(risk_tags, risk_exp)
benchmark_exp = benchmark_w @ risk_exp
for k, name in enumerate(risk_tags):
cons.set_constraints(name, benchmark_exp[k], benchmark_exp[k])
predict_y = model.predict(x)
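    # tradability filter is switched off here: every name is treated as tradable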
is_tradable[:] = True
weights, analysis = er_portfolio_analysis(predict_y,
industry_names,
realized_r,
constraints=cons,
detail_analysis=True,
benchmark=benchmark_w,
is_tradable=is_tradable,
method=method,
use_rank=use_rank)
final_res[i] = analysis['er']['total'] / benchmark_w.sum()
alpha_logger.info('trade_date: {0} predicting finished'.format(predict_date))
last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
df = pd.Series(final_res, index=dates[1:] + [last_date])
df.sort_index(inplace=True)
df.cumsum().plot()
plt.title('Prod factors model {1} ({0})'.format(method, model.__class__.__name__))
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2017-9-5
@author: cheng.li
"""
import math
import pandas as pd
import numpy as np
from PyFin.api import *
from alphamind.api import *
factor = 'ROE'
universe = Universe('custom', ['zz800'])
start_date = '2010-01-01'
end_date = '2018-04-26'
freq = '10b'
category = 'sw_adj'
level = 1
horizon = map_freq(freq)
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
def factor_analysis(factor):
engine = SqlEngine()
factors = {
'f1': CSQuantiles(factor),
'f2': CSQuantiles(factor, groups='sw1_adj'),
'f3': LAST(factor)
}
total_factor = engine.fetch_factor_range(universe, factors, dates=ref_dates)
_, risk_exp = engine.fetch_risk_model_range(universe, dates=ref_dates)
industry = engine.fetch_industry_range(universe, dates=ref_dates, category=category, level=level)
rets = engine.fetch_dx_return_range(universe, horizon=horizon, offset=1, dates=ref_dates)
total_factor = pd.merge(total_factor, industry[['trade_date', 'code', 'industry']], on=['trade_date', 'code'])
total_factor = pd.merge(total_factor, risk_exp, on=['trade_date', 'code'])
total_factor = pd.merge(total_factor, rets, on=['trade_date', 'code']).dropna()
df_ret = pd.DataFrame(columns=['f1', 'f2', 'f3'])
df_ic = pd.DataFrame(columns=['f1', 'f2', 'f3'])
total_factor_groups = total_factor.groupby('trade_date')
for date, this_factors in total_factor_groups:
        raw_factors = this_factors[['f3']].values
        industry_exp = this_factors[industry_styles + ['COUNTRY']].values.astype(float)
        # factor_processing expects a 2-d array; flatten before writing back to the column
        processed_values = factor_processing(raw_factors, pre_process=[], risk_factors=industry_exp,
                                             post_process=[percentile])
        this_factors['f3'] = processed_values.flatten()
factor_values = this_factors[['f1', 'f2', 'f3']].values
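        # long names in the top quintile (score >= 0.8), short the bottom quintile (<= 0.2),
        # then normalize each column to unit gross exposure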
positions = (factor_values >= 0.8) * 1.
positions[factor_values <= 0.2] = -1
positions /= np.abs(positions).sum(axis=0)
ret_values = this_factors.dx.values @ positions
df_ret.loc[date] = ret_values
ic_values = this_factors[['dx', 'f1', 'f2', 'f3']].corr().values[0, 1:]
df_ic.loc[date] = ic_values
print(f"{factor} is finished")
return {'ic': (df_ic.mean(axis=0), df_ic.std(axis=0) / math.sqrt(len(df_ic))),
            'ret': (df_ret.mean(axis=0), df_ret.std(axis=0) / math.sqrt(len(df_ret))),
'factor': factor}
if __name__ == '__main__':
from dask.distributed import Client
try:
client = Client("10.63.6.176:8786")
cols = pd.MultiIndex.from_product([['mean', 'std'], ['raw', 'peer', 'neutralized']])
factors_ret = pd.DataFrame(columns=cols)
factors_ic = pd.DataFrame(columns=cols)
factors = ['ep_q',
'roe_q',
'SGRO',
'GREV',
'IVR',
'ILLIQUIDITY',
'con_target_price',
'con_pe_rolling_order',
'DividendPaidRatio']
l = client.map(factor_analysis, factors)
results = client.gather(l)
for res in results:
factor = res['factor']
factors_ret.loc[factor, 'mean'] = res['ret'][0].values
factors_ret.loc[factor, 'std'] = res['ret'][1].values
factors_ic.loc[factor, 'mean'] = res['ic'][0].values
factors_ic.loc[factor, 'std'] = res['ic'][1].values
print(factors_ret)
finally:
client.close()
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import datetime as dt
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
start = dt.datetime.now()
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('custom', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles
n_bins = 5
factor_weights = np.array([1.])
freq = '1w'
if freq == '1m':
horizon = 21
elif freq == '1w':
horizon = 4
elif freq == '1d':
horizon = 0
start_date = '2012-01-01'
end_date = '2012-08-01'
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse',
dateRule=BizDayConventions.Following)
prod_factors = ['EPS']
all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_all_data = all_data['factor']
total_df = pd.DataFrame()
for factor in prod_factors:
factors = [factor]
final_res = np.zeros((len(dates), n_bins))
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1][['code', factor, 'isOpen', 'weight'] + neutralize_risk]
codes = data.code.tolist()
ref_date = value[0].strftime('%Y-%m-%d')
returns = return_groups.get_group(date)
total_data = pd.merge(data, returns, on=['code']).dropna()
print('{0}: {1}'.format(date, len(data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
benchmark = total_data.weight.values
f_data = total_data[factors]
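        # bucket the risk-neutralized factor into n_bins quantiles and compute
        # per-bin returns relative to the benchmark weights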
try:
res = quantile_analysis(f_data,
factor_weights,
dx_return,
risk_exp=risk_exp,
n_bins=n_bins,
benchmark=benchmark)
except Exception as e:
print(e)
res = np.zeros(n_bins)
final_res[i] = res / benchmark.sum()
df = pd.DataFrame(final_res, index=dates)
    plot_start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
    df.loc[plot_start_date] = 0.
df.sort_index(inplace=True)
    df.cumsum().plot()
plt.title('{0} weekly re-balance'.format(factors[0]))
plt.savefig('{0}_big_universe_20170814.png'.format(factors[0]))
print('{0} is finished'.format(factor))
print(dt.datetime.now() - start)
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from alphamind.data.dbmodel.models import LegacyFactor
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('custom', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles
n_bins = 24
factor_weights = np.array([1.])
freq = '1w'
if freq == '1m':
horizon = 21
elif freq == '1w':
horizon = 4
elif freq == '1d':
horizon = 0
start_date = '2016-04-01'
end_date = '2017-08-16'
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
col_names = set()
factor_tables = [LegacyFactor]
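# collect every factor column from the listed model tables, excluding the
# trade_date / code key columns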
for t in factor_tables:
for c in t.__table__.columns:
col_names.add(c.name)
col_names = col_names.difference(set(['trade_date', 'code']))
prod_factors = list(col_names)
all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_all_data = all_data['factor']
total_df = pd.DataFrame()
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
for date, factor_data in factor_groups:
ref_date = date.strftime('%Y-%m-%d')
returns = return_groups.get_group(date)
final_res = np.zeros((len(prod_factors), n_bins))
this_date_data = factor_data[['code', 'isOpen', 'weight'] + prod_factors + neutralize_risk]
this_date_data = pd.merge(this_date_data, returns, on=['code'])
codes = this_date_data.code.tolist()
for i, factor in enumerate(prod_factors):
factors = [factor]
total_data = this_date_data[['code', 'isOpen', 'weight', 'dx'] + factors + neutralize_risk].dropna()
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
benchmark = total_data.weight.values
f_data = total_data[factors]
try:
res = quantile_analysis(f_data,
factor_weights,
dx_return,
risk_exp=risk_exp,
n_bins=n_bins,
benchmark=benchmark)
except Exception as e:
print(e)
res = np.zeros(n_bins)
final_res[i] = res / benchmark.sum()
df = pd.DataFrame(final_res, index=prod_factors)
df.sort_index(inplace=True)
df['trade_date'] = date
total_df = total_df.append(df)
print('{0} is finished'.format(date))
total_df.to_csv('d:/factor_eval_pm500_mirror.csv')
# -*- coding: utf-8 -*-
"""
Created on 2017-11-8
@author: cheng.li
"""
from alphamind.api import *
ref_date = '2017-11-21'
universe_names = ['zz500', 'hs300']
universe = Universe('custom', universe_names)
frequency = '5b'
batch = 8
neutralize_risk = ['SIZE'] + industry_styles
engine = SqlEngine()
linear_model_features = ['eps_q', 'roe_q', 'BDTO', 'CFinc1', 'CHV', 'IVR', 'VAL', 'GREV']
training_data = fetch_train_phase(engine,
linear_model_features,
ref_date,
frequency,
universe,
batch,
neutralize_risk,
pre_process=[winsorize_normal, standardize],
post_process=[winsorize_normal, standardize],
warm_start=batch)
model = LinearRegression(linear_model_features, fit_intercept=False)
x = training_data['train']['x']
y = training_data['train']['y'].flatten()
model.fit(x, y)
print(model.impl.coef_)