Commit daa642f6 authored by Dr.李

Directly fetch returns from the market table

parent 4dbb172a
@@ -30,6 +30,7 @@ from alphamind.data.dbmodel.models import RiskCovShort
 from alphamind.data.dbmodel.models import RiskCovLong
 from alphamind.data.dbmodel.models import FullFactor
 from alphamind.data.dbmodel.models import Models
+from alphamind.data.dbmodel.models import Market
 from alphamind.data.dbmodel.models import Universe as UniverseTable
 from alphamind.data.transformer import Transformer
 from alphamind.model.loader import load_model
@@ -190,19 +191,19 @@ class SqlEngine(object):
         start_date = ref_date
         if not expiry_date:
-            end_date = advanceDateByCalendar('china.sse', ref_date, str(horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
+            end_date = advanceDateByCalendar('china.sse', ref_date, str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
         else:
             end_date = expiry_date
-        stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
-            partition_by=DailyReturn.code,
-            order_by=DailyReturn.trade_date,
-            rows=(DAILY_RETURN_OFFSET + offset, horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
+        stats = func.sum(self.ln_func(1. + Market.chgPct)).over(
+            partition_by=Market.code,
+            order_by=Market.trade_date,
+            rows=(1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
-        query = select([DailyReturn.trade_date, DailyReturn.code, stats]).where(
+        query = select([Market.trade_date, Market.code, stats]).where(
             and_(
-                DailyReturn.trade_date.between(start_date, end_date),
-                DailyReturn.code.in_(codes)
+                Market.trade_date.between(start_date, end_date),
+                Market.code.in_(codes)
             )
         )
@@ -223,20 +224,20 @@ class SqlEngine(object):
         start_date = dates[0]
         end_date = dates[-1]
-        end_date = advanceDateByCalendar('china.sse', end_date, str(horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
+        end_date = advanceDateByCalendar('china.sse', end_date, str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
         cond = universe.query_range(start_date, end_date)
-        big_table = join(DailyReturn, UniverseTable,
-                         and_(DailyReturn.trade_date == UniverseTable.trade_date,
-                              DailyReturn.code == UniverseTable.code,
+        big_table = join(Market, UniverseTable,
+                         and_(Market.trade_date == UniverseTable.trade_date,
+                              Market.code == UniverseTable.code,
                               cond))
-        stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
-            partition_by=DailyReturn.code,
-            order_by=DailyReturn.trade_date,
-            rows=(offset + DAILY_RETURN_OFFSET, horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
+        stats = func.sum(self.ln_func(1. + Market.chgPct)).over(
+            partition_by=Market.code,
+            order_by=Market.trade_date,
+            rows=(1 + offset + DAILY_RETURN_OFFSET, 1 + horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
-        query = select([DailyReturn.trade_date, DailyReturn.code, stats]) \
+        query = select([Market.trade_date, Market.code, stats]) \
             .select_from(big_table)
         df = pd.read_sql(query, self.session.bind).dropna()
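Both rewritten queries build the same quantity: a forward-looking horizon return accumulated as a window sum of daily log returns, partitioned by code and ordered by trade date. With SQLAlchemy's rows=(a, b) and positive bounds, the frame reads "a FOLLOWING through b FOLLOWING", so the extra "1 +" introduced by this commit shifts the whole window one row forward. A minimal pandas sketch of the same arithmetic (the value of DAILY_RETURN_OFFSET and the sample parameters below are assumptions for illustration, not values read from the engine):

import numpy as np
import pandas as pd

DAILY_RETURN_OFFSET = 0  # assumed value of the engine's constant

def forward_log_return(df, offset=0, horizon=4):
    # df has one row per (code, trade_date) with a 'chgPct' column
    lo = 1 + offset + DAILY_RETURN_OFFSET
    hi = 1 + horizon + offset + DAILY_RETURN_OFFSET
    width = hi - lo + 1

    def per_code(chg):
        log_ret = np.log(1. + chg)
        # rolling(width).sum() at row t covers rows t-width+1..t;
        # shift(-hi) re-aligns it so row t covers rows t+lo..t+hi
        return log_ret.rolling(width).sum().shift(-hi)

    df = df.sort_values(['code', 'trade_date'])
    return df.groupby('code')['chgPct'].transform(per_code).rename('dx')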
......
# -*- coding: utf-8 -*-
"""
Created on 2017-11-8
@author: cheng.li
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *
plt.style.use('ggplot')
"""
Back test parameter settings
"""
start_date = '2017-01-01'
end_date = '2017-11-06'
benchmark_code = 905
universe_name = 'zz500'
universe = Universe(universe_name, [universe_name])
frequency = '2w'
batch = 4
method = 'risk_neutral'
use_rank = 100
industry_lower = 1.
industry_upper = 1.
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles
horizon = map_freq(frequency)
executor = NaiveExecutor()
"""
Model phase: we need one constant linear model and one linear regression model
"""
const_features = ["IVR", "eps_q", "DivP", "CFinc1", "BDTO"]
const_weights = np.array([0.05, 0.2, 0.075, 0.15, 0.05])
const_model = ConstLinearModel(features=const_features,
weights=const_weights)
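# note: ConstLinearModel presumably scores stocks as a fixed weighted sum of
# the processed factor columns (er = X @ weights); that reading is inferred
# from its use further below, not from the library's documentation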
linear_model_features = {
'eps': LAST('eps_q'),
'roe': LAST('roe_q'),
'bdto': LAST('BDTO'),
'cfinc1': LAST('CFinc1'),
'chv': LAST('CHV'),
'ivr': LAST('IVR'),
'val': LAST('VAL'),
'grev': LAST('GREV')
}
"""
Data phase
"""
engine = SqlEngine()
linear_model_factor_data = fetch_data_package(engine,
alpha_factors=linear_model_features,
start_date=start_date,
end_date=end_date,
frequency=frequency,
universe=universe,
benchmark=benchmark_code,
batch=batch,
neutralized_risk=neutralize_risk,
pre_process=[winsorize_normal, standardize],
post_process=[winsorize_normal, standardize],
warm_start=batch)
train_x = linear_model_factor_data['train']['x']
train_y = linear_model_factor_data['train']['y']
ref_dates = sorted(train_x.keys())
predict_x = linear_model_factor_data['predict']['x']
predict_y = linear_model_factor_data['predict']['y']
settlement = linear_model_factor_data['settlement']
linear_model_features = linear_model_factor_data['x_names']
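# note: linear_model_features is re-bound here to the engine's processed
# x_names list, replacing the expression dict defined above; the regressions
# below are built on these processed column names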
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor']
const_return_data = engine.fetch_dx_return_range(universe, dates=ref_dates, horizon=horizon)
"""
Training phase
"""
models_series = pd.Series()
for ref_date in ref_dates:
    x = train_x[ref_date]
    y = train_y[ref_date].flatten()
    model = LinearRegression(linear_model_features, fit_intercept=False)
    model.fit(x, y)
    models_series.loc[ref_date] = model
    alpha_logger.info('trade_date: {0} training finished'.format(ref_date))
"""
Prediction and rebalancing phase
"""
frequency = '1d'
horizon = map_freq(frequency)
dates = makeSchedule(start_date,
end_date,
tenor=frequency,
calendar='china.sse',
dateGenerationRule=DateGeneration.Forward)
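# note: from here on the strategy is evaluated on a daily ('1d') schedule, so
# horizon now maps to one-day returns, while the models above were trained on
# the two-week frequency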
total_factors = {
'eps': LAST('eps_q'),
'roe': LAST('roe_q'),
'bdto': LAST('BDTO'),
'cfinc1': LAST('CFinc1'),
'chv': LAST('CHV'),
'ivr': LAST('IVR'),
'val': LAST('VAL'),
'grev': LAST('GREV'),
'divp': LAST('DivP')
}
all_data = engine.fetch_data_range(universe, total_factors, dates=dates, benchmark=905)
factor_all_data = all_data['factor']
factor_groups = factor_all_data.groupby('trade_date')
rets = []
turn_overs = []
leverages = []
for i, value in enumerate(factor_groups):
    date = value[0]
    data = value[1]
    codes = data.code.tolist()
    ref_date = date.strftime('%Y-%m-%d')
    # `returns` is never defined in the original script; presumably the
    # realized horizon returns for the current date were meant to be fetched here
    returns = engine.fetch_dx_return(date, codes, horizon=horizon)
    total_data = pd.merge(data, returns, on=['code']).dropna()
    alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
    risk_exp = total_data[neutralize_risk].values.astype(float)
    industry = total_data.industry_code.values
    dx_return = total_data.dx.values
    benchmark_w = total_data.weight.values
    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w
    lbound = np.zeros(len(total_data))
    ubound = 0.01 + benchmark_w
    constraint = Constraints(risk_exp_expand, risk_names)
    for j, name in enumerate(risk_names):
        if name == 'total' or name == 'SIZE':
            constraint.set_constraints(name,
                                       lower_bound=risk_target[j],
                                       upper_bound=risk_target[j])
        else:
            constraint.set_constraints(name,
                                       lower_bound=risk_target[j] * industry_lower,
                                       upper_bound=risk_target[j] * industry_upper)
    factor_values = factor_processing(total_data[const_features].values,
                                      pre_process=[winsorize_normal, standardize],
                                      risk_factors=risk_exp,
                                      post_process=[winsorize_normal, standardize])
    # const linear model
    er1 = const_model.predict(factor_values)
    # linear regression model: use the most recently trained model
    models = models_series[models_series.index <= date]
    model = models.iloc[-1]
    x = predict_x[date]
    er2 = model.predict(x)
    # combine the two models' scores after scaling each to unit variance
    er1_table = pd.DataFrame({'er1': er1 / er1.std(), 'code': total_data.code.values})
    er2_table = pd.DataFrame({'er2': er2 / er2.std(), 'code': settlement.loc[settlement.trade_date == date, 'code'].values})
    er_table = pd.merge(er1_table, er2_table, on=['code'], how='left').fillna(0)
    er = (er_table.er1 + er_table.er2).values
    target_pos, _ = er_portfolio_analysis(er,
                                          industry,
                                          dx_return,
                                          constraint,
                                          False,
                                          benchmark_w,
                                          method=method,
                                          use_rank=use_rank)
    target_pos['code'] = total_data['code'].values
    turn_over, executed_pos = executor.execute(target_pos=target_pos)
    executed_codes = executed_pos.code.tolist()
    dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)
    result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
    result = pd.merge(result, dx_returns, on=['code'])
    leverage = result.weight_x.abs().sum()
    ret = (result.weight_x - result.weight_y * leverage / result.weight_y.sum()).values @ np.exp(result.dx.values)
    rets.append(ret)
    executor.set_current(executed_pos)
    turn_overs.append(turn_over)
    leverages.append(leverage)
    alpha_logger.info('{0} is finished'.format(date))
# one row per rebalance date in the loop above, so index on `dates`, not `ref_dates`
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverages}, index=dates)
ret_df.loc[advanceDateByCalendar('china.sse', dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0}'.format(frequency),
secondary_y='tc_cost')
plt.show()
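One subtlety in the loop above: dx is stored as a log return, so np.exp(result.dx) is the gross simple return, and because the benchmark leg is rescaled to the portfolio's leverage, the active weights sum to zero and the constant term of the gross return cancels. A toy check of that identity (all numbers invented for illustration):

import numpy as np

w_p = np.array([0.6, 0.4])                 # hypothetical portfolio weights
w_b = np.array([0.5, 0.5])                 # hypothetical benchmark weights
dx = np.log(1. + np.array([0.02, -0.01]))  # returns stored in log form

leverage = np.abs(w_p).sum()
active = w_p - w_b * leverage / w_b.sum()  # sums to zero by construction
print(active @ np.exp(dx))                 # 0.003: the active simple return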
@@ -17,19 +17,10 @@ import datetime as dt
 start = dt.datetime.now()
 formula1 = CSRank(DIFF(LOG("turnoverVol")))
 formula2 = CSRank((LAST('closePrice') - LAST('openPrice')) / LAST('openPrice'))
-expression = -CORR(6, formula1 ^ formula2)
-# expression1 = -0.6 * LAST('con_pe_rolling') - 0.6 * LAST('con_pb_rolling') + 0.6 * LAST('con_eps') + 1.2 * LAST('con_target_price')
-# expression2 = LAST("IVR")
+universe_name = 'zz500'
-#expression = expression1 + expression2
-# factor1 = LAST('RVOL')
-# factor2 = LAST('IVR')
-# expression = RES(20, factor2 ^ factor1)
-# expression = MA(1, "EPS")
+factor_name = 'ROIC'
+expression = LAST(factor_name)
 alpha_factor_name = 'alpha_factor'
 alpha_factor = {alpha_factor_name: expression}
@@ -37,22 +28,26 @@ alpha_factor = {alpha_factor_name: expression}
 # end of formula definition
 engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
-universe = Universe('custom', ['ashare_ex'])
+universe = Universe('custom', [universe_name])
+benchmark_code = 905
 neutralize_risk = ['SIZE'] + industry_styles
-freq = '1w'
+freq = '2w'
 n_bins = 5
+horizon = map_freq(freq)
-dates = makeSchedule('2012-01-01',
-                     '2017-10-11',
+start_date = '2012-01-01'
+end_date = '2017-11-03'
+dates = makeSchedule(start_date,
+                     end_date,
                      tenor=freq,
                      calendar='china.sse')
 factor_all_data = engine.fetch_data_range(universe,
                                           alpha_factor,
                                           dates=dates,
-                                          benchmark=905)['factor']
-return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=4)
+                                          benchmark=benchmark_code)['factor']
+return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
 factor_groups = factor_all_data.groupby('trade_date')
 return_groups = return_all_data.groupby('trade_date')
@@ -91,7 +86,68 @@ df = pd.DataFrame(final_res, index=dates)
 start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
 df.loc[start_date] = 0.
 df.sort_index(inplace=True)
-df = df.cumsum().plot()
-plt.show()
+fig, axes = plt.subplots(1, 2, figsize=(18, 6))
+df = df.cumsum().plot(ax=axes[0], title='Quantile Analysis for {0}'.format(factor_name))
+# =================================================================== #
+factor_name = 'ROE'
+expression = LAST(factor_name)
+alpha_factor_name = 'alpha_factor'
+alpha_factor = {alpha_factor_name: expression}
+dates = makeSchedule(start_date,
+                     end_date,
+                     tenor=freq,
+                     calendar='china.sse')
+factor_all_data = engine.fetch_data_range(universe,
+                                          alpha_factor,
+                                          dates=dates,
+                                          benchmark=benchmark_code)['factor']
+return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
+factor_groups = factor_all_data.groupby('trade_date')
+return_groups = return_all_data.groupby('trade_date')
+final_res = np.zeros((len(dates), n_bins))
+for i, value in enumerate(factor_groups):
+    date = value[0]
+    data = value[1][['code', alpha_factor_name, 'isOpen', 'weight'] + neutralize_risk]
+    codes = data.code.tolist()
+    ref_date = value[0].strftime('%Y-%m-%d')
+    returns = return_groups.get_group(date)
+    total_data = pd.merge(data, returns, on=['code']).dropna()
+    risk_exp = total_data[neutralize_risk].values.astype(float)
+    dx_return = total_data.dx.values
+    benchmark = total_data.weight.values
+    f_data = total_data[[alpha_factor_name]]
+    try:
+        er = factor_processing(total_data[[alpha_factor_name]].values,
+                               pre_process=[winsorize_normal, standardize],
+                               risk_factors=risk_exp,
+                               post_process=[winsorize_normal, standardize])
+        res = er_quantile_analysis(er,
+                                   n_bins=n_bins,
+                                   dx_return=dx_return,
+                                   benchmark=benchmark)
+    except Exception as e:
+        print(e)
+        res = np.zeros(n_bins)
+    final_res[i] = res / benchmark.sum()
+df = pd.DataFrame(final_res, index=dates)
+start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
+df.loc[start_date] = 0.
+df.sort_index(inplace=True)
+df = df.cumsum().plot(ax=axes[1], title='Quantile Analysis for {0}'.format(factor_name))
+plt.show()
 print(dt.datetime.now() - start)
\ No newline at end of file
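For context, the core of a quantile analysis such as er_quantile_analysis is: bucket each period's processed scores into n_bins equal-count groups and compare the buckets' realized returns. A rough sketch of that idea (an illustration of the concept only; the library's actual weighting and benchmark handling may differ):

import numpy as np

def quantile_returns(er, dx_return, n_bins=5):
    # rank the scores, then cut the ranks into n_bins equal-count buckets
    ranks = er.argsort().argsort()
    bins = ranks * n_bins // len(er)
    # mean realized return per bucket, lowest-score bucket first
    return np.array([dx_return[bins == b].mean() for b in range(n_bins)])

scores = np.random.randn(500)            # hypothetical processed scores
realized = 0.02 * np.random.randn(500)   # hypothetical horizon returns
print(quantile_returns(scores, realized))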
@@ -28,8 +28,8 @@ base factors - all the risk styles
 quantiles - 5
 start_date - 2012-01-01
 end_date - 2017-08-01
-re-balance - 1 week
-training - every 4 week
+re-balance - 2 weeks
+training - every 8 weeks
 '''
 engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
@@ -37,13 +37,24 @@ universe = Universe('zz500', ['zz500'])
 neutralize_risk = industry_styles
 portfolio_risk_neutralize = []
 portfolio_industry_neutralize = True
-alpha_factors = ['VAL', 'RVOL', 'ROEDiluted', 'GREV', 'EPS', 'CHV', 'CFinc1', 'BDTO'] # ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted'] # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
+alpha_factors = {
+    'eps': LAST('eps_q'),
+    'roe': LAST('roe_q'),
+    'bdto': LAST('BDTO'),
+    'cfinc1': LAST('CFinc1'),
+    'chv': LAST('CHV'),
+    'rvol': LAST('RVOL'),
+    'val': LAST('VAL'),
+    'grev': LAST('GREV'),
+    'droeafternonorecurring': LAST('DROEAfterNonRecurring')}
 benchmark = 905
 n_bins = 5
 frequency = '2w'
-batch = 4
-start_date = '2017-01-01'
-end_date = '2017-09-26'
+batch = 8
+start_date = '2012-01-01'
+end_date = '2017-11-05'
 method = 'risk_neutral'
 use_rank = 100
@@ -74,24 +85,10 @@ train_y = data_package['train']['y']
 dates = sorted(train_x.keys())
 model_df = pd.Series()
+features = data_package['x_names']
 for train_date in dates:
-    model = LinearRegression(alpha_factors, fit_intercept=False)
-    #model = LassoCV(fit_intercept=False)
-    # model = AdaBoostRegressor(n_estimators=100)
-    #model = RandomForestRegressor(n_estimators=100, n_jobs=4)
-    #model = NuSVR(kernel='rbf', C=1e-3, gamma=0.1)
-    # model = ConstLinearModel(alpha_factors, np.array([0.034129344,
-    #                                                   0.015881607,
-    #                                                   0.048765746,
-    #                                                   0.042747382,
-    #                                                   -0.015900173,
-    #                                                   0.019044573,
-    #                                                   -0.001792638,
-    #                                                   0.014277867,
-    #                                                   ]))
-    # model = ConstLinearModel(alpha_factors, np.array([1.] * len(alpha_factors)))
+    model = LinearRegression(features, fit_intercept=False)
     x = train_x[train_date]
     y = train_y[train_date]
@@ -99,43 +96,14 @@ for train_date in dates:
     model_df.loc[train_date] = model
     alpha_logger.info('trade_date: {0} training finished'.format(train_date))
 '''
-predicting phase: using trained model on the re-balance dates
+predicting phase: using trained model on the re-balance dates (optimizing with risk neutral)
 '''
 predict_x = data_package['predict']['x']
 settlement = data_package['settlement']
-# final_res = np.zeros((len(dates), n_bins))
-#
-# for i, predict_date in enumerate(dates):
-#     model = model_df[predict_date]
-#     x = predict_x[predict_date]
-#     benchmark_w = settlement[settlement.trade_date == predict_date]['weight'].values
-#     realized_r = settlement[settlement.trade_date == predict_date]['dx'].values
-#
-#     predict_y = model.predict(x)
-#
-#     res = er_quantile_analysis(predict_y,
-#                                n_bins,
-#                                dx_return=realized_r,
-#                                benchmark=benchmark_w)
-#
-#     final_res[i] = res / benchmark_w.sum()
-#     print('trade_date: {0} predicting finished'.format(train_date))
-#
-# last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
-#
-# df = pd.DataFrame(final_res, index=dates[1:] + [last_date])
-# df.sort_index(inplace=True)
-# df.cumsum().plot()
-# plt.title('Risk style factors model training with Linear Regression from 2012 - 2017')
-# plt.show()
-'''
-predicting phase: using trained model on the re-balance dates (optimizing with risk neutral)
-'''
 industry_dummies = pd.get_dummies(settlement['industry'].values)
 risk_styles = settlement[portfolio_risk_neutralize].values
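The two context lines above assemble the neutralization inputs: industry membership expands into one dummy column per industry, and the style columns listed in portfolio_risk_neutralize are appended. A small self-contained illustration of that assembly (data invented; note the script above actually sets portfolio_risk_neutralize = [], so only the dummies survive there):

import numpy as np
import pandas as pd

settlement = pd.DataFrame({'industry': ['bank', 'tech', 'bank', 'util'],
                           'SIZE': [0.1, -0.2, 0.3, 0.0]})

industry_dummies = pd.get_dummies(settlement['industry'].values)
risk_styles = settlement[['SIZE']].values  # pretend SIZE is being neutralized

risk_exp = np.concatenate([industry_dummies.values.astype(float), risk_styles], axis=1)
print(risk_exp.shape)  # (4, 4): three industry dummies + one style column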
@@ -187,11 +155,6 @@ for i, predict_date in enumerate(dates):
                                           method=method,
                                           use_rank=use_rank)
-    # model_res = pd.DataFrame({'weight': model.coef_[0],
-    #                           'factor': np.array(data_package['x_names'])})
-    # model_res.to_csv(r'\\10.63.6.71\sharespace\personal\licheng\portfolio\zz500_model\{0}.csv'.format(predict_date.strftime('%Y-%m-%d')))
     final_res[i] = analysis['er']['total'] / benchmark_w.sum()
     alpha_logger.info('trade_date: {0} predicting finished'.format(predict_date))
......