Commit 8c79003a authored by Dr.李's avatar Dr.李

added strategy

parent 69504bfe
...@@ -20,6 +20,7 @@ from alphamind.data.winsorize import winsorize_normal ...@@ -20,6 +20,7 @@ from alphamind.data.winsorize import winsorize_normal
from alphamind.data.rank import rank from alphamind.data.rank import rank
from alphamind.data.standardize import standardize from alphamind.data.standardize import standardize
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel
PROCESS_MAPPING = { PROCESS_MAPPING = {
'winsorize_normal': winsorize_normal, 'winsorize_normal': winsorize_normal,
def train_model(ref_date: str,
                alpha_model,
                data_meta,
                x_values: pd.DataFrame = None,
                y_values: pd.DataFrame = None):
    """Return a fitted deep copy of ``alpha_model`` for ``ref_date``.

    Parameters
    ----------
    ref_date : str
        Reference trade date ('YYYY-MM-DD') used to fetch training data.
    alpha_model
        The model template; it is deep-copied so the caller's instance is
        never mutated by fitting.
    data_meta
        Data descriptor providing ``fetch_train_data(ref_date, alpha_model)``.
    x_values, y_values : pd.DataFrame, optional
        Pre-fetched training data; when ``x_values`` is None both are pulled
        from ``data_meta``.

    Notes
    -----
    A ``ConstLinearModel`` carries fixed weights and cannot be fitted (its
    implementation raises ``NotImplementedError``), so it is returned as a
    plain copy without touching any training data.
    """
    base_model = copy.deepcopy(alpha_model)

    if not isinstance(alpha_model, ConstLinearModel):
        if x_values is None:
            train_data = data_meta.fetch_train_data(ref_date, alpha_model)
            x_values = train_data['train']['x']
            y_values = train_data['train']['y']
        base_model.fit(x_values, y_values)

    return base_model
...@@ -22,7 +22,7 @@ class ConstLinearModelImpl(object): ...@@ -22,7 +22,7 @@ class ConstLinearModelImpl(object):
self.weights = weights.flatten() self.weights = weights.flatten()
def fit(self, x: np.ndarray, y: np.ndarray): def fit(self, x: np.ndarray, y: np.ndarray):
pass raise NotImplementedError("Const linear model doesn't offer fit methodology")
def predict(self, x: np.ndarray): def predict(self, x: np.ndarray):
return x @ self.weights return x @ self.weights
......
...@@ -108,7 +108,7 @@ class LinearConstraints(object): ...@@ -108,7 +108,7 @@ class LinearConstraints(object):
backbone: np.ndarray=None): backbone: np.ndarray=None):
pyFinAssert(len(bounds) == cons_mat.shape[1], "Number of bounds should be same as number of col of cons_mat") pyFinAssert(len(bounds) == cons_mat.shape[1], "Number of bounds should be same as number of col of cons_mat")
self.names = list(bounds.keys()) self.names = list(set(bounds.keys()).intersection(set(cons_mat.columns)))
self.bounds = bounds self.bounds = bounds
self.cons_mat = cons_mat self.cons_mat = cons_mat
self.backbone = backbone self.backbone = backbone
......
# -*- coding: utf-8 -*-
"""
Created on 2018-5-3

@author: cheng.li
"""
import copy

import numpy as np
import pandas as pd

from PyFin.api import advanceDateByCalendar
from PyFin.api import makeSchedule

from alphamind.analysis.factoranalysis import er_portfolio_analysis
from alphamind.data.engines.sqlengine import industry_styles
from alphamind.data.engines.sqlengine import macro_styles
from alphamind.data.engines.sqlengine import risk_styles
from alphamind.data.processing import factor_processing
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.model.composer import train_model
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
# Full style universe used when assembling the security covariance matrix.
all_styles = risk_styles + industry_styles + macro_styles

# Default linear constraints for the optimizer: stay within [80%, 100%] of
# the benchmark's in-benchmark exposure, and keep total weight fully
# invested (exactly 100%).  All bounds are RELATIVE to the benchmark value.
total_risk_names = ['benchmark', 'total']

b_type = []
l_val = []
u_val = []

for name in total_risk_names:
    if name == 'benchmark':
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(0.8)
        u_val.append(1.0)
    else:
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(1.0)
        u_val.append(1.0)

bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
class RunningSetting(object):
    """Backtest run configuration: universe, schedule and rebalance policy.

    Parameters
    ----------
    universe
        Stock universe object understood by the data engine.
    start_date, end_date
        Backtest window; rebalance dates are generated on the
        'china.sse' calendar.
    freq : str
        Rebalance frequency (e.g. '10b'); also determines the return horizon.
    benchmark : int
        Benchmark index code (default 905, i.e. CSI 500).
    industry_cat : str
        Industry classification scheme name.
    industry_level : int
        Level within the industry classification.
    rebalance_method : str
        Portfolio construction method passed to ``er_portfolio_analysis``
        (e.g. 'risk_neutral' or 'tv').
    **kwargs
        Extra options kept verbatim in ``more_opts``.
    """

    def __init__(self,
                 universe,
                 start_date,
                 end_date,
                 freq,
                 benchmark=905,
                 industry_cat='sw_adj',
                 industry_level=1,
                 rebalance_method='risk_neutral',
                 **kwargs):
        self.universe = universe
        # Keep freq available to consumers (e.g. date arithmetic at the end
        # of a backtest) instead of forcing reliance on module globals.
        self.freq = freq
        self.dates = makeSchedule(start_date, end_date, freq, 'china.sse')
        self.dates = [d.strftime('%Y-%m-%d') for d in self.dates]
        self.benchmark = benchmark
        self.horizon = map_freq(freq)
        self.executor = NaiveExecutor()
        self.industry_cat = industry_cat
        self.industry_level = industry_level
        self.rebalance_method = rebalance_method
        self.more_opts = kwargs
class Strategy(object):
    """Drive a factor backtest: load data, retrain, optimize and settle.

    Combines an alpha model, a data descriptor (``data_meta``) and a
    ``RunningSetting`` into a rebalance loop over every scheduled trade date.
    """

    def __init__(self,
                 alpha_model,
                 data_meta,
                 running_setting):
        self.alpha_model = alpha_model
        self.data_meta = data_meta
        self.running_setting = running_setting
        self.engine = self.data_meta.engine

    def run(self):
        """Run the backtest and return ``(ret_df, positions)``.

        ``ret_df`` holds per-date portfolio/benchmark log-returns and
        turnover (shifted one period so returns align with the holding
        period); ``positions`` stacks the target portfolio of every
        rebalance date.
        """
        alpha_logger.info("starting backting ...")

        universe = self.running_setting.universe
        dates = self.running_setting.dates

        total_factors = self.engine.fetch_factor_range(universe,
                                                       self.alpha_model.formulas,
                                                       dates=dates)
        alpha_logger.info("alpha factor data loading finished ...")

        # BUG FIX: previously used the module-global `universe` (defined only
        # in the __main__ script) instead of the configured one.
        total_industry = self.engine.fetch_industry_matrix_range(universe,
                                                                 dates=dates,
                                                                 category=self.running_setting.industry_cat,
                                                                 level=self.running_setting.industry_level)
        alpha_logger.info("industry data loading finished ...")

        total_benchmark = self.engine.fetch_benchmark_range(dates=dates,
                                                            benchmark=self.running_setting.benchmark)
        alpha_logger.info("benchmark data loading finished ...")

        # BUG FIX: same global-`universe` issue as above.
        total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
            universe,
            dates=dates,
            risk_model=self.data_meta.risk_model
        )
        alpha_logger.info("risk_model data loading finished ...")

        total_returns = self.engine.fetch_dx_return_range(universe,
                                                          dates=dates,
                                                          horizon=self.running_setting.horizon,
                                                          offset=1)
        alpha_logger.info("returns data loading finished ...")

        # Assemble one wide frame keyed by (trade_date, code).  Benchmark
        # weights are left-joined (codes outside the benchmark get weight 0);
        # missing risk exposures fall back to the column medians.
        total_data = pd.merge(total_factors, total_industry, on=['trade_date', 'code'])
        total_data = pd.merge(total_data, total_benchmark, on=['trade_date', 'code'], how='left')
        total_data.fillna({'weight': 0.}, inplace=True)
        total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code'])
        total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code']).fillna(total_data.median())

        total_data_groups = total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        # Deep-copy so repeated run() calls never share executor state.
        executor = copy.deepcopy(self.running_setting.executor)
        positions = pd.DataFrame()

        for ref_date, this_data in total_data_groups:
            new_model = train_model(ref_date.strftime('%Y-%m-%d'), self.alpha_model, self.data_meta)

            codes = this_data.code.values.tolist()

            # The target-volatility method needs the security covariance.
            if self.running_setting.rebalance_method == 'tv':
                risk_cov = total_risk_cov[total_risk_cov.trade_date == ref_date]
                sec_cov = self._generate_sec_cov(this_data, risk_cov)
            else:
                sec_cov = None

            benchmark_w = this_data.weight.values
            is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1))
            constraints_exp = np.concatenate([is_in_benchmark,
                                              np.ones_like(is_in_benchmark)],
                                             axis=1)
            constraints_exp = pd.DataFrame(constraints_exp, columns=['benchmark', 'total'])
            # `bounds` is the module-level box-bound set defined above.
            constraints = LinearConstraints(bounds, constraints_exp, benchmark_w)

            # Per-name weight band: benchmark weight +/- 2%, floored at 0.
            lbound = np.maximum(0., benchmark_w - 0.02)
            ubound = 0.02 + benchmark_w

            features = new_model.features
            raw_factors = this_data[features].values
            new_factors = factor_processing(raw_factors,
                                            pre_process=self.data_meta.pre_process,
                                            risk_factors=self.data_meta.neutralized_risk,
                                            post_process=self.data_meta.post_process)

            er = new_model.predict(pd.DataFrame(new_factors, columns=features))

            alpha_logger.info('{0} re-balance: {1} codes'.format(ref_date, len(er)))
            target_pos, _ = er_portfolio_analysis(er,
                                                  this_data.industry_name.values,
                                                  None,
                                                  constraints,
                                                  False,
                                                  benchmark_w,
                                                  method=self.running_setting.rebalance_method,
                                                  lbound=lbound,
                                                  ubound=ubound,
                                                  target_vol=0.05,
                                                  cov=sec_cov)

            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date
            target_pos['benchmark_weight'] = benchmark_w
            target_pos['dx'] = this_data.dx.values

            turn_over, executed_pos = executor.execute(target_pos=target_pos)

            # dx is a log-return; convert to simple return for the weighted
            # portfolio return, then back to a log-return for accumulation.
            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)

            positions = positions.append(target_pos)

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs}, index=trade_dates)

        index_return = self.engine.fetch_dx_return_index_range(self.running_setting.benchmark,
                                                               dates=dates,
                                                               horizon=self.running_setting.horizon,
                                                               offset=1).set_index('trade_date')
        ret_df['benchmark_returns'] = index_return['dx']
        # Append a zero row at the next rebalance date, then shift so each
        # row shows the return realized over the holding period just ended.
        # NOTE(review): `freq` historically came from a module global set in
        # __main__; prefer the setting when available — confirm with callers.
        freq_used = getattr(self.running_setting, 'freq', None)
        if freq_used is None:
            freq_used = freq
        ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1], freq_used)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = ret_df['returns'] - ret_df['benchmark_returns']

        return ret_df, positions

    @staticmethod
    def _generate_sec_cov(current_data, risk_cov):
        """Build the security covariance matrix (risk-model factor part plus
        diagonal specific risk), scaled from (percent)^2 to decimal^2."""
        risk_exposure = current_data[all_styles].values
        risk_cov = risk_cov[all_styles].values
        special_risk = current_data['srisk'].values
        sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
        return sec_cov
if __name__ == '__main__':
    # Demo backtest: single-factor const-weight model on the zz800 universe.
    from matplotlib import pyplot as plt
    from PyFin.api import *
    from alphamind.api import Universe
    from alphamind.api import ConstLinearModel
    from alphamind.api import DataMeta

    start_date = '2010-01-01'
    end_date = '2018-04-19'
    freq = '10b'
    neutralized_risk = None

    universe = Universe("custom", ['zz800'])

    factor = 'RVOL'
    # Cross-sectional quantile of the factor within adjusted SW level-1 industries.
    alpha_factors = {'f01': CSQuantiles(LAST(factor), groups='sw1_adj')}
    weights = {'f01': 1.}
    alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
    data_meta = DataMeta(freq=freq,
                         universe=universe,
                         batch=1)

    running_setting = RunningSetting(universe,
                                     start_date,
                                     end_date,
                                     freq,
                                     rebalance_method='tv')

    strategy = Strategy(alpha_model, data_meta, running_setting)
    ret_df, positions = strategy.run()

    ret_df['excess_return'].cumsum().plot()
    plt.title(f"{factor}")
    plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment