Commit aa7b9e86 authored by Dr.李's avatar Dr.李

using more abstract factor model

parent b482c543
...@@ -44,6 +44,7 @@ from alphamind.data.engines.utilities import _map_risk_model_table ...@@ -44,6 +44,7 @@ from alphamind.data.engines.utilities import _map_risk_model_table
from alphamind.data.engines.utilities import factor_tables from alphamind.data.engines.utilities import factor_tables
from alphamind.data.engines.utilities import industry_list from alphamind.data.engines.utilities import industry_list
from alphamind.data.processing import factor_processing from alphamind.data.processing import factor_processing
from alphamind.portfolio.riskmodel import FactorRiskModel
from PyFin.api import advanceDateByCalendar from PyFin.api import advanceDateByCalendar
...@@ -525,7 +526,8 @@ class SqlEngine(object): ...@@ -525,7 +526,8 @@ class SqlEngine(object):
ref_date: str, ref_date: str,
codes: Iterable[int], codes: Iterable[int],
risk_model: str = 'short', risk_model: str = 'short',
excluded: Iterable[str] = None) -> Tuple[pd.DataFrame, pd.DataFrame]: excluded: Iterable[str] = None,
model_type: str = None) -> Union[FactorRiskModel, Tuple[pd.DataFrame, pd.DataFrame]]:
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model) risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors] cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
...@@ -552,11 +554,20 @@ class SqlEngine(object): ...@@ -552,11 +554,20 @@ class SqlEngine(object):
.select_from(big_table).where( .select_from(big_table).where(
and_(RiskExposure.trade_date == ref_date, and_(RiskExposure.trade_date == ref_date,
RiskExposure.code.in_(codes) RiskExposure.code.in_(codes)
)).distinct() ))
risk_exp = pd.read_sql(query, self.engine).dropna() risk_exp = pd.read_sql(query, self.engine).dropna()
return risk_cov, risk_exp.drop_duplicates(['code']) if not model_type:
return risk_cov, risk_exp
elif model_type == 'factor':
factor_names = risk_cov.Factor.tolist()
risk_cov.set_index('Factor', inplace=True)
factor_cov = risk_cov.loc[factor_names, factor_names] / 10000.
risk_exp.set_index('code', inplace=True)
factor_loading = risk_exp.loc[:, factor_names]
idsync = risk_exp['srisk'] * risk_exp['srisk'] / 10000
return FactorRiskModel(factor_cov, factor_loading, idsync)
def fetch_risk_model_range(self, def fetch_risk_model_range(self,
universe: Universe, universe: Universe,
...@@ -564,12 +575,11 @@ class SqlEngine(object): ...@@ -564,12 +575,11 @@ class SqlEngine(object):
end_date: str = None, end_date: str = None,
dates: Iterable[str] = None, dates: Iterable[str] = None,
risk_model: str = 'short', risk_model: str = 'short',
excluded: Iterable[str] = None) -> Tuple[pd.DataFrame, pd.DataFrame]: excluded: Iterable[str] = None,
model_type: str = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model) risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors] cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(start_date, cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(start_date,
end_date) end_date)
query = select([risk_cov_table.trade_date, query = select([risk_cov_table.trade_date,
...@@ -578,16 +588,13 @@ class SqlEngine(object): ...@@ -578,16 +588,13 @@ class SqlEngine(object):
+ cov_risk_cols).where( + cov_risk_cols).where(
cond cond
) )
risk_cov = pd.read_sql(query, self.engine).sort_values(['trade_date', 'FactorID']) risk_cov = pd.read_sql(query, self.engine).sort_values(['trade_date', 'FactorID'])
if not excluded: if not excluded:
excluded = [] excluded = []
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)] risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]
cond = universe._query_statements(start_date, end_date, dates) cond = universe._query_statements(start_date, end_date, dates)
big_table = join(RiskExposure, UniverseTable, big_table = join(RiskExposure, UniverseTable,
and_( and_(
RiskExposure.trade_date == UniverseTable.trade_date, RiskExposure.trade_date == UniverseTable.trade_date,
...@@ -611,7 +618,24 @@ class SqlEngine(object): ...@@ -611,7 +618,24 @@ class SqlEngine(object):
risk_exp = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).dropna() risk_exp = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).dropna()
if not model_type:
return risk_cov, risk_exp return risk_cov, risk_exp
elif model_type == 'factor':
risk_cov.set_index('Factor', inplace=True)
risk_exp.set_index('code', inplace=True)
risk_cov_groups = risk_cov.groupby('trade_date')
risk_exp_groups = risk_exp.groupby('trade_date')
models = dict()
for ref_date, cov_g in risk_cov_groups:
exp_g = risk_exp_groups.get_group(ref_date)
factor_names = cov_g.index.tolist()
factor_cov = cov_g.loc[factor_names, factor_names] / 10000.
factor_loading = exp_g.loc[:, factor_names]
idsync = exp_g['srisk'] * exp_g['srisk'] / 10000
models[ref_date] = FactorRiskModel(factor_cov, factor_loading, idsync)
return pd.Series(models)
def fetch_industry(self, def fetch_industry(self,
ref_date: str, ref_date: str,
...@@ -1023,11 +1047,9 @@ if __name__ == '__main__': ...@@ -1023,11 +1047,9 @@ if __name__ == '__main__':
from PyFin.api import * from PyFin.api import *
engine = SqlEngine() engine = SqlEngine()
ref_date = '2017-05-03' ref_date = '2017-05-03'
universe = Universe('custon', ['zz800']) universe = Universe('zz800')
dates = bizDatesList('china.sse', '2018-05-01', '2018-05-10')
codes = engine.fetch_codes(ref_date, universe) dates = [d.strftime('%Y-%m-%d') for d in dates]
# df = engine.fetch_trade_status(ref_date, codes, offset=1)
dates = ['2017-05-02', '2017-05-03', '2017-05-04'] res = engine.fetch_risk_model_range(universe, dates=dates, model_type='factor')
df = engine.fetch_trade_status_range(universe, dates=dates, offset=1) print(res)
print(df) \ No newline at end of file
\ No newline at end of file
...@@ -87,10 +87,11 @@ class Strategy(object): ...@@ -87,10 +87,11 @@ class Strategy(object):
benchmark=self.benchmark) benchmark=self.benchmark)
alpha_logger.info("benchmark data loading finished ...") alpha_logger.info("benchmark data loading finished ...")
total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range( self.risk_models = self.engine.fetch_risk_model_range(
self.universe, self.universe,
dates=self.dates, dates=self.dates,
risk_model=self.data_meta.risk_model risk_model=self.data_meta.risk_model,
model_type='factor'
) )
alpha_logger.info("risk_model data loading finished ...") alpha_logger.info("risk_model data loading finished ...")
...@@ -104,7 +105,6 @@ class Strategy(object): ...@@ -104,7 +105,6 @@ class Strategy(object):
total_data = pd.merge(total_data, total_benchmark, on=['trade_date', 'code'], how='left') total_data = pd.merge(total_data, total_benchmark, on=['trade_date', 'code'], how='left')
total_data.fillna({'weight': 0.}, inplace=True) total_data.fillna({'weight': 0.}, inplace=True)
total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code']) total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code'])
total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code'])
is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape((-1, 1)) is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape((-1, 1))
total_data.loc[:, 'benchmark'] = is_in_benchmark total_data.loc[:, 'benchmark'] = is_in_benchmark
...@@ -115,7 +115,6 @@ class Strategy(object): ...@@ -115,7 +115,6 @@ class Strategy(object):
horizon=self.horizon, horizon=self.horizon,
offset=1).set_index('trade_date') offset=1).set_index('trade_date')
self.total_data = total_data self.total_data = total_data
self.total_risk_cov = total_risk_cov
def _create_lu_bounds(self, running_setting, codes, benchmark_w): def _create_lu_bounds(self, running_setting, codes, benchmark_w):
...@@ -178,6 +177,7 @@ class Strategy(object): ...@@ -178,6 +177,7 @@ class Strategy(object):
models = dict(results) models = dict(results)
for ref_date, this_data in total_data_groups: for ref_date, this_data in total_data_groups:
risk_model = self.risk_models[ref_date]
new_model = models[ref_date] new_model = models[ref_date]
codes = this_data.code.values.tolist() codes = this_data.code.values.tolist()
...@@ -190,12 +190,6 @@ class Strategy(object): ...@@ -190,12 +190,6 @@ class Strategy(object):
remained_pos.fillna(0., inplace=True) remained_pos.fillna(0., inplace=True)
current_position = remained_pos.weight.values current_position = remained_pos.weight.values
if running_setting.rebalance_method == 'tv':
risk_cov = self.total_risk_cov[self.total_risk_cov.trade_date == ref_date]
sec_cov = self._generate_sec_cov(this_data, risk_cov)
else:
sec_cov = None
benchmark_w = this_data.weight.values benchmark_w = this_data.weight.values
constraints = LinearConstraints(running_setting.bounds, constraints = LinearConstraints(running_setting.bounds,
this_data, this_data,
...@@ -203,22 +197,14 @@ class Strategy(object): ...@@ -203,22 +197,14 @@ class Strategy(object):
lbound, ubound = self._create_lu_bounds(running_setting, codes, benchmark_w) lbound, ubound = self._create_lu_bounds(running_setting, codes, benchmark_w)
features = new_model.features this_data.fillna(0, inplace=True)
dfs = [] new_factors = factor_processing(this_data[new_model.features].values,
for name in features:
data_cleaned = this_data.dropna(subset=[name])
raw_factors = data_cleaned[[name]].values
new_factors = factor_processing(raw_factors,
pre_process=self.data_meta.pre_process, pre_process=self.data_meta.pre_process,
risk_factors=data_cleaned[ risk_factors=this_data[
self.data_meta.neutralized_risk].values.astype( self.data_meta.neutralized_risk].values.astype(
float) if self.data_meta.neutralized_risk else None, float) if self.data_meta.neutralized_risk else None,
post_process=self.data_meta.post_process) post_process=self.data_meta.post_process)
df = pd.DataFrame(new_factors, columns=[name], index=data_cleaned.code) new_factors = pd.DataFrame(new_factors, columns=new_model.features, index=codes)
dfs.append(df)
new_factors = pd.concat(dfs, axis=1)
new_factors = new_factors.loc[codes].fillna(new_factors.median())
er = new_model.predict(new_factors).astype(float) er = new_model.predict(new_factors).astype(float)
alpha_logger.info('{0} re-balance: {1} codes'.format(ref_date, len(er))) alpha_logger.info('{0} re-balance: {1} codes'.format(ref_date, len(er)))
...@@ -229,7 +215,7 @@ class Strategy(object): ...@@ -229,7 +215,7 @@ class Strategy(object):
benchmark_w, benchmark_w,
lbound, lbound,
ubound, ubound,
sec_cov=sec_cov, risk_model=risk_model.get_risk_profile(codes),
current_position=current_position) current_position=current_position)
target_pos['code'] = codes target_pos['code'] = codes
...@@ -260,15 +246,7 @@ class Strategy(object): ...@@ -260,15 +246,7 @@ class Strategy(object):
return ret_df, positions return ret_df, positions
@staticmethod def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w, lbound, ubound, risk_model,
def _generate_sec_cov(current_data, risk_cov):
risk_exposure = current_data[all_styles].values
risk_cov = risk_cov[all_styles].values
special_risk = current_data['srisk'].values
sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
return sec_cov
def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w, lbound, ubound, sec_cov,
current_position): current_position):
more_opts = running_setting.more_opts more_opts = running_setting.more_opts
target_pos, _ = er_portfolio_analysis(er, target_pos, _ = er_portfolio_analysis(er,
...@@ -282,7 +260,7 @@ class Strategy(object): ...@@ -282,7 +260,7 @@ class Strategy(object):
ubound=ubound, ubound=ubound,
current_position=current_position, current_position=current_position,
target_vol=more_opts.get('target_vol'), target_vol=more_opts.get('target_vol'),
cov=sec_cov, risk_model=risk_model,
turn_over_target=more_opts.get('turn_over_target')) turn_over_target=more_opts.get('turn_over_target'))
return target_pos return target_pos
...@@ -291,7 +269,6 @@ if __name__ == '__main__': ...@@ -291,7 +269,6 @@ if __name__ == '__main__':
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from dask.distributed import Client from dask.distributed import Client
from PyFin.api import CSQuantiles from PyFin.api import CSQuantiles
from PyFin.api import CSMeanAdjusted
from PyFin.api import LAST from PyFin.api import LAST
from alphamind.api import Universe from alphamind.api import Universe
from alphamind.api import ConstLinearModel from alphamind.api import ConstLinearModel
...@@ -308,7 +285,7 @@ if __name__ == '__main__': ...@@ -308,7 +285,7 @@ if __name__ == '__main__':
mpl.rcParams['font.sans-serif'] = ['SimHei'] mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False mpl.rcParams['axes.unicode_minus'] = False
start_date = '2017-01-01' start_date = '2010-05-01'
end_date = '2018-05-17' end_date = '2018-05-17'
freq = '10b' freq = '10b'
neutralized_risk = None neutralized_risk = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment