Commit aa7b9e86 authored by Dr.李's avatar Dr.李

using more abstract factor model

parent b482c543
......@@ -44,6 +44,7 @@ from alphamind.data.engines.utilities import _map_risk_model_table
from alphamind.data.engines.utilities import factor_tables
from alphamind.data.engines.utilities import industry_list
from alphamind.data.processing import factor_processing
from alphamind.portfolio.riskmodel import FactorRiskModel
from PyFin.api import advanceDateByCalendar
......@@ -525,7 +526,8 @@ class SqlEngine(object):
ref_date: str,
codes: Iterable[int],
risk_model: str = 'short',
excluded: Iterable[str] = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
excluded: Iterable[str] = None,
model_type: str = None) -> Union[FactorRiskModel, Tuple[pd.DataFrame, pd.DataFrame]]:
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
......@@ -552,11 +554,20 @@ class SqlEngine(object):
.select_from(big_table).where(
and_(RiskExposure.trade_date == ref_date,
RiskExposure.code.in_(codes)
)).distinct()
))
risk_exp = pd.read_sql(query, self.engine).dropna()
return risk_cov, risk_exp.drop_duplicates(['code'])
if not model_type:
return risk_cov, risk_exp
elif model_type == 'factor':
factor_names = risk_cov.Factor.tolist()
risk_cov.set_index('Factor', inplace=True)
factor_cov = risk_cov.loc[factor_names, factor_names] / 10000.
risk_exp.set_index('code', inplace=True)
factor_loading = risk_exp.loc[:, factor_names]
idsync = risk_exp['srisk'] * risk_exp['srisk'] / 10000
return FactorRiskModel(factor_cov, factor_loading, idsync)
def fetch_risk_model_range(self,
universe: Universe,
......@@ -564,12 +575,11 @@ class SqlEngine(object):
end_date: str = None,
dates: Iterable[str] = None,
risk_model: str = 'short',
excluded: Iterable[str] = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
excluded: Iterable[str] = None,
model_type: str = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(start_date,
end_date)
query = select([risk_cov_table.trade_date,
......@@ -578,16 +588,13 @@ class SqlEngine(object):
+ cov_risk_cols).where(
cond
)
risk_cov = pd.read_sql(query, self.engine).sort_values(['trade_date', 'FactorID'])
if not excluded:
excluded = []
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]
cond = universe._query_statements(start_date, end_date, dates)
big_table = join(RiskExposure, UniverseTable,
and_(
RiskExposure.trade_date == UniverseTable.trade_date,
......@@ -611,7 +618,24 @@ class SqlEngine(object):
risk_exp = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).dropna()
return risk_cov, risk_exp
if not model_type:
return risk_cov, risk_exp
elif model_type == 'factor':
risk_cov.set_index('Factor', inplace=True)
risk_exp.set_index('code', inplace=True)
risk_cov_groups = risk_cov.groupby('trade_date')
risk_exp_groups = risk_exp.groupby('trade_date')
models = dict()
for ref_date, cov_g in risk_cov_groups:
exp_g = risk_exp_groups.get_group(ref_date)
factor_names = cov_g.index.tolist()
factor_cov = cov_g.loc[factor_names, factor_names] / 10000.
factor_loading = exp_g.loc[:, factor_names]
idsync = exp_g['srisk'] * exp_g['srisk'] / 10000
models[ref_date] = FactorRiskModel(factor_cov, factor_loading, idsync)
return pd.Series(models)
def fetch_industry(self,
ref_date: str,
......@@ -1023,11 +1047,9 @@ if __name__ == '__main__':
from PyFin.api import *
engine = SqlEngine()
ref_date = '2017-05-03'
universe = Universe('custon', ['zz800'])
codes = engine.fetch_codes(ref_date, universe)
# df = engine.fetch_trade_status(ref_date, codes, offset=1)
universe = Universe('zz800')
dates = bizDatesList('china.sse', '2018-05-01', '2018-05-10')
dates = [d.strftime('%Y-%m-%d') for d in dates]
dates = ['2017-05-02', '2017-05-03', '2017-05-04']
df = engine.fetch_trade_status_range(universe, dates=dates, offset=1)
print(df)
\ No newline at end of file
res = engine.fetch_risk_model_range(universe, dates=dates, model_type='factor')
print(res)
\ No newline at end of file
......@@ -87,10 +87,11 @@ class Strategy(object):
benchmark=self.benchmark)
alpha_logger.info("benchmark data loading finished ...")
total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
self.risk_models = self.engine.fetch_risk_model_range(
self.universe,
dates=self.dates,
risk_model=self.data_meta.risk_model
risk_model=self.data_meta.risk_model,
model_type='factor'
)
alpha_logger.info("risk_model data loading finished ...")
......@@ -104,7 +105,6 @@ class Strategy(object):
total_data = pd.merge(total_data, total_benchmark, on=['trade_date', 'code'], how='left')
total_data.fillna({'weight': 0.}, inplace=True)
total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code'])
total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code'])
is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape((-1, 1))
total_data.loc[:, 'benchmark'] = is_in_benchmark
......@@ -115,7 +115,6 @@ class Strategy(object):
horizon=self.horizon,
offset=1).set_index('trade_date')
self.total_data = total_data
self.total_risk_cov = total_risk_cov
def _create_lu_bounds(self, running_setting, codes, benchmark_w):
......@@ -178,6 +177,7 @@ class Strategy(object):
models = dict(results)
for ref_date, this_data in total_data_groups:
risk_model = self.risk_models[ref_date]
new_model = models[ref_date]
codes = this_data.code.values.tolist()
......@@ -190,12 +190,6 @@ class Strategy(object):
remained_pos.fillna(0., inplace=True)
current_position = remained_pos.weight.values
if running_setting.rebalance_method == 'tv':
risk_cov = self.total_risk_cov[self.total_risk_cov.trade_date == ref_date]
sec_cov = self._generate_sec_cov(this_data, risk_cov)
else:
sec_cov = None
benchmark_w = this_data.weight.values
constraints = LinearConstraints(running_setting.bounds,
this_data,
......@@ -203,22 +197,14 @@ class Strategy(object):
lbound, ubound = self._create_lu_bounds(running_setting, codes, benchmark_w)
features = new_model.features
dfs = []
for name in features:
data_cleaned = this_data.dropna(subset=[name])
raw_factors = data_cleaned[[name]].values
new_factors = factor_processing(raw_factors,
pre_process=self.data_meta.pre_process,
risk_factors=data_cleaned[
self.data_meta.neutralized_risk].values.astype(
float) if self.data_meta.neutralized_risk else None,
post_process=self.data_meta.post_process)
df = pd.DataFrame(new_factors, columns=[name], index=data_cleaned.code)
dfs.append(df)
new_factors = pd.concat(dfs, axis=1)
new_factors = new_factors.loc[codes].fillna(new_factors.median())
this_data.fillna(0, inplace=True)
new_factors = factor_processing(this_data[new_model.features].values,
pre_process=self.data_meta.pre_process,
risk_factors=this_data[
self.data_meta.neutralized_risk].values.astype(
float) if self.data_meta.neutralized_risk else None,
post_process=self.data_meta.post_process)
new_factors = pd.DataFrame(new_factors, columns=new_model.features, index=codes)
er = new_model.predict(new_factors).astype(float)
alpha_logger.info('{0} re-balance: {1} codes'.format(ref_date, len(er)))
......@@ -229,7 +215,7 @@ class Strategy(object):
benchmark_w,
lbound,
ubound,
sec_cov=sec_cov,
risk_model=risk_model.get_risk_profile(codes),
current_position=current_position)
target_pos['code'] = codes
......@@ -260,15 +246,7 @@ class Strategy(object):
return ret_df, positions
@staticmethod
def _generate_sec_cov(current_data, risk_cov):
risk_exposure = current_data[all_styles].values
risk_cov = risk_cov[all_styles].values
special_risk = current_data['srisk'].values
sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000
return sec_cov
def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w, lbound, ubound, sec_cov,
def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w, lbound, ubound, risk_model,
current_position):
more_opts = running_setting.more_opts
target_pos, _ = er_portfolio_analysis(er,
......@@ -282,7 +260,7 @@ class Strategy(object):
ubound=ubound,
current_position=current_position,
target_vol=more_opts.get('target_vol'),
cov=sec_cov,
risk_model=risk_model,
turn_over_target=more_opts.get('turn_over_target'))
return target_pos
......@@ -291,7 +269,6 @@ if __name__ == '__main__':
from matplotlib import pyplot as plt
from dask.distributed import Client
from PyFin.api import CSQuantiles
from PyFin.api import CSMeanAdjusted
from PyFin.api import LAST
from alphamind.api import Universe
from alphamind.api import ConstLinearModel
......@@ -308,7 +285,7 @@ if __name__ == '__main__':
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
start_date = '2017-01-01'
start_date = '2010-05-01'
end_date = '2018-05-17'
freq = '10b'
neutralized_risk = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment