Commit ed1f44d9 authored by Dr.李's avatar Dr.李

Fixed bug and added y to predict data

parent 31805b28
...@@ -8,6 +8,7 @@ Created on 2017-9-27 ...@@ -8,6 +8,7 @@ Created on 2017-9-27
import copy import copy
import bisect import bisect
from typing import Iterable from typing import Iterable
import numpy as np
import pandas as pd import pandas as pd
from simpleutils.miscellaneous import list_eq from simpleutils.miscellaneous import list_eq
from alphamind.model.modelbase import ModelBase from alphamind.model.modelbase import ModelBase
...@@ -133,7 +134,8 @@ class DataMeta(object): ...@@ -133,7 +134,8 @@ class DataMeta(object):
self.pre_process, self.pre_process,
self.post_process, self.post_process,
self.warm_start, self.warm_start,
fillna=True) fillna=True,
fit_target=alpha_model.fit_target)
def train_model(ref_date: str, def train_model(ref_date: str,
...@@ -186,6 +188,15 @@ class Composer(object): ...@@ -186,6 +188,15 @@ class Composer(object):
codes = x.index codes = x.index
return pd.DataFrame(model.predict(x_values).flatten(), index=codes) return pd.DataFrame(model.predict(x_values).flatten(), index=codes)
def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None) -> float:
    """Score the latest model available for ``ref_date``.

    Any of ``x`` / ``y`` left as ``None`` is filled in from the predict-phase
    data fetched through ``self.data_meta``; values supplied by the caller
    are never overwritten.

    :param ref_date: reference date used to look up the model and, when
        needed, the predict data.
    :param x: feature data; fetched from the predict data when ``None``.
    :param y: target values; fetched from the predict data when ``None``.
    :return: the result of ``model.score(x, y)``.
    """
    model = self._fetch_latest_model(ref_date)

    if x is None or y is None:
        # Fetch once whenever either input is missing. Previously the data
        # was only fetched when ``x`` was None, so calling with an explicit
        # ``x`` and no ``y`` had nothing to take ``y`` from.
        fetched = self.data_meta.fetch_predict_data(ref_date, model)['predict']
        if x is None:
            x = fetched['x']
        if y is None:
            y = fetched['y']

    return model.score(x, y)
def _fetch_latest_model(self, ref_date) -> ModelBase: def _fetch_latest_model(self, ref_date) -> ModelBase:
if self.is_updated: if self.is_updated:
sorted_keys = self.sorted_keys sorted_keys = self.sorted_keys
...@@ -211,35 +222,33 @@ class Composer(object): ...@@ -211,35 +222,33 @@ class Composer(object):
if __name__ == '__main__': if __name__ == '__main__':
import numpy as np from PyFin.api import LAST
from alphamind.data.standardize import standardize from alphamind.data.engines.sqlengine import risk_styles, industry_styles
from alphamind.data.winsorize import winsorize_normal from alphamind.model.linearmodel import LinearRegression
from alphamind.data.engines.sqlengine import industry_styles
from alphamind.model.linearmodel import ConstLinearModel universe = Universe('custom', ['ashare_ex'])
freq = '20b'
data_source = "postgres+psycopg2://postgres:we083826@localhost/alpha" batch = 0
alpha_model = ConstLinearModel(['EPS'], np.array([1.])) neutralized_risk = risk_styles + industry_styles
alpha_factors = ['EPS']
freq = '1w'
universe = Universe('zz500', ['zz500'])
batch = 4
neutralized_risk = ['SIZE'] + industry_styles
risk_model = 'short' risk_model = 'short'
pre_process = [winsorize_normal, standardize] pre_process = [winsorize_normal, standardize]
pos_process = [winsorize_normal, standardize] post_process = [standardize]
warm_start = 0
data_meta = DataMeta(freq, data_source = "postgres+psycopg2://postgres:we083826@localhost/alpha"
universe,
batch, data_meta = DataMeta(freq=freq,
neutralized_risk, universe=universe,
risk_model, batch=batch,
pre_process, neutralized_risk=neutralized_risk,
pos_process, risk_model=risk_model,
pre_process=pre_process,
post_process=post_process,
warm_start=warm_start,
data_source=data_source) data_source=data_source)
composer = Composer(alpha_model, data_meta) alpha_model = LinearRegression({'roe_q': LAST('roe_q')}, fit_target='roe_q')
composer = Composer(alpha_model=alpha_model, data_meta=data_meta)
composer.train('2017-09-20') ref_date = '2018-01-30'
composer.train('2017-09-22') composer.train(ref_date)
composer.train('2017-09-25') res = composer.predict(ref_date)
composer.predict('2017-09-21') \ No newline at end of file
...@@ -310,10 +310,10 @@ def fetch_train_phase(engine, ...@@ -310,10 +310,10 @@ def fetch_train_phase(engine,
if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'): if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
pyFinAssert(len(dates) >= 2, ValueError, "No previous data for training for the date {0}".format(ref_date)) pyFinAssert(len(dates) >= 2, ValueError, "No previous data for training for the date {0}".format(ref_date))
end = dates[-2] end = dates[-2]
start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0] start = dates[-batch - 2] if batch <= len(dates) - 2 else dates[0]
else: else:
end = dates[-1] end = dates[-1]
start = dates[-batch] if batch <= len(dates) else dates[0] start = dates[-batch - 1] if batch <= len(dates) else dates[0]
index = (date_label >= start) & (date_label <= end) index = (date_label >= start) & (date_label <= end)
this_raw_x = x_values[index] this_raw_x = x_values[index]
...@@ -352,7 +352,8 @@ def fetch_predict_phase(engine, ...@@ -352,7 +352,8 @@ def fetch_predict_phase(engine,
pre_process: Iterable[object] = None, pre_process: Iterable[object] = None,
post_process: Iterable[object] = None, post_process: Iterable[object] = None,
warm_start: int = 0, warm_start: int = 0,
fillna: str = None): fillna: str = None,
fit_target: Union[Transformer, object] = None):
if isinstance(alpha_factors, Transformer): if isinstance(alpha_factors, Transformer):
transformer = alpha_factors transformer = alpha_factors
else: else:
...@@ -369,6 +370,8 @@ def fetch_predict_phase(engine, ...@@ -369,6 +370,8 @@ def fetch_predict_phase(engine,
dateRule=BizDayConventions.Following, dateRule=BizDayConventions.Following,
dateGenerationRule=DateGeneration.Backward) dateGenerationRule=DateGeneration.Backward)
horizon = map_freq(frequency)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates) factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
if fillna: if fillna:
...@@ -377,6 +380,14 @@ def fetch_predict_phase(engine, ...@@ -377,6 +380,14 @@ def fetch_predict_phase(engine,
else: else:
factor_df = factor_df.dropna() factor_df = factor_df.dropna()
if fit_target is None:
target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
else:
one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
target_df = target_df[target_df.trade_date.isin(dates)]
target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
names = transformer.names names = transformer.names
if neutralized_risk: if neutralized_risk:
...@@ -384,13 +395,15 @@ def fetch_predict_phase(engine, ...@@ -384,13 +395,15 @@ def fetch_predict_phase(engine,
used_neutralized_risk = list(set(neutralized_risk).difference(names)) used_neutralized_risk = list(set(neutralized_risk).difference(names))
risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna() risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code']) train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
train_x = pd.merge(train_x, target_df, on=['trade_date', 'code'], how='left')
risk_exp = train_x[neutralized_risk].values.astype(float) risk_exp = train_x[neutralized_risk].values.astype(float)
else: else:
train_x = factor_df.copy() train_x = pd.merge(factor_df, target_df, on=['trade_date', 'code'], how='left')
risk_exp = None risk_exp = None
x_values = train_x[names].values.astype(float) x_values = train_x[names].values.astype(float)
y_values = train_x['dx'].values.astype(float)
date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime() date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
dates = np.unique(date_label) dates = np.unique(date_label)
if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'): if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
...@@ -400,6 +413,7 @@ def fetch_predict_phase(engine, ...@@ -400,6 +413,7 @@ def fetch_predict_phase(engine,
left_index = bisect.bisect_left(date_label, start) left_index = bisect.bisect_left(date_label, start)
right_index = bisect.bisect_right(date_label, end) right_index = bisect.bisect_right(date_label, end)
this_raw_x = x_values[left_index:right_index] this_raw_x = x_values[left_index:right_index]
this_raw_y = y_values[left_index:right_index]
sub_dates = date_label[left_index:right_index] sub_dates = date_label[left_index:right_index]
if risk_exp is not None: if risk_exp is not None:
...@@ -412,10 +426,16 @@ def fetch_predict_phase(engine, ...@@ -412,10 +426,16 @@ def fetch_predict_phase(engine,
risk_factors=this_risk_exp, risk_factors=this_risk_exp,
post_process=post_process) post_process=post_process)
ne_y = factor_processing(this_raw_y,
pre_process=pre_process,
risk_factors=this_risk_exp,
post_process=post_process)
inner_left_index = bisect.bisect_left(sub_dates, end) inner_left_index = bisect.bisect_left(sub_dates, end)
inner_right_index = bisect.bisect_right(sub_dates, end) inner_right_index = bisect.bisect_right(sub_dates, end)
ne_x = ne_x[inner_left_index:inner_right_index] ne_x = ne_x[inner_left_index:inner_right_index]
ne_y = ne_y[inner_left_index:inner_right_index]
left_index = bisect.bisect_left(date_label, end) left_index = bisect.bisect_left(date_label, end)
right_index = bisect.bisect_right(date_label, end) right_index = bisect.bisect_right(date_label, end)
...@@ -423,11 +443,12 @@ def fetch_predict_phase(engine, ...@@ -423,11 +443,12 @@ def fetch_predict_phase(engine,
codes = train_x.code.values[left_index:right_index] codes = train_x.code.values[left_index:right_index]
else: else:
ne_x = None ne_x = None
ne_y = None
codes = None codes = None
ret = dict() ret = dict()
ret['x_names'] = transformer.names ret['x_names'] = transformer.names
ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes} ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes, 'y': ne_y.flatten()}
return ret return ret
...@@ -437,7 +458,7 @@ if __name__ == '__main__': ...@@ -437,7 +458,7 @@ if __name__ == '__main__':
engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha') engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha')
universe = Universe('zz500', ['hs300', 'zz500']) universe = Universe('zz500', ['hs300', 'zz500'])
neutralized_risk = risk_styles + industry_styles neutralized_risk = risk_styles + industry_styles
res = fetch_train_phase(engine, ['ep_q'], res = fetch_predict_phase(engine, ['ep_q'],
'2012-01-05', '2012-01-05',
'5b', '5b',
universe, universe,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment