Commit 2553b951 authored by Dr.李

added fit target setting in models

parent 189ee654
......@@ -9,6 +9,7 @@ import copy
import bisect
from typing import Iterable
import pandas as pd
from typing import Union
from simpleutils.miscellaneous import list_eq
from alphamind.model.modelbase import ModelBase
from alphamind.model.data_preparing import fetch_train_phase
......@@ -19,6 +20,7 @@ from alphamind.data.winsorize import winsorize_normal
from alphamind.data.rank import rank
from alphamind.data.standardize import standardize
from alphamind.model.loader import load_model
from alphamind.data.transformer import Transformer
PROCESS_MAPPING = {
'winsorize_normal': winsorize_normal,
......@@ -116,7 +118,8 @@ class DataMeta(object):
self.risk_model,
self.pre_process,
self.post_process,
self.warm_start)
self.warm_start,
fit_target=alpha_model.fit_target)
def fetch_predict_data(self,
ref_date: str,
......
......@@ -60,7 +60,8 @@ def prepare_data(engine: SqlEngine,
frequency: str,
universe: Universe,
benchmark: int,
warm_start: int = 0):
warm_start: int = 0,
fit_target: Union[Transformer, object]=None):
if warm_start > 0:
p = Period(frequency)
p = Period(length=-warm_start * p.length(), units=p.units())
......@@ -86,14 +87,22 @@ def prepare_data(engine: SqlEngine,
factors=transformer,
dates=dates).sort_values(['trade_date', 'code'])
alpha_logger.info("factor data loading finished")
return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
alpha_logger.info("return data loading finished")
if fit_target is None:
target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
else:
one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
target_df = target_df[target_df.trade_date.isin(dates)]
target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
alpha_logger.info("fit target data loading finished")
industry_df = engine.fetch_industry_range(universe, dates=dates)
alpha_logger.info("industry data loading finished")
benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
alpha_logger.info("benchmark data loading finished")
df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
df = pd.merge(df, industry_df, on=['trade_date', 'code'])
df['weight'] = df['weight'].fillna(0.)
......@@ -262,7 +271,7 @@ def fetch_train_phase(engine,
pre_process: Iterable[object] = None,
post_process: Iterable[object] = None,
warm_start: int = 0,
fitting_target: Union[Transformer, object] = None) -> dict:
fit_target: Union[Transformer, object] = None) -> dict:
if isinstance(alpha_factors, Transformer):
transformer = alpha_factors
else:
......@@ -282,11 +291,11 @@ def fetch_train_phase(engine,
horizon = map_freq(frequency)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
if fitting_target is None:
if fit_target is None:
target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
else:
one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
target_df = engine.fetch_factor_range_forward(universe, factors=fitting_target, dates=dates + [one_more_date])
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
target_df = target_df[target_df.trade_date.isin(dates)]
target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
......@@ -424,14 +433,16 @@ def fetch_predict_phase(engine,
if __name__ == '__main__':
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
from alphamind.api import risk_styles, industry_styles, standardize
engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha')
universe = Universe('zz500', ['hs300', 'zz500'])
neutralized_risk = ['SIZE']
neutralized_risk = risk_styles + industry_styles
res = fetch_train_phase(engine, ['ep_q'],
'2012-01-05',
'5b',
universe,
16,
neutralized_risk=neutralized_risk,
fitting_target='closePrice')
post_process=[standardize],
fit_target='closePrice')
print(res)
......@@ -32,8 +32,9 @@ class ConstLinearModel(ModelBase):
def __init__(self,
features=None,
weights: dict = None):
super().__init__(features)
weights: dict = None,
fit_target=None):
super().__init__(features=features, fit_target=fit_target)
if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights),
ValueError,
......@@ -57,8 +58,8 @@ class ConstLinearModel(ModelBase):
class LinearRegression(ModelBase):
def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
super().__init__(features=features, fit_target=fit_target)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
......@@ -84,8 +85,8 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase):
def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
super().__init__(features=features, fit_target=fit_target)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
......@@ -111,8 +112,8 @@ class LassoRegression(ModelBase):
class LogisticRegression(ModelBase):
def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
super().__init__(features=features, fit_target=fit_target)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
......
......@@ -18,12 +18,17 @@ from alphamind.data.transformer import Transformer
class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features=None):
def __init__(self, features=None, fit_target=None):
if features is not None:
self.formulas = Transformer(features)
self.features = self.formulas.names
else:
self.features = None
if fit_target is not None:
self.fit_target = Transformer(fit_target)
else:
self.fit_target = None
self.impl = None
self.trained_time = None
......@@ -31,7 +36,8 @@ class ModelBase(metaclass=abc.ABCMeta):
return encode(self.impl) == encode(rhs.impl) \
and self.trained_time == rhs.trained_time \
and list_eq(self.features, rhs.features) \
and encode(self.formulas) == encode(rhs.formulas)
and encode(self.formulas) == encode(rhs.formulas) \
and encode(self.fit_target) == encode(rhs.fit_target)
def fit(self, x: pd.DataFrame, y: np.ndarray):
self.impl.fit(x[self.features].values, y.flatten())
......@@ -56,15 +62,21 @@ class ModelBase(metaclass=abc.ABCMeta):
trained_time=self.trained_time,
desc=encode(self.impl),
formulas=encode(self.formulas),
fit_target=encode(self.fit_target),
internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
return model_desc
@abc.abstractclassmethod
@abc.abstractmethod
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.formulas = decode(model_desc['formulas'])
obj_layout.trained_time = model_desc['trained_time']
obj_layout.impl = decode(model_desc['desc'])
if 'fit_target' in model_desc:
obj_layout.fit_target = decode(model_desc['fit_target'])
else:
obj_layout.fit_target = None
return obj_layout
......@@ -27,8 +27,9 @@ class RandomForestRegressor(ModelBase):
n_estimators: int=100,
max_features: str='auto',
features=None,
fit_target=None,
**kwargs):
super().__init__(features)
super().__init__(features=features, fit_target=fit_target)
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -59,8 +60,9 @@ class RandomForestClassifier(ModelBase):
n_estimators: int=100,
max_features: str='auto',
features=None,
fit_target=None,
**kwargs):
super().__init__(features)
super().__init__(features=features, fit_target=fit_target)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -92,9 +94,10 @@ class XGBRegressor(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features=None,
fit_target=None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features=features, fit_target=fit_target)
self.impl = XGBRegressorImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
......@@ -128,9 +131,10 @@ class XGBClassifier(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features=None,
fit_target=None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features=features, fit_target=fit_target)
self.impl = XGBClassifierImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
......@@ -171,10 +175,11 @@ class XGBTrainer(ModelBase):
subsample=1.,
colsample_bytree=1.,
features=None,
fit_target=None,
random_state: int=0,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features=features, fit_target=fit_target)
self.params = {
'silent': 1,
'objective': objective,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment