Commit 348a9c38 authored by Dr.李's avatar Dr.李

added more models

parent 2cf8fae1
...@@ -28,6 +28,7 @@ from alphamind.data.engines.sqlengine import factor_tables ...@@ -28,6 +28,7 @@ from alphamind.data.engines.sqlengine import factor_tables
from alphamind.model import LinearRegression from alphamind.model import LinearRegression
from alphamind.model import LassoRegression from alphamind.model import LassoRegression
from alphamind.model import ConstLinearModel from alphamind.model import ConstLinearModel
from alphamind.model import LogisticRegression
from alphamind.model import RandomForestRegressor from alphamind.model import RandomForestRegressor
from alphamind.model import XGBRegressor from alphamind.model import XGBRegressor
from alphamind.model import load_model from alphamind.model import load_model
...@@ -40,27 +41,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor ...@@ -40,27 +41,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor
from alphamind.execution.pipeline import ExecutionPipeline from alphamind.execution.pipeline import ExecutionPipeline
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
def map_freq(freq):
    """Translate a frequency string into a forward return horizon.

    Known symbolic frequencies ('1d', '1w'..'4w', '1m') map to fixed
    horizons; '<n>b' means n business days ahead (horizon n - 1).
    Raises ValueError for an unrecognized frequency string.
    """
    fixed_horizons = {'1m': 21,
                      '1w': 4,
                      '2w': 9,
                      '3w': 14,
                      '4w': 19,
                      '1d': 0}
    try:
        return fixed_horizons[freq]
    except KeyError:
        pass
    if freq[-1] == "b":
        return int(freq[:-1]) - 1
    raise ValueError("Unrecognized freq: {0}".format(freq))
__all__ = [ __all__ = [
...@@ -86,6 +67,7 @@ __all__ = [ ...@@ -86,6 +67,7 @@ __all__ = [
'LinearRegression', 'LinearRegression',
'LassoRegression', 'LassoRegression',
'ConstLinearModel', 'ConstLinearModel',
'LogisticRegression',
'RandomForestRegressor', 'RandomForestRegressor',
'XGBRegressor', 'XGBRegressor',
'load_model', 'load_model',
......
...@@ -8,6 +8,7 @@ Created on 2017-5-2 ...@@ -8,6 +8,7 @@ Created on 2017-5-2
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBRegressor from alphamind.model.treemodel import XGBRegressor
...@@ -18,6 +19,7 @@ from alphamind.model.loader import load_model ...@@ -18,6 +19,7 @@ from alphamind.model.loader import load_model
__all__ = ['LinearRegression', __all__ = ['LinearRegression',
'LassoRegression', 'LassoRegression',
'ConstLinearModel', 'ConstLinearModel',
'LogisticRegression',
'RandomForestRegressor', 'RandomForestRegressor',
'XGBRegressor', 'XGBRegressor',
'load_model'] 'load_model']
\ No newline at end of file
...@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions ...@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar from PyFin.api import advanceDateByCalendar
from PyFin.DateUtilities import Period from PyFin.DateUtilities import Period
from PyFin.Enums import TimeUnits
from alphamind.data.transformer import Transformer from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing from alphamind.data.processing import factor_processing
from alphamind.data.engines.sqlengine import total_risk_factors from alphamind.data.engines.sqlengine import total_risk_factors
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
def _map_horizon(frequency: str) -> int:
    """Convert a PyFin frequency rule (e.g. '1w', '2m') into a day-count horizon."""
    period = Period(frequency)
    n = period.length()
    unit = period.units()
    # Business days and calendar days are treated identically here.
    if unit in (TimeUnits.BDays, TimeUnits.Days):
        return n - 1
    if unit == TimeUnits.Weeks:
        return 5 * n - 1
    if unit == TimeUnits.Months:
        return 22 * n - 1
    raise ValueError('{0} is an unrecognized frequency rule'.format(frequency))
def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk): def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk):
...@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine, ...@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine,
dates = [d.strftime('%Y-%m-%d') for d in dates] dates = [d.strftime('%Y-%m-%d') for d in dates]
horizon = _map_horizon(frequency) horizon = map_freq(frequency)
if isinstance(factors, Transformer): if isinstance(factors, Transformer):
transformer = factors transformer = factors
...@@ -265,7 +251,7 @@ def fetch_train_phase(engine, ...@@ -265,7 +251,7 @@ def fetch_train_phase(engine,
dateRule=BizDayConventions.Following, dateRule=BizDayConventions.Following,
dateGenerationRule=DateGeneration.Backward) dateGenerationRule=DateGeneration.Backward)
horizon = _map_horizon(frequency) horizon = map_freq(frequency)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates) factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon) return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
...@@ -344,10 +330,10 @@ def fetch_predict_phase(engine, ...@@ -344,10 +330,10 @@ def fetch_predict_phase(engine,
risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna() risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code']) train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
risk_exp = train_x[neutralized_risk].values.astype(float) risk_exp = train_x[neutralized_risk].values.astype(float)
x_values = train_x[names].values.astype(float)
else: else:
train_x = factor_df.copy() train_x = factor_df.copy()
risk_exp = None risk_exp = None
x_values = train_x[names].values.astype(float)
date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime() date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
dates = np.unique(date_label) dates = np.unique(date_label)
......
...@@ -10,6 +10,7 @@ from distutils.version import LooseVersion ...@@ -10,6 +10,7 @@ from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version from sklearn import __version__ as sklearn_version
from sklearn.linear_model import LinearRegression as LinearRegressionImpl from sklearn.linear_model import LinearRegression as LinearRegressionImpl
from sklearn.linear_model import Lasso from sklearn.linear_model import Lasso
from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl
from PyFin.api import pyFinAssert from PyFin.api import pyFinAssert
from alphamind.model.modelbase import ModelBase from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
...@@ -109,6 +110,33 @@ class LassoRegression(ModelBase): ...@@ -109,6 +110,33 @@ class LassoRegression(ModelBase):
return self.impl.coef_.tolist() return self.impl.coef_.tolist()
class LogisticRegression(ModelBase):
    """Logistic-regression model backed by scikit-learn.

    The underlying estimator lives in ``self.impl``; any extra keyword
    arguments are forwarded to sklearn's ``LogisticRegression``.
    """

    def __init__(self, features: list=None, fit_intercept: bool=False, **kwargs):
        super().__init__(features)
        self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)

    def save(self) -> dict:
        # Persist the base description plus the sklearn version and the
        # fitted coefficients for later inspection.
        desc = super().save()
        desc['sklearn_version'] = sklearn_version
        desc['weight'] = self.impl.coef_.tolist()
        return desc

    @classmethod
    def load(cls, model_desc: dict):
        layout = super().load(model_desc)
        saved_version = model_desc['sklearn_version']
        # Warn (but do not fail) when deserializing a model produced by a
        # newer sklearn than the one currently installed.
        if LooseVersion(sklearn_version) < LooseVersion(saved_version):
            alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(sklearn_version, saved_version))
        return layout

    @property
    def weights(self):
        # Coefficients of the fitted sklearn estimator, as a plain list.
        return self.impl.coef_.tolist()
if __name__ == '__main__': if __name__ == '__main__':
import pprint import pprint
......
...@@ -9,6 +9,7 @@ from alphamind.model.modelbase import ModelBase ...@@ -9,6 +9,7 @@ from alphamind.model.modelbase import ModelBase
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBRegressor from alphamind.model.treemodel import XGBRegressor
...@@ -24,6 +25,8 @@ def load_model(model_desc: dict) -> ModelBase: ...@@ -24,6 +25,8 @@ def load_model(model_desc: dict) -> ModelBase:
return LinearRegression.load(model_desc) return LinearRegression.load(model_desc)
elif 'LassoRegression' in model_name_parts: elif 'LassoRegression' in model_name_parts:
return LassoRegression.load(model_desc) return LassoRegression.load(model_desc)
elif 'LogisticRegression' in model_name_parts:
return LogisticRegression.load(model_desc)
elif 'RandomForestRegressor' in model_name_parts: elif 'RandomForestRegressor' in model_name_parts:
return RandomForestRegressor.load(model_desc) return RandomForestRegressor.load(model_desc)
elif 'XGBRegressor' in model_name_parts: elif 'XGBRegressor' in model_name_parts:
......
...@@ -25,6 +25,7 @@ class RandomForestRegressor(ModelBase): ...@@ -25,6 +25,7 @@ class RandomForestRegressor(ModelBase):
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -37,6 +38,10 @@ class RandomForestRegressor(ModelBase): ...@@ -37,6 +38,10 @@ class RandomForestRegressor(ModelBase):
sklearn_version, model_desc['sklearn_version'])) sklearn_version, model_desc['sklearn_version']))
return obj_layout return obj_layout
    @property
    def importances(self):
        # Feature importances of the fitted underlying estimator, converted
        # to a plain list so it is JSON-serializable for save().
        # NOTE(review): accessing this before fit() raises — presumably only
        # called on trained models; confirm with callers.
        return self.impl.feature_importances_.tolist()
class XGBRegressor(ModelBase): class XGBRegressor(ModelBase):
...@@ -54,6 +59,7 @@ class XGBRegressor(ModelBase): ...@@ -54,6 +59,7 @@ class XGBRegressor(ModelBase):
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -66,6 +72,10 @@ class XGBRegressor(ModelBase): ...@@ -66,6 +72,10 @@ class XGBRegressor(ModelBase):
xgbboot_version, model_desc['xgbboot_version'])) xgbboot_version, model_desc['xgbboot_version']))
return obj_layout return obj_layout
    @property
    def importances(self):
        # Feature importances of the fitted XGBoost estimator, converted to
        # a plain list so it is JSON-serializable for save().
        # NOTE(review): accessing this before fit() raises — presumably only
        # called on trained models; confirm with callers.
        return self.impl.feature_importances_.tolist()
...@@ -11,6 +11,8 @@ from sklearn.linear_model import LinearRegression as LinearRegression2 ...@@ -11,6 +11,8 @@ from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from sklearn.linear_model import LogisticRegression as LogisticRegression2
from alphamind.model.linearmodel import LogisticRegression
class TestLinearModel(unittest.TestCase): class TestLinearModel(unittest.TestCase):
...@@ -18,7 +20,8 @@ class TestLinearModel(unittest.TestCase): ...@@ -18,7 +20,8 @@ class TestLinearModel(unittest.TestCase):
def setUp(self): def setUp(self):
self.n = 3 self.n = 3
self.train_x = np.random.randn(1000, self.n) self.train_x = np.random.randn(1000, self.n)
self.train_y = np.random.randn(1000, 1) self.train_y = np.random.randn(1000)
self.train_y_label = np.where(self.train_y > 0., 1, 0)
self.predict_x = np.random.randn(10, self.n) self.predict_x = np.random.randn(10, self.n)
def test_const_linear_model(self): def test_const_linear_model(self):
...@@ -53,6 +56,7 @@ class TestLinearModel(unittest.TestCase): ...@@ -53,6 +56,7 @@ class TestLinearModel(unittest.TestCase):
expected_y = expected_model.predict(self.predict_x) expected_y = expected_model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y) np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(expected_model.coef_, model.weights)
def test_linear_regression_persistence(self): def test_linear_regression_persistence(self):
model = LinearRegression(['a', 'b', 'c'], fit_intercept=False) model = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
...@@ -65,3 +69,31 @@ class TestLinearModel(unittest.TestCase): ...@@ -65,3 +69,31 @@ class TestLinearModel(unittest.TestCase):
expected_y = model.predict(self.predict_x) expected_y = model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y) np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(new_model.weights, model.weights)
def test_logistic_regression(self):
model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
model.fit(self.train_x, self.train_y_label)
calculated_y = model.predict(self.predict_x)
expected_model = LogisticRegression2(fit_intercept=False)
expected_model.fit(self.train_x, self.train_y_label)
expected_y = expected_model.predict(self.predict_x)
np.testing.assert_array_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(expected_model.coef_, model.weights)
def test_logistic_regression_persistence(self):
model = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
model.fit(self.train_x, self.train_y_label)
desc = model.save()
new_model = load_model(desc)
calculated_y = new_model.predict(self.predict_x)
expected_y = model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(new_model.weights, model.weights)
...@@ -16,6 +16,27 @@ import numba as nb ...@@ -16,6 +16,27 @@ import numba as nb
alpha_logger = CustomLogger('ALPHA_MIND', 'info') alpha_logger = CustomLogger('ALPHA_MIND', 'info')
def map_freq(freq):
    """Map a rebalance frequency string to a forward return horizon.

    Parameters
    ----------
    freq : str
        One of '1d', '1w', '2w', '3w', '4w', '1m', or '<n>b' meaning
        n business days ahead (horizon n - 1).

    Returns
    -------
    int
        The horizon in periods corresponding to the frequency.

    Raises
    ------
    ValueError
        If the frequency string is not recognized. (Previously an empty or
        malformed string could escape as IndexError/int-parse ValueError
        with an unhelpful message; all bad inputs now raise the uniform
        "Unrecognized freq" ValueError.)
    """
    fixed_horizons = {'1m': 21,
                      '1w': 4,
                      '2w': 9,
                      '3w': 14,
                      '4w': 19,
                      '1d': 0}
    if freq in fixed_horizons:
        return fixed_horizons[freq]
    if freq.endswith("b"):
        try:
            return int(freq[:-1]) - 1
        except ValueError:
            pass
    raise ValueError("Unrecognized freq: {0}".format(freq))
def groupby(groups): def groupby(groups):
order = groups.argsort() order = groups.argsort()
t = groups[order] t = groups[order]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment