Commit 348a9c38 authored by Dr.李's avatar Dr.李

added more models

parent 2cf8fae1
......@@ -28,6 +28,7 @@ from alphamind.data.engines.sqlengine import factor_tables
from alphamind.model import LinearRegression
from alphamind.model import LassoRegression
from alphamind.model import ConstLinearModel
from alphamind.model import LogisticRegression
from alphamind.model import RandomForestRegressor
from alphamind.model import XGBRegressor
from alphamind.model import load_model
......@@ -40,27 +41,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor
from alphamind.execution.pipeline import ExecutionPipeline
from alphamind.utilities import alpha_logger
def map_freq(freq):
    """Translate a rebalance frequency code into a forward-return horizon.

    Known codes map to fixed horizons ('1m' -> 21, '1w' -> 4, '2w' -> 9,
    '3w' -> 14, '4w' -> 19, '1d' -> 0); a code of the form '<N>b' maps to
    N - 1 business days.  Anything else raises ``ValueError``.
    """
    fixed_horizons = {'1m': 21, '1w': 4, '2w': 9, '3w': 14, '4w': 19, '1d': 0}
    try:
        return fixed_horizons[freq]
    except KeyError:
        pass
    # '<N>b' means N business days; horizon counts the days *after* today.
    if freq[-1] == "b":
        return int(freq[:-1]) - 1
    raise ValueError("Unrecognized freq: {0}".format(freq))
from alphamind.utilities import map_freq
__all__ = [
......@@ -86,6 +67,7 @@ __all__ = [
'LinearRegression',
'LassoRegression',
'ConstLinearModel',
'LogisticRegression',
'RandomForestRegressor',
'XGBRegressor',
'load_model',
......
......@@ -8,6 +8,7 @@ Created on 2017-5-2
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBRegressor
......@@ -18,6 +19,7 @@ from alphamind.model.loader import load_model
__all__ = ['LinearRegression',
'LassoRegression',
'ConstLinearModel',
'LogisticRegression',
'RandomForestRegressor',
'XGBRegressor',
'load_model']
\ No newline at end of file
......@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar
from PyFin.DateUtilities import Period
from PyFin.Enums import TimeUnits
from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.data.engines.sqlengine import total_risk_factors
from alphamind.utilities import alpha_logger
def _map_horizon(frequency: str) -> int:
    """Convert a PyFin period string (e.g. '1w', '2m') into a day horizon.

    Business days / calendar days map to length - 1, weeks to 5 * length - 1,
    months to 22 * length - 1; any other unit raises ``ValueError``.
    """
    period = Period(frequency)
    length = period.length()
    unit = period.units()
    # Guard-clause form of the original if/elif ladder.
    if unit in (TimeUnits.BDays, TimeUnits.Days):
        return length - 1
    if unit == TimeUnits.Weeks:
        return 5 * length - 1
    if unit == TimeUnits.Months:
        return 22 * length - 1
    raise ValueError('{0} is an unrecognized frequency rule'.format(frequency))
from alphamind.utilities import map_freq
def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk):
......@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine,
dates = [d.strftime('%Y-%m-%d') for d in dates]
horizon = _map_horizon(frequency)
horizon = map_freq(frequency)
if isinstance(factors, Transformer):
transformer = factors
......@@ -265,7 +251,7 @@ def fetch_train_phase(engine,
dateRule=BizDayConventions.Following,
dateGenerationRule=DateGeneration.Backward)
horizon = _map_horizon(frequency)
horizon = map_freq(frequency)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
......@@ -344,10 +330,10 @@ def fetch_predict_phase(engine,
risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
risk_exp = train_x[neutralized_risk].values.astype(float)
x_values = train_x[names].values.astype(float)
else:
train_x = factor_df.copy()
risk_exp = None
x_values = train_x[names].values.astype(float)
date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
dates = np.unique(date_label)
......
......@@ -10,6 +10,7 @@ from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version
from sklearn.linear_model import LinearRegression as LinearRegressionImpl
from sklearn.linear_model import Lasso
from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl
from PyFin.api import pyFinAssert
from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger
......@@ -109,6 +110,33 @@ class LassoRegression(ModelBase):
return self.impl.coef_.tolist()
class LogisticRegression(ModelBase):
    """Thin wrapper exposing sklearn's logistic regression through ModelBase."""

    def __init__(self, features: list=None, fit_intercept: bool=False, **kwargs):
        super().__init__(features)
        # Extra keyword arguments are forwarded untouched to sklearn.
        self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)

    def save(self) -> dict:
        """Serialize the model description plus fitted coefficients."""
        desc = super().save()
        desc['sklearn_version'] = sklearn_version
        desc['weight'] = self.impl.coef_.tolist()
        return desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from ``model_desc``, warning on version skew."""
        obj_layout = super().load(model_desc)
        saved_version = model_desc['sklearn_version']
        if LooseVersion(sklearn_version) < LooseVersion(saved_version):
            alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(
                sklearn_version, saved_version))
        return obj_layout

    @property
    def weights(self):
        # Fitted coefficients as a plain Python list
        # (presumably only meaningful after fit() — confirm with ModelBase usage).
        return self.impl.coef_.tolist()
if __name__ == '__main__':
import pprint
......
......@@ -9,6 +9,7 @@ from alphamind.model.modelbase import ModelBase
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBRegressor
......@@ -24,6 +25,8 @@ def load_model(model_desc: dict) -> ModelBase:
return LinearRegression.load(model_desc)
elif 'LassoRegression' in model_name_parts:
return LassoRegression.load(model_desc)
elif 'LogisticRegression' in model_name_parts:
return LogisticRegression.load(model_desc)
elif 'RandomForestRegressor' in model_name_parts:
return RandomForestRegressor.load(model_desc)
elif 'XGBRegressor' in model_name_parts:
......
......@@ -25,6 +25,7 @@ class RandomForestRegressor(ModelBase):
def save(self) -> dict:
    """Serialize the base model description plus sklearn version and
    per-feature importances."""
    desc = super().save()
    desc['sklearn_version'] = sklearn_version
    desc['importances'] = self.importances
    return desc
@classmethod
......@@ -37,6 +38,10 @@ class RandomForestRegressor(ModelBase):
sklearn_version, model_desc['sklearn_version']))
return obj_layout
@property
def importances(self):
    # Per-feature importance scores from the underlying sklearn estimator,
    # converted to a plain Python list so it is JSON-serializable in save().
    # NOTE(review): presumably only valid after fit() — confirm with callers.
    return self.impl.feature_importances_.tolist()
class XGBRegressor(ModelBase):
......@@ -54,6 +59,7 @@ class XGBRegressor(ModelBase):
def save(self) -> dict:
    """Serialize the base model description plus the xgboost version and
    per-feature importances."""
    desc = super().save()
    # 'xgbboot_version' (sic) is the persisted key that load() reads back;
    # keep the spelling for compatibility with existing saved models.
    desc['xgbboot_version'] = xgbboot_version
    desc['importances'] = self.importances
    return desc
@classmethod
......@@ -66,6 +72,10 @@ class XGBRegressor(ModelBase):
xgbboot_version, model_desc['xgbboot_version']))
return obj_layout
@property
def importances(self):
    # Per-feature importance scores from the underlying xgboost estimator,
    # converted to a plain Python list so it is JSON-serializable in save().
    # NOTE(review): presumably only valid after fit() — confirm with callers.
    return self.impl.feature_importances_.tolist()
......@@ -11,6 +11,8 @@ from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression
from sklearn.linear_model import LogisticRegression as LogisticRegression2
from alphamind.model.linearmodel import LogisticRegression
class TestLinearModel(unittest.TestCase):
......@@ -18,7 +20,8 @@ class TestLinearModel(unittest.TestCase):
def setUp(self):
    """Build random training/prediction fixtures shared by the tests."""
    n_features = 3
    n_samples = 1000
    self.n = n_features
    self.train_x = np.random.randn(n_samples, n_features)
    self.train_y = np.random.randn(n_samples)
    # Binary labels for the logistic-regression tests: 1 where y > 0.
    self.train_y_label = np.where(self.train_y > 0., 1, 0)
    self.predict_x = np.random.randn(10, n_features)
def test_const_linear_model(self):
......@@ -53,6 +56,7 @@ class TestLinearModel(unittest.TestCase):
expected_y = expected_model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(expected_model.coef_, model.weights)
def test_linear_regression_persistence(self):
model = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
......@@ -65,3 +69,31 @@ class TestLinearModel(unittest.TestCase):
expected_y = model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(new_model.weights, model.weights)
def test_logistic_regression(self):
    """The wrapper must match a raw sklearn LogisticRegression fit on the
    same data, both in predictions and in coefficients."""
    model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
    model.fit(self.train_x, self.train_y_label)

    benchmark = LogisticRegression2(fit_intercept=False)
    benchmark.fit(self.train_x, self.train_y_label)

    calculated_y = model.predict(self.predict_x)
    expected_y = benchmark.predict(self.predict_x)
    np.testing.assert_array_equal(calculated_y, expected_y)
    np.testing.assert_array_almost_equal(benchmark.coef_, model.weights)
def test_logistic_regression_persistence(self):
    """Round-trip a fitted LogisticRegression through save()/load_model().

    Bug fix: this test previously constructed a ``LinearRegression``, so the
    logistic model's persistence path was never exercised; it now builds the
    model class named in the test.
    """
    model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
    model.fit(self.train_x, self.train_y_label)

    desc = model.save()
    new_model = load_model(desc)

    calculated_y = new_model.predict(self.predict_x)
    expected_y = model.predict(self.predict_x)

    np.testing.assert_array_almost_equal(calculated_y, expected_y)
    np.testing.assert_array_almost_equal(new_model.weights, model.weights)
......@@ -16,6 +16,27 @@ import numba as nb
alpha_logger = CustomLogger('ALPHA_MIND', 'info')
def map_freq(freq):
    """Translate a rebalance frequency code into a forward-return horizon.

    Known codes map to fixed horizons ('1m' -> 21, '1w' -> 4, '2w' -> 9,
    '3w' -> 14, '4w' -> 19, '1d' -> 0); a code of the form '<N>b' maps to
    N - 1 business days.

    Robustness fix: an empty string previously raised ``IndexError`` from
    ``freq[-1]`` and a malformed numeric part (e.g. 'xb') leaked the raw
    ``int()`` error; both now raise the documented ``ValueError``.

    :param freq: frequency code string.
    :return: integer horizon (number of days after today).
    :raises ValueError: if ``freq`` is not a recognized code.
    """
    fixed_horizons = {'1m': 21, '1w': 4, '2w': 9, '3w': 14, '4w': 19, '1d': 0}
    if freq in fixed_horizons:
        return fixed_horizons[freq]
    if freq.endswith("b"):
        try:
            return int(freq[:-1]) - 1
        except ValueError:
            pass  # fall through to the uniform error below
    raise ValueError("Unrecognized freq: {0}".format(freq))
def groupby(groups):
order = groups.argsort()
t = groups[order]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment