Commit 3d9cced2 authored by Dr.李

model refactor

parent 2ddd1a02
@@ -6,7 +6,6 @@ Created on 2017-5-10
"""
import numpy as np
import arrow
from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version
from sklearn.linear_model import LinearRegression as LinearRegressionImpl
@@ -14,8 +13,18 @@ from sklearn.linear_model import Lasso
from PyFin.api import pyFinAssert
from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class ConstLinearModelImpl(object):
def __init__(self, weights: np.ndarray=None):
self.weights = np.array(weights).flatten()
def fit(self, x: np.ndarray, y: np.ndarray):
pass
def predict(self, x: np.ndarray):
return x @ self.weights
class ConstLinearModel(ModelBase):
@@ -28,47 +37,32 @@ class ConstLinearModel(ModelBase):
pyFinAssert(len(features) == len(weights),
ValueError,
"length of features is not equal to length of weights")
self.weights = np.array(weights).flatten()
def fit(self, x: np.ndarray, y: np.ndarray):
pass
def predict(self, x):
return x @ self.weights
self.impl = ConstLinearModelImpl(weights)
def save(self):
model_desc = super().save()
model_desc['weight'] = list(self.weights)
model_desc['weight'] = list(self.impl.weights)
return model_desc
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.weights = np.array(model_desc['weight'])
return obj_layout
return super().load(model_desc)
@property
def weights(self):
return self.impl.weights.tolist()
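For orientation, a minimal usage sketch of the refactored constant-weight model (illustrative only, not part of the commit; the constructor signature and the feature/weight values are assumed from the surrounding diff):

import numpy as np

# hypothetical features with fixed, equal weights
model = ConstLinearModel(features=['f1', 'f2'], weights=[0.5, 0.5])
x = np.array([[1.0, 2.0],
              [3.0, 4.0]])
model.fit(x, None)         # no-op inside ConstLinearModelImpl: constant weights are never estimated
print(model.predict(x))    # [1.5 3.5], i.e. x @ weights
print(model.weights)       # [0.5, 0.5], via the new read-only property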
class LinearRegression(ModelBase):
def __init__(self, features: list=None, fit_intercept: bool=False):
def __init__(self, features: list=None, fit_intercept: bool=False, **kwargs):
super().__init__(features)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
def save(self) -> dict:
model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
model_desc['weight'] = self.impl.coef_.tolist()
return model_desc
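For reference, the description returned by LinearRegression.save() after the refactor combines the base-class fields with the model-specific extras; an illustrative layout (all values made up, module paths depend on the installed packages):

# sketch of the save() output, not actual program output
{
    'model_name': '<module>.LinearRegression',       # filled in by ModelBase.save()
    'language': 'python',
    'saved_time': '2017-09-01 12:00:00',
    'features': ['f1', 'f2'],
    'trained_time': '2017-09-01 12:00:00',
    'desc': '<encode()-serialized sklearn estimator>',
    'internal_model': '<sklearn module>.LinearRegression',
    'sklearn_version': '0.19.0',                      # still added by LinearRegression.save()
    'weight': [0.1, -0.2],                            # impl.coef_ as a plain list
}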
@@ -77,16 +71,12 @@ class LinearRegression(ModelBase):
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(
sklearn_version, model_desc['sklearn_version']))
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
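The LooseVersion guard only warns when the running sklearn is older than the version recorded in the description, for example:

from distutils.version import LooseVersion

LooseVersion('0.18.2') < LooseVersion('0.19.0')   # True  -> alpha_logger.warning is emitted
LooseVersion('0.19.1') < LooseVersion('0.19.0')   # False -> the model loads silently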
@property
@@ -96,24 +86,14 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase):
def __init__(self, alpha, features: list=None, fit_intercept: bool=False):
def __init__(self, alpha=0.01, features: list=None, fit_intercept: bool=False, **kwargs):
super().__init__(features)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
def save(self) -> dict:
model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
model_desc['weight'] = self.impl.coef_.tolist()
return model_desc
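With the new default alpha and the **kwargs pass-through, extra sklearn Lasso arguments can be forwarded directly; a brief sketch (feature names and data are made up):

import numpy as np

# alpha defaults to 0.01; positive=True is forwarded to sklearn's Lasso
model = LassoRegression(features=['f1', 'f2'], positive=True)
x = np.random.randn(100, 2)
y = x @ np.array([0.3, 0.7]) + 0.01 * np.random.randn(100)
model.fit(x, y)                # delegated to ModelBase.fit -> self.impl.fit
print(model.predict(x[:3]))    # delegated to ModelBase.predict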
@@ -122,16 +102,12 @@ class LassoRegression(ModelBase):
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls(alpha=0.)
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(
sklearn_version, model_desc['sklearn_version']))
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
@property
......
@@ -9,6 +9,8 @@ import abc
import arrow
import numpy as np
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class ModelBase(metaclass=abc.ABCMeta):
@@ -16,14 +18,15 @@ class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None):
if features is not None:
self.features = list(features)
self.impl = None
self.trained_time = None
@abc.abstractmethod
def fit(self, x, y):
pass
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
@abc.abstractmethod
def predict(self, x) -> np.ndarray:
pass
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
@abc.abstractmethod
def save(self) -> dict:
@@ -34,10 +37,17 @@ class ModelBase(metaclass=abc.ABCMeta):
model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__,
language='python',
saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"),
features=list(self.features))
features=list(self.features),
trained_time=self.trained_time,
desc=encode(self.impl),
internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
return model_desc
@abc.abstractclassmethod
def load(cls, model_desc: dict):
pass
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
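The net effect of the refactor is that fit, predict, save and load now live on ModelBase and the concrete models mostly delegate to self.impl. A round-trip sketch (illustrative; the import path is assumed and not shown in this diff):

import numpy as np
from alphamind.model.linearmodel import LinearRegression   # assumed module path

x = np.random.randn(200, 3)
y = x @ np.array([1.0, -0.5, 0.2])

model = LinearRegression(features=['f1', 'f2', 'f3'])
model.fit(x, y)                 # ModelBase.fit: impl.fit(x, y) plus a trained_time stamp

desc = model.save()             # base fields plus the sklearn_version/weight extras
restored = LinearRegression.load(desc)   # ModelBase.load rebuilds impl via decode(desc['desc'])
assert np.allclose(model.predict(x), restored.predict(x))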
@@ -5,51 +5,52 @@ Created on 2017-12-4
@author: cheng.li
"""
import arrow
from typing import List
import numpy as np
from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from xgboost import XGBRegressor as XGBRegressorImpl
from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class RandomForestRegressor(ModelBase):
def __init__(self, n_estimators, features=None, *args, **kwargs):
def __init__(self, n_estimators: int=100, features: List=None, **kwargs):
super().__init__(features)
self.impl = RandomForestRegressorImpl(n_estimators, *args, **kwargs)
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
self.impl = RandomForestRegressorImpl(n_estimators, **kwargs)
self.trained_time = None
def score(self, x: np.ndarray, y: np.ndarray) -> float:
return self.impl.score(x, y)
def save(self) -> dict:
model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(
sklearn_version, model_desc['sklearn_version']))
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
class XGBRegressor(ModelBase):
def __init__(self,
n_estimators: int=100,
learning_rate: float=0.1,
max_depth: int=3,
features: List=None, **kwargs):
super().__init__(features)
self.impl = XGBRegressorImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
**kwargs)
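The tree models follow the same delegation pattern; an illustrative construction with the new keyword-style constructors (hyper-parameter values are made up, extra keywords go straight to the underlying library):

# max_depth is forwarded to sklearn's RandomForestRegressor via **kwargs
rf = RandomForestRegressor(n_estimators=200, features=['f1', 'f2', 'f3'], max_depth=5)

# n_estimators / learning_rate / max_depth map onto xgboost's XGBRegressor
xgb = XGBRegressor(n_estimators=500, learning_rate=0.05, max_depth=4,
                   features=['f1', 'f2', 'f3'])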