Modified model fit, predict and score signatures to accept pandas DataFrames

ea76cc94 · Dr.李 · 794b50ea · ea76cc94 · ea76cc94 · ea76cc94
Commit ea76cc94 authored Feb 09, 2018 by Dr.李
5 changed files
--- a/alphamind/model/modelbase.py
+++ b/alphamind/model/modelbase.py
@@ -8,6 +8,7 @@ Created on 2017-9-4
 import abc
 import arrow
 import numpy as np
+import pandas as pd
 from simpleutils.miscellaneous import list_eq
 from alphamind.utilities import alpha_logger
 from alphamind.utilities import encode
@@ -32,15 +33,15 @@ class ModelBase(metaclass=abc.ABCMeta):
               and list_eq(self.features, rhs.features) \
               and encode(self.formulas) == encode(rhs.formulas)
-    def fit(self, x: np.ndarray, y: np.ndarray):
+    def fit(self, x: pd.DataFrame, y: np.ndarray):
-        self.impl.fit(x, y.flatten())
+        self.impl.fit(x[self.features].values, y.flatten())
        self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
-    def predict(self, x: np.ndarray) -> np.ndarray:
+    def predict(self, x: pd.DataFrame) -> np.ndarray:
-        return self.impl.predict(x)
+        return self.impl.predict(x[self.features].values)
-    def score(self, x: np.ndarray, y: np.ndarray) -> float:
+    def score(self, x: pd.DataFrame, y: np.ndarray) -> float:
-        return self.impl.score(x, y)
+        return self.impl.score(x[self.features].values, y)
    @abc.abstractmethod
    def save(self) -> dict:

--- a/alphamind/model/treemodel.py
+++ b/alphamind/model/treemodel.py
@@ -9,6 +9,7 @@ from typing import List
 from distutils.version import LooseVersion
 import arrow
 import numpy as np
+import pandas as pd
 from sklearn import __version__ as sklearn_version
 from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
 from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
@@ -194,9 +195,9 @@ class XGBTrainer(ModelBase):
        self.impl = None
        self.kwargs = kwargs
-    def fit(self, x, y):
+    def fit(self, x: pd.DataFrame, y: np.ndarray):
        if self.eval_sample:
-            x_train, x_eval, y_train, y_eval = train_test_split(x,
+            x_train, x_eval, y_train, y_eval = train_test_split(x[self.features].values,
                                                                y,
                                                                test_size=self.eval_sample,
                                                                random_state=42)
@@ -209,7 +210,7 @@ class XGBTrainer(ModelBase):
                                  verbose_eval=False,
                                  **self.kwargs)
        else:
-            d_train = xgb.DMatrix(x, y)
+            d_train = xgb.DMatrix(x[self.features].values, y)
            self.impl = xgb.train(params=self.params,
                                  dtrain=d_train,
                                  num_boost_round=self.num_boost_round,
@@ -217,8 +218,8 @@ class XGBTrainer(ModelBase):
        self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
-    def predict(self, x: np.ndarray) -> np.ndarray:
+    def predict(self, x: pd.DataFrame) -> np.ndarray:
-        d_predict = xgb.DMatrix(x)
+        d_predict = xgb.DMatrix(x[self.features].values)
        return self.impl.predict(d_predict)
    def save(self) -> dict:

--- a/alphamind/tests/model/test_linearmodel.py
+++ b/alphamind/tests/model/test_linearmodel.py
@@ -7,6 +7,7 @@ Created on 2017-9-4
 import unittest
 import numpy as np
+import pandas as pd
 from sklearn.linear_model import LinearRegression as LinearRegression2
 from alphamind.model.loader import load_model
 from alphamind.model.linearmodel import ConstLinearModel
@@ -19,10 +20,10 @@ class TestLinearModel(unittest.TestCase):
    def setUp(self):
        self.n = 3
-        self.train_x = np.random.randn(1000, self.n)
+        self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
        self.train_y = np.random.randn(1000)
        self.train_y_label = np.where(self.train_y > 0., 1, 0)
-        self.predict_x = np.random.randn(10, self.n)
+        self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])
    def test_const_linear_model(self):

--- a/alphamind/tests/model/test_loader.py
+++ b/alphamind/tests/model/test_loader.py
@@ -7,6 +7,7 @@ Created on 2017-9-5
 import unittest
 import numpy as np
+import pandas as pd
 from alphamind.model.linearmodel import LinearRegression
 from alphamind.model.loader import load_model
@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase):
    def setUp(self):
        self.n = 3
-        self.trained_x = np.random.randn(1000, self.n)
+        self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
        self.trained_y = np.random.randn(1000, 1)
-        self.predict_x = np.random.randn(100, self.n)
+        self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c'])
    def test_load_model(self):
        model = LinearRegression(['a', 'b', 'c'])

--- a/alphamind/tests/model/test_treemodel.py
+++ b/alphamind/tests/model/test_treemodel.py
@@ -7,6 +7,7 @@ Created on 2018-1-5
 import unittest
 import numpy as np
+import pandas as pd
 from alphamind.model.loader import load_model
 from alphamind.model.treemodel import RandomForestRegressor
 from alphamind.model.treemodel import RandomForestClassifier
@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer
 class TestTreeModel(unittest.TestCase):
    def setUp(self):
-        self.x = np.random.randn(1000, 10)
+        self.features = list('0123456789')
+        self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features)
        self.y = np.random.randn(1000)
+        self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features)
    def test_random_forest_regress_persistence(self):
-        model = RandomForestRegressor(features=list(range(10)))
+        model = RandomForestRegressor(features=self.features)
        model.fit(self.x, self.y)
        desc = model.save()
        new_model = load_model(desc)
        self.assertEqual(model.features, new_model.features)
-        sample_x = np.random.randn(100, 10)
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
-        np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
        np.testing.assert_array_almost_equal(model.importances, new_model.importances)
    def test_random_forest_classify_persistence(self):
-        model = RandomForestClassifier(features=list(range(10)))
+        model = RandomForestClassifier(features=self.features)
        y = np.where(self.y > 0, 1, 0)
        model.fit(self.x, y)
@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase):
        new_model = load_model(desc)
        self.assertEqual(model.features, new_model.features)
-        sample_x = np.random.randn(100, 10)
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
-        np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
        np.testing.assert_array_almost_equal(model.importances, new_model.importances)
    def test_xgb_regress_persistence(self):
-        model = XGBRegressor(features=list(range(10)))
+        model = XGBRegressor(features=self.features)
        model.fit(self.x, self.y)
        desc = model.save()
        new_model = load_model(desc)
        self.assertEqual(model.features, new_model.features)
-        sample_x = np.random.randn(100, 10)
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
-        np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
        np.testing.assert_array_almost_equal(model.importances, new_model.importances)
    def test_xgb_classify_persistence(self):
-        model = XGBClassifier(features=list(range(10)))
+        model = XGBClassifier(features=self.features)
        y = np.where(self.y > 0, 1, 0)
        model.fit(self.x, y)
@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase):
        new_model = load_model(desc)
        self.assertEqual(model.features, new_model.features)
-        sample_x = np.random.randn(100, 10)
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
-        np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
        np.testing.assert_array_almost_equal(model.importances, new_model.importances)
    def test_xgb_trainer_equal_classifier(self):
-        sample_x = np.random.randn(100, 10)
        model1 = XGBClassifier(n_estimators=100,
                               learning_rate=0.1,
                               max_depth=3,
-                               features=list(range(10)),
+                               features=self.features,
                               random_state=42)
-        model2 = XGBTrainer(features=list(range(10)),
+        model2 = XGBTrainer(features=self.features,
                            objective='reg:logistic',
                            booster='gbtree',
                            tree_method='exact',
@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase):
        model1.fit(self.x, y)
        model2.fit(self.x, y)
-        predict1 = model1.predict(sample_x)
+        predict1 = model1.predict(self.sample_x)
-        predict2 = model2.predict(sample_x)
+        predict2 = model2.predict(self.sample_x)
        predict2 = np.where(predict2 > 0.5, 1., 0.)
        np.testing.assert_array_almost_equal(predict1, predict2)
    def test_xgb_trainer_persistence(self):
-        model = XGBTrainer(features=list(range(10)),
+        model = XGBTrainer(features=self.features,
                           objective='binary:logistic',
                           booster='gbtree',
                           tree_method='hist',
@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase):
        new_model = load_model(desc)
        self.assertEqual(model.features, new_model.features)
-        sample_x = np.random.randn(100, 10)
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
-        np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
        np.testing.assert_array_almost_equal(model.importances, new_model.importances)