Commit ea76cc94 authored by Dr.李

modified model fit and predict signature

parent 794b50ea
...@@ -8,6 +8,7 @@ Created on 2017-9-4 ...@@ -8,6 +8,7 @@ Created on 2017-9-4
import abc import abc
import arrow import arrow
import numpy as np import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq from simpleutils.miscellaneous import list_eq
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import encode from alphamind.utilities import encode
...@@ -32,15 +33,15 @@ class ModelBase(metaclass=abc.ABCMeta): ...@@ -32,15 +33,15 @@ class ModelBase(metaclass=abc.ABCMeta):
and list_eq(self.features, rhs.features) \ and list_eq(self.features, rhs.features) \
and encode(self.formulas) == encode(rhs.formulas) and encode(self.formulas) == encode(rhs.formulas)
def fit(self, x: np.ndarray, y: np.ndarray): def fit(self, x: pd.DataFrame, y: np.ndarray):
self.impl.fit(x, y.flatten()) self.impl.fit(x[self.features].values, y.flatten())
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray: def predict(self, x: pd.DataFrame) -> np.ndarray:
return self.impl.predict(x) return self.impl.predict(x[self.features].values)
def score(self, x: np.ndarray, y: np.ndarray) -> float: def score(self, x: pd.DataFrame, y: np.ndarray) -> float:
return self.impl.score(x, y) return self.impl.score(x[self.features].values, y)
@abc.abstractmethod @abc.abstractmethod
def save(self) -> dict: def save(self) -> dict:
......
...@@ -9,6 +9,7 @@ from typing import List ...@@ -9,6 +9,7 @@ from typing import List
from distutils.version import LooseVersion from distutils.version import LooseVersion
import arrow import arrow
import numpy as np import numpy as np
import pandas as pd
from sklearn import __version__ as sklearn_version from sklearn import __version__ as sklearn_version
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
...@@ -194,9 +195,9 @@ class XGBTrainer(ModelBase): ...@@ -194,9 +195,9 @@ class XGBTrainer(ModelBase):
self.impl = None self.impl = None
self.kwargs = kwargs self.kwargs = kwargs
def fit(self, x, y): def fit(self, x: pd.DataFrame, y: np.ndarray):
if self.eval_sample: if self.eval_sample:
x_train, x_eval, y_train, y_eval = train_test_split(x, x_train, x_eval, y_train, y_eval = train_test_split(x[self.features].values,
y, y,
test_size=self.eval_sample, test_size=self.eval_sample,
random_state=42) random_state=42)
...@@ -209,7 +210,7 @@ class XGBTrainer(ModelBase): ...@@ -209,7 +210,7 @@ class XGBTrainer(ModelBase):
verbose_eval=False, verbose_eval=False,
**self.kwargs) **self.kwargs)
else: else:
d_train = xgb.DMatrix(x, y) d_train = xgb.DMatrix(x[self.features].values, y)
self.impl = xgb.train(params=self.params, self.impl = xgb.train(params=self.params,
dtrain=d_train, dtrain=d_train,
num_boost_round=self.num_boost_round, num_boost_round=self.num_boost_round,
...@@ -217,8 +218,8 @@ class XGBTrainer(ModelBase): ...@@ -217,8 +218,8 @@ class XGBTrainer(ModelBase):
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray: def predict(self, x: pd.DataFrame) -> np.ndarray:
d_predict = xgb.DMatrix(x) d_predict = xgb.DMatrix(x[self.features].values)
return self.impl.predict(d_predict) return self.impl.predict(d_predict)
def save(self) -> dict: def save(self) -> dict:
......
...@@ -7,6 +7,7 @@ Created on 2017-9-4 ...@@ -7,6 +7,7 @@ Created on 2017-9-4
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as LinearRegression2 from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
...@@ -19,10 +20,10 @@ class TestLinearModel(unittest.TestCase): ...@@ -19,10 +20,10 @@ class TestLinearModel(unittest.TestCase):
def setUp(self): def setUp(self):
self.n = 3 self.n = 3
self.train_x = np.random.randn(1000, self.n) self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.train_y = np.random.randn(1000) self.train_y = np.random.randn(1000)
self.train_y_label = np.where(self.train_y > 0., 1, 0) self.train_y_label = np.where(self.train_y > 0., 1, 0)
self.predict_x = np.random.randn(10, self.n) self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])
def test_const_linear_model(self): def test_const_linear_model(self):
......
...@@ -7,6 +7,7 @@ Created on 2017-9-5 ...@@ -7,6 +7,7 @@ Created on 2017-9-5
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
...@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase): ...@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase):
def setUp(self): def setUp(self):
self.n = 3 self.n = 3
self.trained_x = np.random.randn(1000, self.n) self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.trained_y = np.random.randn(1000, 1) self.trained_y = np.random.randn(1000, 1)
self.predict_x = np.random.randn(100, self.n) self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c'])
def test_load_model(self): def test_load_model(self):
model = LinearRegression(['a', 'b', 'c']) model = LinearRegression(['a', 'b', 'c'])
......
...@@ -7,6 +7,7 @@ Created on 2018-1-5 ...@@ -7,6 +7,7 @@ Created on 2018-1-5
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
from alphamind.model.treemodel import RandomForestRegressor from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier from alphamind.model.treemodel import RandomForestClassifier
...@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer ...@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer
class TestTreeModel(unittest.TestCase): class TestTreeModel(unittest.TestCase):
def setUp(self): def setUp(self):
self.x = np.random.randn(1000, 10) self.features = list('0123456789')
self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features)
self.y = np.random.randn(1000) self.y = np.random.randn(1000)
self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features)
def test_random_forest_regress_persistence(self): def test_random_forest_regress_persistence(self):
model = RandomForestRegressor(features=list(range(10))) model = RandomForestRegressor(features=self.features)
model.fit(self.x, self.y) model.fit(self.x, self.y)
desc = model.save() desc = model.save()
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_random_forest_classify_persistence(self): def test_random_forest_classify_persistence(self):
model = RandomForestClassifier(features=list(range(10))) model = RandomForestClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0) y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y) model.fit(self.x, y)
...@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase): ...@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_regress_persistence(self): def test_xgb_regress_persistence(self):
model = XGBRegressor(features=list(range(10))) model = XGBRegressor(features=self.features)
model.fit(self.x, self.y) model.fit(self.x, self.y)
desc = model.save() desc = model.save()
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_classify_persistence(self): def test_xgb_classify_persistence(self):
model = XGBClassifier(features=list(range(10))) model = XGBClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0) y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y) model.fit(self.x, y)
...@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase): ...@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_trainer_equal_classifier(self): def test_xgb_trainer_equal_classifier(self):
sample_x = np.random.randn(100, 10)
model1 = XGBClassifier(n_estimators=100, model1 = XGBClassifier(n_estimators=100,
learning_rate=0.1, learning_rate=0.1,
max_depth=3, max_depth=3,
features=list(range(10)), features=self.features,
random_state=42) random_state=42)
model2 = XGBTrainer(features=list(range(10)), model2 = XGBTrainer(features=self.features,
objective='reg:logistic', objective='reg:logistic',
booster='gbtree', booster='gbtree',
tree_method='exact', tree_method='exact',
...@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase): ...@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase):
model1.fit(self.x, y) model1.fit(self.x, y)
model2.fit(self.x, y) model2.fit(self.x, y)
predict1 = model1.predict(sample_x) predict1 = model1.predict(self.sample_x)
predict2 = model2.predict(sample_x) predict2 = model2.predict(self.sample_x)
predict2 = np.where(predict2 > 0.5, 1., 0.) predict2 = np.where(predict2 > 0.5, 1., 0.)
np.testing.assert_array_almost_equal(predict1, predict2) np.testing.assert_array_almost_equal(predict1, predict2)
def test_xgb_trainer_persistence(self): def test_xgb_trainer_persistence(self):
model = XGBTrainer(features=list(range(10)), model = XGBTrainer(features=self.features,
objective='binary:logistic', objective='binary:logistic',
booster='gbtree', booster='gbtree',
tree_method='hist', tree_method='hist',
...@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase): ...@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment