Commit ea76cc94 authored by Dr.李

modified model fit and predict signature

parent 794b50ea
......@@ -8,6 +8,7 @@ Created on 2017-9-4
import abc
import arrow
import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
......@@ -32,15 +33,15 @@ class ModelBase(metaclass=abc.ABCMeta):
and list_eq(self.features, rhs.features) \
and encode(self.formulas) == encode(rhs.formulas)
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y.flatten())
def fit(self, x: pd.DataFrame, y: np.ndarray):
self.impl.fit(x[self.features].values, y.flatten())
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
def predict(self, x: pd.DataFrame) -> np.ndarray:
return self.impl.predict(x[self.features].values)
def score(self, x: np.ndarray, y: np.ndarray) -> float:
return self.impl.score(x, y)
def score(self, x: pd.DataFrame, y: np.ndarray) -> float:
return self.impl.score(x[self.features].values, y)
@abc.abstractmethod
def save(self) -> dict:
......
......@@ -9,6 +9,7 @@ from typing import List
from distutils.version import LooseVersion
import arrow
import numpy as np
import pandas as pd
from sklearn import __version__ as sklearn_version
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
......@@ -194,9 +195,9 @@ class XGBTrainer(ModelBase):
self.impl = None
self.kwargs = kwargs
def fit(self, x, y):
def fit(self, x: pd.DataFrame, y: np.ndarray):
if self.eval_sample:
x_train, x_eval, y_train, y_eval = train_test_split(x,
x_train, x_eval, y_train, y_eval = train_test_split(x[self.features].values,
y,
test_size=self.eval_sample,
random_state=42)
......@@ -209,7 +210,7 @@ class XGBTrainer(ModelBase):
verbose_eval=False,
**self.kwargs)
else:
d_train = xgb.DMatrix(x, y)
d_train = xgb.DMatrix(x[self.features].values, y)
self.impl = xgb.train(params=self.params,
dtrain=d_train,
num_boost_round=self.num_boost_round,
......@@ -217,8 +218,8 @@ class XGBTrainer(ModelBase):
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
d_predict = xgb.DMatrix(x)
def predict(self, x: pd.DataFrame) -> np.ndarray:
d_predict = xgb.DMatrix(x[self.features].values)
return self.impl.predict(d_predict)
def save(self) -> dict:
......
......@@ -7,6 +7,7 @@ Created on 2017-9-4
import unittest
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel
......@@ -19,10 +20,10 @@ class TestLinearModel(unittest.TestCase):
def setUp(self):
self.n = 3
self.train_x = np.random.randn(1000, self.n)
self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.train_y = np.random.randn(1000)
self.train_y_label = np.where(self.train_y > 0., 1, 0)
self.predict_x = np.random.randn(10, self.n)
self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])
def test_const_linear_model(self):
......
......@@ -7,6 +7,7 @@ Created on 2017-9-5
import unittest
import numpy as np
import pandas as pd
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.loader import load_model
......@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase):
def setUp(self):
self.n = 3
self.trained_x = np.random.randn(1000, self.n)
self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.trained_y = np.random.randn(1000, 1)
self.predict_x = np.random.randn(100, self.n)
self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c'])
def test_load_model(self):
model = LinearRegression(['a', 'b', 'c'])
......
......@@ -7,6 +7,7 @@ Created on 2018-1-5
import unittest
import numpy as np
import pandas as pd
from alphamind.model.loader import load_model
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
......@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer
class TestTreeModel(unittest.TestCase):
def setUp(self):
self.x = np.random.randn(1000, 10)
self.features = list('0123456789')
self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features)
self.y = np.random.randn(1000)
self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features)
def test_random_forest_regress_persistence(self):
model = RandomForestRegressor(features=list(range(10)))
model = RandomForestRegressor(features=self.features)
model.fit(self.x, self.y)
desc = model.save()
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_random_forest_classify_persistence(self):
model = RandomForestClassifier(features=list(range(10)))
model = RandomForestClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y)
......@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_regress_persistence(self):
model = XGBRegressor(features=list(range(10)))
model = XGBRegressor(features=self.features)
model.fit(self.x, self.y)
desc = model.save()
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_classify_persistence(self):
model = XGBClassifier(features=list(range(10)))
model = XGBClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y)
......@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_trainer_equal_classifier(self):
sample_x = np.random.randn(100, 10)
model1 = XGBClassifier(n_estimators=100,
learning_rate=0.1,
max_depth=3,
features=list(range(10)),
features=self.features,
random_state=42)
model2 = XGBTrainer(features=list(range(10)),
model2 = XGBTrainer(features=self.features,
objective='reg:logistic',
booster='gbtree',
tree_method='exact',
......@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase):
model1.fit(self.x, y)
model2.fit(self.x, y)
predict1 = model1.predict(sample_x)
predict2 = model2.predict(sample_x)
predict1 = model1.predict(self.sample_x)
predict2 = model2.predict(self.sample_x)
predict2 = np.where(predict2 > 0.5, 1., 0.)
np.testing.assert_array_almost_equal(predict1, predict2)
def test_xgb_trainer_persistence(self):
model = XGBTrainer(features=list(range(10)),
model = XGBTrainer(features=self.features,
objective='binary:logistic',
booster='gbtree',
tree_method='hist',
......@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment