Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
ea76cc94
Commit
ea76cc94
authored
Feb 09, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modified model fit and predict signature
parent
794b50ea
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
37 additions
and
36 deletions
+37
-36
modelbase.py
alphamind/model/modelbase.py
+7
-6
treemodel.py
alphamind/model/treemodel.py
+6
-5
test_linearmodel.py
alphamind/tests/model/test_linearmodel.py
+3
-2
test_loader.py
alphamind/tests/model/test_loader.py
+3
-2
test_treemodel.py
alphamind/tests/model/test_treemodel.py
+18
-21
No files found.
alphamind/model/modelbase.py
View file @
ea76cc94
...
...
@@ -8,6 +8,7 @@ Created on 2017-9-4
import
abc
import
arrow
import
numpy
as
np
import
pandas
as
pd
from
simpleutils.miscellaneous
import
list_eq
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
encode
...
...
@@ -32,15 +33,15 @@ class ModelBase(metaclass=abc.ABCMeta):
and
list_eq
(
self
.
features
,
rhs
.
features
)
\
and
encode
(
self
.
formulas
)
==
encode
(
rhs
.
formulas
)
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
self
.
impl
.
fit
(
x
,
y
.
flatten
())
def
fit
(
self
,
x
:
pd
.
DataFrame
,
y
:
np
.
ndarray
):
self
.
impl
.
fit
(
x
[
self
.
features
]
.
values
,
y
.
flatten
())
self
.
trained_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
)
def
predict
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
return
self
.
impl
.
predict
(
x
)
def
predict
(
self
,
x
:
pd
.
DataFrame
)
->
np
.
ndarray
:
return
self
.
impl
.
predict
(
x
[
self
.
features
]
.
values
)
def
score
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
)
->
float
:
return
self
.
impl
.
score
(
x
,
y
)
def
score
(
self
,
x
:
pd
.
DataFrame
,
y
:
np
.
ndarray
)
->
float
:
return
self
.
impl
.
score
(
x
[
self
.
features
]
.
values
,
y
)
@
abc
.
abstractmethod
def
save
(
self
)
->
dict
:
...
...
alphamind/model/treemodel.py
View file @
ea76cc94
...
...
@@ -9,6 +9,7 @@ from typing import List
from
distutils.version
import
LooseVersion
import
arrow
import
numpy
as
np
import
pandas
as
pd
from
sklearn
import
__version__
as
sklearn_version
from
sklearn.ensemble
import
RandomForestRegressor
as
RandomForestRegressorImpl
from
sklearn.ensemble
import
RandomForestClassifier
as
RandomForestClassifierImpl
...
...
@@ -194,9 +195,9 @@ class XGBTrainer(ModelBase):
self
.
impl
=
None
self
.
kwargs
=
kwargs
def
fit
(
self
,
x
,
y
):
def
fit
(
self
,
x
:
pd
.
DataFrame
,
y
:
np
.
ndarra
y
):
if
self
.
eval_sample
:
x_train
,
x_eval
,
y_train
,
y_eval
=
train_test_split
(
x
,
x_train
,
x_eval
,
y_train
,
y_eval
=
train_test_split
(
x
[
self
.
features
]
.
values
,
y
,
test_size
=
self
.
eval_sample
,
random_state
=
42
)
...
...
@@ -209,7 +210,7 @@ class XGBTrainer(ModelBase):
verbose_eval
=
False
,
**
self
.
kwargs
)
else
:
d_train
=
xgb
.
DMatrix
(
x
,
y
)
d_train
=
xgb
.
DMatrix
(
x
[
self
.
features
]
.
values
,
y
)
self
.
impl
=
xgb
.
train
(
params
=
self
.
params
,
dtrain
=
d_train
,
num_boost_round
=
self
.
num_boost_round
,
...
...
@@ -217,8 +218,8 @@ class XGBTrainer(ModelBase):
self
.
trained_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
)
def
predict
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
d_predict
=
xgb
.
DMatrix
(
x
)
def
predict
(
self
,
x
:
pd
.
DataFrame
)
->
np
.
ndarray
:
d_predict
=
xgb
.
DMatrix
(
x
[
self
.
features
]
.
values
)
return
self
.
impl
.
predict
(
d_predict
)
def
save
(
self
)
->
dict
:
...
...
alphamind/tests/model/test_linearmodel.py
View file @
ea76cc94
...
...
@@ -7,6 +7,7 @@ Created on 2017-9-4
import
unittest
import
numpy
as
np
import
pandas
as
pd
from
sklearn.linear_model
import
LinearRegression
as
LinearRegression2
from
alphamind.model.loader
import
load_model
from
alphamind.model.linearmodel
import
ConstLinearModel
...
...
@@ -19,10 +20,10 @@ class TestLinearModel(unittest.TestCase):
def
setUp
(
self
):
self
.
n
=
3
self
.
train_x
=
np
.
random
.
randn
(
1000
,
self
.
n
)
self
.
train_x
=
pd
.
DataFrame
(
np
.
random
.
randn
(
1000
,
self
.
n
),
columns
=
[
'a'
,
'b'
,
'c'
]
)
self
.
train_y
=
np
.
random
.
randn
(
1000
)
self
.
train_y_label
=
np
.
where
(
self
.
train_y
>
0.
,
1
,
0
)
self
.
predict_x
=
np
.
random
.
randn
(
10
,
self
.
n
)
self
.
predict_x
=
pd
.
DataFrame
(
np
.
random
.
randn
(
10
,
self
.
n
),
columns
=
[
'a'
,
'b'
,
'c'
]
)
def
test_const_linear_model
(
self
):
...
...
alphamind/tests/model/test_loader.py
View file @
ea76cc94
...
...
@@ -7,6 +7,7 @@ Created on 2017-9-5
import
unittest
import
numpy
as
np
import
pandas
as
pd
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.model.loader
import
load_model
...
...
@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase):
def
setUp
(
self
):
self
.
n
=
3
self
.
trained_x
=
np
.
random
.
randn
(
1000
,
self
.
n
)
self
.
trained_x
=
pd
.
DataFrame
(
np
.
random
.
randn
(
1000
,
self
.
n
),
columns
=
[
'a'
,
'b'
,
'c'
]
)
self
.
trained_y
=
np
.
random
.
randn
(
1000
,
1
)
self
.
predict_x
=
np
.
random
.
randn
(
100
,
self
.
n
)
self
.
predict_x
=
pd
.
DataFrame
(
np
.
random
.
randn
(
100
,
self
.
n
),
columns
=
[
'a'
,
'b'
,
'c'
]
)
def
test_load_model
(
self
):
model
=
LinearRegression
([
'a'
,
'b'
,
'c'
])
...
...
alphamind/tests/model/test_treemodel.py
View file @
ea76cc94
...
...
@@ -7,6 +7,7 @@ Created on 2018-1-5
import
unittest
import
numpy
as
np
import
pandas
as
pd
from
alphamind.model.loader
import
load_model
from
alphamind.model.treemodel
import
RandomForestRegressor
from
alphamind.model.treemodel
import
RandomForestClassifier
...
...
@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer
class
TestTreeModel
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
x
=
np
.
random
.
randn
(
1000
,
10
)
self
.
features
=
list
(
'0123456789'
)
self
.
x
=
pd
.
DataFrame
(
np
.
random
.
randn
(
1000
,
10
),
columns
=
self
.
features
)
self
.
y
=
np
.
random
.
randn
(
1000
)
self
.
sample_x
=
pd
.
DataFrame
(
np
.
random
.
randn
(
100
,
10
),
columns
=
self
.
features
)
def
test_random_forest_regress_persistence
(
self
):
model
=
RandomForestRegressor
(
features
=
list
(
range
(
10
))
)
model
=
RandomForestRegressor
(
features
=
self
.
features
)
model
.
fit
(
self
.
x
,
self
.
y
)
desc
=
model
.
save
()
new_model
=
load_model
(
desc
)
self
.
assertEqual
(
model
.
features
,
new_model
.
features
)
sample_x
=
np
.
random
.
randn
(
100
,
10
)
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
sample_x
),
new_model
.
predict
(
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
self
.
sample_x
),
new_model
.
predict
(
self
.
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
importances
,
new_model
.
importances
)
def
test_random_forest_classify_persistence
(
self
):
model
=
RandomForestClassifier
(
features
=
list
(
range
(
10
))
)
model
=
RandomForestClassifier
(
features
=
self
.
features
)
y
=
np
.
where
(
self
.
y
>
0
,
1
,
0
)
model
.
fit
(
self
.
x
,
y
)
...
...
@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase):
new_model
=
load_model
(
desc
)
self
.
assertEqual
(
model
.
features
,
new_model
.
features
)
sample_x
=
np
.
random
.
randn
(
100
,
10
)
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
sample_x
),
new_model
.
predict
(
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
self
.
sample_x
),
new_model
.
predict
(
self
.
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
importances
,
new_model
.
importances
)
def
test_xgb_regress_persistence
(
self
):
model
=
XGBRegressor
(
features
=
list
(
range
(
10
))
)
model
=
XGBRegressor
(
features
=
self
.
features
)
model
.
fit
(
self
.
x
,
self
.
y
)
desc
=
model
.
save
()
new_model
=
load_model
(
desc
)
self
.
assertEqual
(
model
.
features
,
new_model
.
features
)
sample_x
=
np
.
random
.
randn
(
100
,
10
)
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
sample_x
),
new_model
.
predict
(
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
self
.
sample_x
),
new_model
.
predict
(
self
.
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
importances
,
new_model
.
importances
)
def
test_xgb_classify_persistence
(
self
):
model
=
XGBClassifier
(
features
=
list
(
range
(
10
))
)
model
=
XGBClassifier
(
features
=
self
.
features
)
y
=
np
.
where
(
self
.
y
>
0
,
1
,
0
)
model
.
fit
(
self
.
x
,
y
)
...
...
@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase):
new_model
=
load_model
(
desc
)
self
.
assertEqual
(
model
.
features
,
new_model
.
features
)
sample_x
=
np
.
random
.
randn
(
100
,
10
)
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
sample_x
),
new_model
.
predict
(
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
self
.
sample_x
),
new_model
.
predict
(
self
.
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
importances
,
new_model
.
importances
)
def
test_xgb_trainer_equal_classifier
(
self
):
sample_x
=
np
.
random
.
randn
(
100
,
10
)
model1
=
XGBClassifier
(
n_estimators
=
100
,
learning_rate
=
0.1
,
max_depth
=
3
,
features
=
list
(
range
(
10
))
,
features
=
self
.
features
,
random_state
=
42
)
model2
=
XGBTrainer
(
features
=
list
(
range
(
10
))
,
model2
=
XGBTrainer
(
features
=
self
.
features
,
objective
=
'reg:logistic'
,
booster
=
'gbtree'
,
tree_method
=
'exact'
,
...
...
@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase):
model1
.
fit
(
self
.
x
,
y
)
model2
.
fit
(
self
.
x
,
y
)
predict1
=
model1
.
predict
(
sample_x
)
predict2
=
model2
.
predict
(
sample_x
)
predict1
=
model1
.
predict
(
s
elf
.
s
ample_x
)
predict2
=
model2
.
predict
(
s
elf
.
s
ample_x
)
predict2
=
np
.
where
(
predict2
>
0.5
,
1.
,
0.
)
np
.
testing
.
assert_array_almost_equal
(
predict1
,
predict2
)
def
test_xgb_trainer_persistence
(
self
):
model
=
XGBTrainer
(
features
=
list
(
range
(
10
))
,
model
=
XGBTrainer
(
features
=
self
.
features
,
objective
=
'binary:logistic'
,
booster
=
'gbtree'
,
tree_method
=
'hist'
,
...
...
@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase):
new_model
=
load_model
(
desc
)
self
.
assertEqual
(
model
.
features
,
new_model
.
features
)
sample_x
=
np
.
random
.
randn
(
100
,
10
)
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
sample_x
),
new_model
.
predict
(
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
predict
(
self
.
sample_x
),
new_model
.
predict
(
self
.
sample_x
))
np
.
testing
.
assert_array_almost_equal
(
model
.
importances
,
new_model
.
importances
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment