Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
c0e2023c
Commit
c0e2023c
authored
Jan 10, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added native xgboost model
parent
8b47319b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
97 additions
and
26 deletions
+97
-26
data_preparing.py
alphamind/model/data_preparing.py
+10
-25
modelbase.py
alphamind/model/modelbase.py
+1
-1
treemodel.py
alphamind/model/treemodel.py
+86
-0
No files found.
alphamind/model/data_preparing.py
View file @
c0e2023c
...
...
@@ -280,7 +280,7 @@ def fetch_train_phase(engine,
return_df
,
factor_df
=
df
[[
'trade_date'
,
'code'
,
'dx'
]],
df
[
[
'trade_date'
,
'code'
,
'isOpen'
]
+
transformer
.
names
]
return_df
,
dates
,
date_label
,
risk_exp
,
x_values
,
y_values
,
_
,
_
=
\
return_df
,
dates
,
date_label
,
risk_exp
,
x_values
,
y_values
,
_
,
_
,
codes
=
\
_merge_df
(
engine
,
transformer
.
names
,
factor_df
,
return_df
,
universe
,
dates
,
risk_model
,
neutralized_risk
)
if
dates
[
-
1
]
==
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
):
...
...
@@ -293,6 +293,7 @@ def fetch_train_phase(engine,
index
=
(
date_label
>=
start
)
&
(
date_label
<=
end
)
this_raw_x
=
x_values
[
index
]
this_raw_y
=
y_values
[
index
]
this_code
=
codes
[
index
]
if
risk_exp
is
not
None
:
this_risk_exp
=
risk_exp
[
index
]
else
:
...
...
@@ -310,7 +311,7 @@ def fetch_train_phase(engine,
ret
=
dict
()
ret
[
'x_names'
]
=
transformer
.
names
ret
[
'train'
]
=
{
'x'
:
ne_x
,
'y'
:
ne_y
}
ret
[
'train'
]
=
{
'x'
:
ne_x
,
'y'
:
ne_y
,
'code'
:
this_code
}
return
ret
...
...
@@ -361,7 +362,6 @@ def fetch_predict_phase(engine,
end
=
dates
[
-
1
]
start
=
dates
[
-
batch
]
# index = (date_label >= start) & (date_label <= end)
left_index
=
bisect
.
bisect_left
(
date_label
,
start
)
right_index
=
bisect
.
bisect_right
(
date_label
,
end
)
this_raw_x
=
x_values
[
left_index
:
right_index
]
...
...
@@ -399,27 +399,12 @@ def fetch_predict_phase(engine,
if __name__ == '__main__':
    # Ad-hoc smoke test for the data-fetch pipeline.
    # NOTE(review): connection string embeds a plaintext password — move it to
    # an environment variable or config file before this leaves a dev box.
    engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
    universe = Universe('zz500', ['hs300', 'zz500'])
    neutralized_risk = ['SIZE']
    res = fetch_predict_phase(engine, ['ep_q'],
                              '2018-01-08',
                              '5b',
                              universe,
                              16,
                              neutralized_risk=neutralized_risk)

    print(res)
alphamind/model/modelbase.py
View file @
c0e2023c
...
...
@@ -21,7 +21,7 @@ class ModelBase(metaclass=abc.ABCMeta):
self
.
impl
=
None
self
.
trained_time
=
None
def
fit
(
self
,
x
,
y
):
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarra
y
):
self
.
impl
.
fit
(
x
,
y
.
flatten
())
self
.
trained_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
)
...
...
alphamind/model/treemodel.py
View file @
c0e2023c
...
...
@@ -7,9 +7,13 @@ Created on 2017-12-4
from
typing
import
List
from
distutils.version
import
LooseVersion
import
arrow
import
numpy
as
np
from
sklearn
import
__version__
as
sklearn_version
from
sklearn.ensemble
import
RandomForestRegressor
as
RandomForestRegressorImpl
from
sklearn.ensemble
import
RandomForestClassifier
as
RandomForestClassifierImpl
from
sklearn.model_selection
import
train_test_split
import
xgboost
as
xgb
from
xgboost
import
__version__
as
xgbboot_version
from
xgboost
import
XGBRegressor
as
XGBRegressorImpl
from
xgboost
import
XGBClassifier
as
XGBClassifierImpl
...
...
@@ -155,4 +159,86 @@ class XGBClassifier(ModelBase):
return
self
.
impl
.
feature_importances_
.
tolist
()
class XGBTrainer(ModelBase):
    """Wrapper around the native xgboost training API (``xgb.train``).

    Unlike the sklearn-style ``XGBRegressor``/``XGBClassifier`` wrappers in
    this module, this trainer builds ``DMatrix`` inputs itself and can hold
    out a fraction of the training data as an evaluation set for early
    stopping.
    """

    def __init__(self,
                 objective,
                 booster='gbtree',
                 tree_method='hist',
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth=3,
                 eval_sample=None,
                 early_stopping_rounds=None,
                 subsample=1.,
                 colsample_bytree=1.,
                 features: List = None,
                 **kwargs):
        """
        :param objective: xgboost learning objective (e.g. ``'reg:linear'``).
        :param booster: booster type, forwarded verbatim to xgboost.
        :param tree_method: tree construction algorithm; ``'hist'`` by default.
        :param n_estimators: number of boosting rounds (``num_boost_round``).
        :param learning_rate: shrinkage rate (xgboost ``eta``).
        :param max_depth: maximum tree depth.
        :param eval_sample: fraction of (x, y) held out as the evaluation set;
            any falsy value disables evaluation (and early stopping) entirely.
        :param early_stopping_rounds: stop training when the eval metric has
            not improved for this many rounds; only effective together with
            ``eval_sample``.
        :param subsample: row subsampling ratio per boosting round.
        :param colsample_bytree: column subsampling ratio per tree.
        :param features: feature names, forwarded to ``ModelBase``.
        :param kwargs: accepted and ignored, kept for signature compatibility.
        """
        super().__init__(features)
        self.params = {
            'silent': 1,
            'objective': objective,
            'max_depth': max_depth,
            'eta': learning_rate,
            'booster': booster,
            'tree_method': tree_method,
            'subsample': subsample,
            'colsample_bytree': colsample_bytree
        }
        self.eval_sample = eval_sample
        self.num_boost_round = n_estimators
        self.early_stopping_rounds = early_stopping_rounds
        self.impl = None

    def fit(self, x, y):
        """Train a booster on (x, y), optionally with a held-out eval set."""
        if self.eval_sample:
            x_train, x_eval, y_train, y_eval = train_test_split(x,
                                                                y,
                                                                test_size=self.eval_sample,
                                                                random_state=42)
            d_train = xgb.DMatrix(x_train, y_train)
            d_eval = xgb.DMatrix(x_eval, y_eval)
            # bug fix: early_stopping_rounds was stored in __init__ but never
            # forwarded to xgb.train, so early stopping silently never ran.
            self.impl = xgb.train(params=self.params,
                                  dtrain=d_train,
                                  num_boost_round=self.num_boost_round,
                                  evals=[(d_eval, 'eval')],
                                  early_stopping_rounds=self.early_stopping_rounds,
                                  verbose_eval=False)
        else:
            d_train = xgb.DMatrix(x, y)
            self.impl = xgb.train(params=self.params,
                                  dtrain=d_train,
                                  num_boost_round=self.num_boost_round)
        self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Predict with the trained booster; x is wrapped in a DMatrix first."""
        d_predict = xgb.DMatrix(x)
        return self.impl.predict(d_predict)

    def save(self) -> dict:
        """Serialize via ModelBase and record the xgboost version and importances."""
        model_desc = super().save()
        # NOTE: the key 'xgbboot_version' (sic) is kept as-is — load() and any
        # previously persisted models read exactly this spelling.
        model_desc['xgbboot_version'] = xgbboot_version
        model_desc['importances'] = self.importances
        return model_desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a trainer from a saved description, warning on version skew."""
        obj_layout = super().load(model_desc)

        # bug fix: the original compared the *sklearn* version against the
        # stored xgboost version, making the downgrade warning meaningless.
        if LooseVersion(xgbboot_version) < LooseVersion(model_desc['xgbboot_version']):
            alpha_logger.warning('Current xgboost version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'
                                 .format(xgbboot_version, model_desc['xgbboot_version']))
        return obj_layout

    @property
    def importances(self):
        """Feature importance scores, ordered by feature name."""
        imps = self.impl.get_fscore().items()
        imps = sorted(imps, key=lambda x: x[0])
        # guard: zip(*[]) would make the [1] subscript raise IndexError when
        # the booster reports no feature scores
        return list(zip(*imps))[1] if imps else []
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment