Dr.李 / alpha-mind
Commit 2553b951, authored Apr 12, 2018 by Dr.李

added fit target setting in models

Parent: 189ee654

Showing 5 changed files with 59 additions and 27 deletions (+59 / -27):

    alphamind/model/composer.py        +4   -1
    alphamind/model/data_preparing.py  +21  -10
    alphamind/model/linearmodel.py     +9   -8
    alphamind/model/modelbase.py       +15  -3
    alphamind/model/treemodel.py       +10  -5

alphamind/model/composer.py

@@ -9,6 +9,7 @@ import copy
 import bisect
 from typing import Iterable
 import pandas as pd
+from typing import Union
 from simpleutils.miscellaneous import list_eq
 from alphamind.model.modelbase import ModelBase
 from alphamind.model.data_preparing import fetch_train_phase
@@ -19,6 +20,7 @@ from alphamind.data.winsorize import winsorize_normal
 from alphamind.data.rank import rank
 from alphamind.data.standardize import standardize
 from alphamind.model.loader import load_model
+from alphamind.data.transformer import Transformer

 PROCESS_MAPPING = {
     'winsorize_normal': winsorize_normal,
@@ -116,7 +118,8 @@ class DataMeta(object):
                                  self.risk_model,
                                  self.pre_process,
                                  self.post_process,
-                                 self.warm_start)
+                                 self.warm_start,
+                                 fit_target=alpha_model.fit_target)

     def fetch_predict_data(self,
                            ref_date: str,
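
Together with the ModelBase and model-constructor changes below, the effect of this hunk is that the data layer no longer hard-codes dx returns as the training target; DataMeta forwards whatever fit_target the alpha model was built with. A minimal sketch of that hand-off (the surrounding DataMeta method is not shown in this hunk, so only the call is indicated; the factor and target names are illustrative):

    from alphamind.model.linearmodel import LinearRegression

    # the model now carries its own fitting target (stored as a Transformer by ModelBase)
    alpha_model = LinearRegression(features=['ep_q'], fit_target='closePrice')

    # inside DataMeta, the modified call effectively becomes
    #     fetch_train_phase(..., self.warm_start, fit_target=alpha_model.fit_target)
    # so the training target follows the model definition instead of being fixed to dx returns.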

alphamind/model/data_preparing.py

@@ -60,7 +60,8 @@ def prepare_data(engine: SqlEngine,
                  frequency: str,
                  universe: Universe,
                  benchmark: int,
-                 warm_start: int = 0):
+                 warm_start: int = 0,
+                 fit_target: Union[Transformer, object] = None):
     if warm_start > 0:
         p = Period(frequency)
         p = Period(length=-warm_start * p.length(), units=p.units())
@@ -86,14 +87,22 @@ def prepare_data(engine: SqlEngine,
                                           factors=transformer,
                                           dates=dates).sort_values(['trade_date', 'code'])
     alpha_logger.info("factor data loading finished")
-    return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
-    alpha_logger.info("return data loading finished")
+    if fit_target is None:
+        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
+    else:
+        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
+        target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
+        target_df = target_df[target_df.trade_date.isin(dates)]
+        target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
+    alpha_logger.info("fit target data loading finished")

     industry_df = engine.fetch_industry_range(universe, dates=dates)
     alpha_logger.info("industry data loading finished")
     benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
     alpha_logger.info("benchmark data loading finished")

-    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
+    df = pd.merge(factor_df, target_df, on=['trade_date', 'code']).dropna()
     df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
     df = pd.merge(df, industry_df, on=['trade_date', 'code'])
     df['weight'] = df['weight'].fillna(0.)
@@ -262,7 +271,7 @@ def fetch_train_phase(engine,
                       pre_process: Iterable[object] = None,
                       post_process: Iterable[object] = None,
                       warm_start: int = 0,
-                      fitting_target: Union[Transformer, object] = None) -> dict:
+                      fit_target: Union[Transformer, object] = None) -> dict:
     if isinstance(alpha_factors, Transformer):
         transformer = alpha_factors
     else:
@@ -282,11 +291,11 @@ def fetch_train_phase(engine,
     horizon = map_freq(frequency)

     factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
-    if fitting_target is None:
+    if fit_target is None:
         target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
     else:
         one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
-        target_df = engine.fetch_factor_range_forward(universe, factors=fitting_target, dates=dates + [one_more_date])
+        target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
         target_df = target_df[target_df.trade_date.isin(dates)]
         target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
@@ -424,14 +433,16 @@ def fetch_predict_phase(engine,
 if __name__ == '__main__':
-    engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
+    from alphamind.api import risk_styles, industry_styles, standardize
+    engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha')
     universe = Universe('zz500', ['hs300', 'zz500'])
-    neutralized_risk = ['SIZE']
+    neutralized_risk = risk_styles + industry_styles
     res = fetch_train_phase(engine,
                             ['ep_q'],
                             '2012-01-05',
                             '5b',
                             universe,
                             16,
                             neutralized_risk=neutralized_risk,
-                            fitting_target='closePrice')
+                            post_process=[standardize],
+                            fit_target='closePrice')

     print(res)
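
A usage sketch of the new parameter, modeled on the __main__ block above (the connection string is a placeholder, and importing SqlEngine and Universe via alphamind.api is an assumption):

    from alphamind.api import SqlEngine, Universe
    from alphamind.model.data_preparing import fetch_train_phase

    engine = SqlEngine('postgresql+psycopg2://user:password@localhost/alpha')
    universe = Universe('zz500', ['hs300', 'zz500'])

    # fit_target omitted (None): behaves as before, the target is the forward dx return
    res_returns = fetch_train_phase(engine, ['ep_q'], '2012-01-05', '5b', universe, 16)

    # fit_target given: the target is the named column fetched one period forward,
    # restricted to the requested dates and forward-filled per code
    res_price = fetch_train_phase(engine, ['ep_q'], '2012-01-05', '5b', universe, 16,
                                  fit_target='closePrice')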

alphamind/model/linearmodel.py

@@ -32,8 +32,9 @@ class ConstLinearModel(ModelBase):
     def __init__(self,
                  features=None,
-                 weights: dict = None):
-        super().__init__(features)
+                 weights: dict = None,
+                 fit_target=None):
+        super().__init__(features=features, fit_target=fit_target)
         if features is not None and weights is not None:
             pyFinAssert(len(features) == len(weights),
                         ValueError,
@@ -57,8 +58,8 @@ class ConstLinearModel(ModelBase):
 class LinearRegression(ModelBase):

-    def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
-        super().__init__(features)
+    def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
+        super().__init__(features=features, fit_target=fit_target)
         self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)

     def save(self) -> dict:
@@ -84,8 +85,8 @@ class LinearRegression(ModelBase):
 class LassoRegression(ModelBase):

-    def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, **kwargs):
-        super().__init__(features)
+    def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
+        super().__init__(features=features, fit_target=fit_target)
         self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)

     def save(self) -> dict:
@@ -111,8 +112,8 @@ class LassoRegression(ModelBase):
 class LogisticRegression(ModelBase):

-    def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
-        super().__init__(features)
+    def __init__(self, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
+        super().__init__(features=features, fit_target=fit_target)
         self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)

     def save(self) -> dict:
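
The linear models now accept fit_target and pass it straight through to ModelBase, so construction changes only by one keyword. A construction sketch (factor names, weights, and the target below are illustrative; the feature-name to weight mapping is inferred from the length check shown above):

    from alphamind.model.linearmodel import ConstLinearModel, LinearRegression, LassoRegression

    const = ConstLinearModel(features=['ep_q', 'roe_q'],
                             weights={'ep_q': 0.5, 'roe_q': 0.5},
                             fit_target='closePrice')
    ols = LinearRegression(features=['ep_q', 'roe_q'], fit_intercept=False,
                           fit_target='closePrice')
    lasso = LassoRegression(alpha=0.01, features=['ep_q', 'roe_q'],
                            fit_intercept=False, fit_target='closePrice')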

alphamind/model/modelbase.py

@@ -18,12 +18,17 @@ from alphamind.data.transformer import Transformer
 class ModelBase(metaclass=abc.ABCMeta):

-    def __init__(self, features=None):
+    def __init__(self, features=None, fit_target=None):
         if features is not None:
             self.formulas = Transformer(features)
             self.features = self.formulas.names
         else:
             self.features = None
+
+        if fit_target is not None:
+            self.fit_target = Transformer(fit_target)
+        else:
+            self.fit_target = None

         self.impl = None
         self.trained_time = None
@@ -31,7 +36,8 @@ class ModelBase(metaclass=abc.ABCMeta):
         return encode(self.impl) == encode(rhs.impl) \
                and self.trained_time == rhs.trained_time \
                and list_eq(self.features, rhs.features) \
-               and encode(self.formulas) == encode(rhs.formulas)
+               and encode(self.formulas) == encode(rhs.formulas) \
+               and encode(self.fit_target) == encode(rhs.fit_target)

     def fit(self, x: pd.DataFrame, y: np.ndarray):
         self.impl.fit(x[self.features].values, y.flatten())
@@ -56,15 +62,21 @@ class ModelBase(metaclass=abc.ABCMeta):
                           trained_time=self.trained_time,
                           desc=encode(self.impl),
                           formulas=encode(self.formulas),
+                          fit_target=encode(self.fit_target),
                           internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
         return model_desc

-    @abc.abstractclassmethod
+    @abc.abstractmethod
+    @classmethod
     def load(cls, model_desc: dict):
         obj_layout = cls()
         obj_layout.features = model_desc['features']
         obj_layout.formulas = decode(model_desc['formulas'])
         obj_layout.trained_time = model_desc['trained_time']
         obj_layout.impl = decode(model_desc['desc'])
+        if 'fit_target' in model_desc:
+            obj_layout.fit_target = decode(model_desc['fit_target'])
+        else:
+            obj_layout.fit_target = None
         return obj_layout
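
A round-trip sketch of the serialization change, assuming LinearRegression as the concrete subclass and synthetic data for fit(); both are placeholders, the point is the new fit_target entry in the saved description and the fallback in load():

    import numpy as np
    import pandas as pd
    from alphamind.model.linearmodel import LinearRegression

    model = LinearRegression(features=['ep_q'], fit_target='closePrice')
    model.fit(pd.DataFrame({'ep_q': np.random.randn(100)}), np.random.randn(100))

    desc = model.save()                       # now also carries desc['fit_target']
    restored = LinearRegression.load(desc)    # fit_target is decoded back

    # model descriptions saved before this commit have no 'fit_target' key;
    # load() now falls back to None instead of raising KeyError
    legacy_desc = {k: v for k, v in desc.items() if k != 'fit_target'}
    assert LinearRegression.load(legacy_desc).fit_target is None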

alphamind/model/treemodel.py

@@ -27,8 +27,9 @@ class RandomForestRegressor(ModelBase):
                  n_estimators: int = 100,
                  max_features: str = 'auto',
                  features=None,
+                 fit_target=None,
                  **kwargs):
-        super().__init__(features)
+        super().__init__(features=features, fit_target=fit_target)
         self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
                                               max_features=max_features,
                                               **kwargs)
@@ -59,8 +60,9 @@ class RandomForestClassifier(ModelBase):
                  n_estimators: int = 100,
                  max_features: str = 'auto',
                  features=None,
+                 fit_target=None,
                  **kwargs):
-        super().__init__(features)
+        super().__init__(features=features, fit_target=fit_target)
         self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
                                                max_features=max_features,
                                                **kwargs)
@@ -92,9 +94,10 @@ class XGBRegressor(ModelBase):
                  learning_rate: float = 0.1,
                  max_depth: int = 3,
                  features=None,
+                 fit_target=None,
                  n_jobs: int = 1,
                  **kwargs):
-        super().__init__(features)
+        super().__init__(features=features, fit_target=fit_target)
         self.impl = XGBRegressorImpl(n_estimators=n_estimators,
                                      learning_rate=learning_rate,
                                      max_depth=max_depth,
@@ -128,9 +131,10 @@ class XGBClassifier(ModelBase):
                  learning_rate: float = 0.1,
                  max_depth: int = 3,
                  features=None,
+                 fit_target=None,
                  n_jobs: int = 1,
                  **kwargs):
-        super().__init__(features)
+        super().__init__(features=features, fit_target=fit_target)
         self.impl = XGBClassifierImpl(n_estimators=n_estimators,
                                       learning_rate=learning_rate,
                                       max_depth=max_depth,
@@ -171,10 +175,11 @@ class XGBTrainer(ModelBase):
                  subsample=1.,
                  colsample_bytree=1.,
                  features=None,
+                 fit_target=None,
                  random_state: int = 0,
                  n_jobs: int = 1,
                  **kwargs):
-        super().__init__(features)
+        super().__init__(features=features, fit_target=fit_target)
         self.params = {
             'silent': 1,
             'objective': objective,
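
The tree and boosting wrappers follow the same pattern: fit_target is accepted in the constructor and consumed only by ModelBase, while the arguments forwarded to the underlying implementations are unchanged. A construction sketch (hyper-parameters and names are illustrative):

    from alphamind.model.treemodel import RandomForestRegressor, XGBRegressor

    rf = RandomForestRegressor(n_estimators=100,
                               features=['ep_q', 'roe_q'],
                               fit_target='closePrice')

    xgb = XGBRegressor(n_estimators=100,
                       learning_rate=0.1,
                       max_depth=3,
                       features=['ep_q', 'roe_q'],
                       fit_target='closePrice')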