Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
4622cdf5
Unverified
Commit
4622cdf5
authored
Jan 08, 2018
by
lion-sing
Committed by
GitHub
Jan 08, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1 from alpha-miner/master
update repo
parents
575c79ca
b899c787
Changes
21
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
21 changed files
with
11090 additions
and
204 deletions
+11090
-204
.gitmodules
.gitmodules
+3
-0
README.md
README.md
+6
-6
__init__.py
alphamind/__init__.py
+3
-0
api.py
alphamind/api.py
+14
-26
models.py
alphamind/data/dbmodel/models.py
+0
-57
__init__.py
alphamind/model/__init__.py
+12
-1
data_preparing.py
alphamind/model/data_preparing.py
+13
-22
linearmodel.py
alphamind/model/linearmodel.py
+58
-60
loader.py
alphamind/model/loader.py
+15
-0
modelbase.py
alphamind/model/modelbase.py
+20
-7
treemodel.py
alphamind/model/treemodel.py
+127
-20
test_linearmodel.py
alphamind/tests/model/test_linearmodel.py
+36
-3
test_treemodel.py
alphamind/tests/model/test_treemodel.py
+75
-0
test_suite.py
alphamind/tests/test_suite.py
+2
-0
utilities.py
alphamind/utilities.py
+21
-0
build_linux_dependencies.sh
build_linux_dependencies.sh
+15
-2
build_windows_dependencies.bat
build_windows_dependencies.bat
+17
-0
candidate_prod_model_20171204.ipynb
notebooks/candidate_prod_model_20171204.ipynb
+5108
-0
model_comparing.ipynb
notebooks/model_comparing.ipynb
+413
-0
prod_model_20171117.ipynb
notebooks/prod_model_20171117.ipynb
+5131
-0
xgboost
xgboost
+1
-0
No files found.
.gitmodules
View file @
4622cdf5
[submodule "alphamind/pfopt"]
[submodule "alphamind/pfopt"]
path = alphamind/pfopt
path = alphamind/pfopt
url = https://github.com/alpha-miner/portfolio-optimizer.git
url = https://github.com/alpha-miner/portfolio-optimizer.git
[submodule "xgboost"]
path = xgboost
url = https://github.com/dmlc/xgboost.git
README.md
View file @
4622cdf5
...
@@ -42,16 +42,16 @@ alpha - mind 提供了多因子研究中常用的工具链,包括:
...
@@ -42,16 +42,16 @@ alpha - mind 提供了多因子研究中常用的工具链,包括:
在Windows上完整安装,需要有C++编译器(例如msvc):
在Windows上完整安装,需要有C++编译器(例如msvc):
```
bash
```bash
build_windows_dependencies.bat
build_windows_dependencies.bat
```
```
*
Linux
*
Linux
在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran)
在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran)
```
bash
```bash
build_linux_dependencies.sh
build_linux_dependencies.sh
```
```
## 安装
## 安装
...
...
alphamind/__init__.py
View file @
4622cdf5
...
@@ -4,3 +4,6 @@ Created on 2017-4-25
...
@@ -4,3 +4,6 @@ Created on 2017-4-25
@author: cheng.li
@author: cheng.li
"""
"""
__version__
=
"0.1.0"
alphamind/api.py
View file @
4622cdf5
...
@@ -25,11 +25,15 @@ from alphamind.data.standardize import projection
...
@@ -25,11 +25,15 @@ from alphamind.data.standardize import projection
from
alphamind.data.neutralize
import
neutralize
from
alphamind.data.neutralize
import
neutralize
from
alphamind.data.engines.sqlengine
import
factor_tables
from
alphamind.data.engines.sqlengine
import
factor_tables
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.model
import
LinearRegression
from
alphamind.model.linearmodel
import
LassoRegression
from
alphamind.model
import
LassoRegression
from
alphamind.model.linearmodel
import
ConstLinearModel
from
alphamind.model
import
ConstLinearModel
from
alphamind.model.treemodel
import
RandomForestRegressor
from
alphamind.model
import
LogisticRegression
from
alphamind.model.loader
import
load_model
from
alphamind.model
import
RandomForestRegressor
from
alphamind.model
import
RandomForestClassifier
from
alphamind.model
import
XGBRegressor
from
alphamind.model
import
XGBClassifier
from
alphamind.model
import
load_model
from
alphamind.model.data_preparing
import
fetch_data_package
from
alphamind.model.data_preparing
import
fetch_data_package
from
alphamind.model.data_preparing
import
fetch_train_phase
from
alphamind.model.data_preparing
import
fetch_train_phase
...
@@ -39,27 +43,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor
...
@@ -39,27 +43,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor
from
alphamind.execution.pipeline
import
ExecutionPipeline
from
alphamind.execution.pipeline
import
ExecutionPipeline
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
map_freq
def
map_freq
(
freq
):
if
freq
==
'1m'
:
horizon
=
21
elif
freq
==
'1w'
:
horizon
=
4
elif
freq
==
'2w'
:
horizon
=
9
elif
freq
==
'3w'
:
horizon
=
14
elif
freq
==
'4w'
:
horizon
=
19
elif
freq
==
'1d'
:
horizon
=
0
elif
freq
[
-
1
]
==
"b"
:
horizon
=
int
(
freq
[:
-
1
])
-
1
else
:
raise
ValueError
(
"Unrecognized freq: {0}"
.
format
(
freq
))
return
horizon
__all__
=
[
__all__
=
[
...
@@ -85,7 +69,11 @@ __all__ = [
...
@@ -85,7 +69,11 @@ __all__ = [
'LinearRegression'
,
'LinearRegression'
,
'LassoRegression'
,
'LassoRegression'
,
'ConstLinearModel'
,
'ConstLinearModel'
,
'LogisticRegression'
,
'RandomForestRegressor'
,
'RandomForestRegressor'
,
'RandomForestClassifier'
,
'XGBRegressor'
,
'XGBClassifier'
,
'load_model'
,
'load_model'
,
'NaiveExecutor'
,
'NaiveExecutor'
,
'ThresholdExecutor'
,
'ThresholdExecutor'
,
...
...
alphamind/data/dbmodel/models.py
View file @
4622cdf5
...
@@ -664,63 +664,6 @@ class Experimental(Base):
...
@@ -664,63 +664,6 @@ class Experimental(Base):
DROEAfterNonRecurring
=
Column
(
Float
(
53
))
DROEAfterNonRecurring
=
Column
(
Float
(
53
))
CFinc1
=
Column
(
Float
(
53
))
CFinc1
=
Column
(
Float
(
53
))
xueqiu_hotness
=
Column
(
Float
(
53
))
xueqiu_hotness
=
Column
(
Float
(
53
))
con_eps
=
Column
(
Float
(
53
))
con_pb
=
Column
(
Float
(
53
))
con_pb_order
=
Column
(
Float
(
53
))
con_pb_rolling
=
Column
(
Float
(
53
))
con_pb_rolling_order
=
Column
(
Float
(
53
))
con_pe
=
Column
(
Float
(
53
))
con_pe_order
=
Column
(
Float
(
53
))
con_pe_rolling
=
Column
(
Float
(
53
))
con_pe_rolling_order
=
Column
(
Float
(
53
))
con_peg
=
Column
(
Float
(
53
))
con_peg_order
=
Column
(
Float
(
53
))
con_peg_rolling
=
Column
(
Float
(
53
))
con_peg_rolling_order
=
Column
(
Float
(
53
))
con_ps
=
Column
(
Float
(
53
))
con_ps_order
=
Column
(
Float
(
53
))
con_ps_rolling
=
Column
(
Float
(
53
))
con_ps_rolling_order
=
Column
(
Float
(
53
))
con_target_price
=
Column
(
Float
(
53
))
market_confidence_10d
=
Column
(
Float
(
53
))
market_confidence_15d
=
Column
(
Float
(
53
))
market_confidence_25d
=
Column
(
Float
(
53
))
market_confidence_5d
=
Column
(
Float
(
53
))
market_confidence_75d
=
Column
(
Float
(
53
))
optimism_confidence_10d
=
Column
(
Float
(
53
))
optimism_confidence_15d
=
Column
(
Float
(
53
))
optimism_confidence_25d
=
Column
(
Float
(
53
))
optimism_confidence_5d
=
Column
(
Float
(
53
))
optimism_confidence_75d
=
Column
(
Float
(
53
))
pessimism_confidence_10d
=
Column
(
Float
(
53
))
pessimism_confidence_15d
=
Column
(
Float
(
53
))
pessimism_confidence_25d
=
Column
(
Float
(
53
))
pessimism_confidence_5d
=
Column
(
Float
(
53
))
pessimism_confidence_75d
=
Column
(
Float
(
53
))
con_na_yoy
=
Column
(
Float
(
53
))
con_np_yoy
=
Column
(
Float
(
53
))
con_npcgrate_13w
=
Column
(
Float
(
53
))
con_npcgrate_1w
=
Column
(
Float
(
53
))
con_npcgrate_26w
=
Column
(
Float
(
53
))
con_npcgrate_2y
=
Column
(
Float
(
53
))
con_npcgrate_4w
=
Column
(
Float
(
53
))
con_npcgrate_52w
=
Column
(
Float
(
53
))
con_or_yoy
=
Column
(
Float
(
53
))
con_roe_yoy1
=
Column
(
Float
(
53
))
con_roe_yoy2
=
Column
(
Float
(
53
))
con_roe_yoy3
=
Column
(
Float
(
53
))
con_eps_rolling
=
Column
(
Float
(
53
))
con_np
=
Column
(
Float
(
53
))
con_np_rolling
=
Column
(
Float
(
53
))
con_or
=
Column
(
Float
(
53
))
con_or_rolling
=
Column
(
Float
(
53
))
con_roe
=
Column
(
Float
(
53
))
con_na
=
Column
(
Float
(
53
))
con_na_rolling
=
Column
(
Float
(
53
))
mcap
=
Column
(
Float
(
53
))
tcap
=
Column
(
Float
(
53
))
ta
=
Column
(
Float
(
53
))
na
=
Column
(
Float
(
53
))
eps_q
=
Column
(
Float
(
53
))
eps_q
=
Column
(
Float
(
53
))
roe_q
=
Column
(
Float
(
53
))
roe_q
=
Column
(
Float
(
53
))
cfinc1_q
=
Column
(
Float
(
53
))
cfinc1_q
=
Column
(
Float
(
53
))
...
...
alphamind/model/__init__.py
View file @
4622cdf5
...
@@ -8,11 +8,22 @@ Created on 2017-5-2
...
@@ -8,11 +8,22 @@ Created on 2017-5-2
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.model.linearmodel
import
LassoRegression
from
alphamind.model.linearmodel
import
LassoRegression
from
alphamind.model.linearmodel
import
ConstLinearModel
from
alphamind.model.linearmodel
import
ConstLinearModel
from
alphamind.model.linearmodel
import
LogisticRegression
from
alphamind.model.treemodel
import
RandomForestRegressor
from
alphamind.model.treemodel
import
RandomForestRegressor
from
alphamind.model.treemodel
import
RandomForestClassifier
from
alphamind.model.treemodel
import
XGBRegressor
from
alphamind.model.treemodel
import
XGBClassifier
from
alphamind.model.loader
import
load_model
__all__
=
[
'LinearRegression'
,
__all__
=
[
'LinearRegression'
,
'LassoRegression'
,
'LassoRegression'
,
'ConstLinearModel'
,
'ConstLinearModel'
,
'RandomForestRegressor'
]
'LogisticRegression'
,
\ No newline at end of file
'RandomForestRegressor'
,
'RandomForestClassifier'
,
'XGBRegressor'
,
'XGBClassifier'
,
'load_model'
]
\ No newline at end of file
alphamind/model/data_preparing.py
View file @
4622cdf5
...
@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions
...
@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions
from
PyFin.api
import
DateGeneration
from
PyFin.api
import
DateGeneration
from
PyFin.api
import
advanceDateByCalendar
from
PyFin.api
import
advanceDateByCalendar
from
PyFin.DateUtilities
import
Period
from
PyFin.DateUtilities
import
Period
from
PyFin.Enums
import
TimeUnits
from
alphamind.data.transformer
import
Transformer
from
alphamind.data.transformer
import
Transformer
from
alphamind.data.engines.sqlengine
import
SqlEngine
from
alphamind.data.engines.sqlengine
import
SqlEngine
from
alphamind.data.engines.universe
import
Universe
from
alphamind.data.engines.universe
import
Universe
from
alphamind.data.processing
import
factor_processing
from
alphamind.data.processing
import
factor_processing
from
alphamind.data.engines.sqlengine
import
total_risk_factors
from
alphamind.data.engines.sqlengine
import
total_risk_factors
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
map_freq
def
_map_horizon
(
frequency
:
str
)
->
int
:
parsed_period
=
Period
(
frequency
)
unit
=
parsed_period
.
units
()
length
=
parsed_period
.
length
()
if
unit
==
TimeUnits
.
BDays
or
unit
==
TimeUnits
.
Days
:
return
length
-
1
elif
unit
==
TimeUnits
.
Weeks
:
return
5
*
length
-
1
elif
unit
==
TimeUnits
.
Months
:
return
22
*
length
-
1
else
:
raise
ValueError
(
'{0} is an unrecognized frequency rule'
.
format
(
frequency
))
def
_merge_df
(
engine
,
names
,
factor_df
,
return_df
,
universe
,
dates
,
risk_model
,
neutralized_risk
):
def
_merge_df
(
engine
,
names
,
factor_df
,
return_df
,
universe
,
dates
,
risk_model
,
neutralized_risk
):
...
@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine,
...
@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine,
dates
=
[
d
.
strftime
(
'
%
Y-
%
m-
%
d'
)
for
d
in
dates
]
dates
=
[
d
.
strftime
(
'
%
Y-
%
m-
%
d'
)
for
d
in
dates
]
horizon
=
_map_horizon
(
frequency
)
horizon
=
map_freq
(
frequency
)
if
isinstance
(
factors
,
Transformer
):
if
isinstance
(
factors
,
Transformer
):
transformer
=
factors
transformer
=
factors
...
@@ -119,8 +105,10 @@ def batch_processing(x_values,
...
@@ -119,8 +105,10 @@ def batch_processing(x_values,
post_process
):
post_process
):
train_x_buckets
=
{}
train_x_buckets
=
{}
train_y_buckets
=
{}
train_y_buckets
=
{}
train_risk_buckets
=
{}
predict_x_buckets
=
{}
predict_x_buckets
=
{}
predict_y_buckets
=
{}
predict_y_buckets
=
{}
predict_risk_buckets
=
{}
for
i
,
start
in
enumerate
(
groups
[:
-
batch
]):
for
i
,
start
in
enumerate
(
groups
[:
-
batch
]):
end
=
groups
[
i
+
batch
]
end
=
groups
[
i
+
batch
]
...
@@ -146,6 +134,8 @@ def batch_processing(x_values,
...
@@ -146,6 +134,8 @@ def batch_processing(x_values,
risk_factors
=
this_risk_exp
,
risk_factors
=
this_risk_exp
,
post_process
=
post_process
)
post_process
=
post_process
)
train_risk_buckets
[
end
]
=
this_risk_exp
left_index
=
bisect
.
bisect_right
(
group_label
,
start
)
left_index
=
bisect
.
bisect_right
(
group_label
,
start
)
right_index
=
bisect
.
bisect_right
(
group_label
,
end
)
right_index
=
bisect
.
bisect_right
(
group_label
,
end
)
...
@@ -165,6 +155,7 @@ def batch_processing(x_values,
...
@@ -165,6 +155,7 @@ def batch_processing(x_values,
inner_left_index
=
bisect
.
bisect_left
(
sub_dates
,
end
)
inner_left_index
=
bisect
.
bisect_left
(
sub_dates
,
end
)
inner_right_index
=
bisect
.
bisect_right
(
sub_dates
,
end
)
inner_right_index
=
bisect
.
bisect_right
(
sub_dates
,
end
)
predict_x_buckets
[
end
]
=
ne_x
[
inner_left_index
:
inner_right_index
]
predict_x_buckets
[
end
]
=
ne_x
[
inner_left_index
:
inner_right_index
]
predict_risk_buckets
[
end
]
=
this_risk_exp
[
inner_left_index
:
inner_right_index
]
this_raw_y
=
y_values
[
left_index
:
right_index
]
this_raw_y
=
y_values
[
left_index
:
right_index
]
if
len
(
this_raw_y
)
>
0
:
if
len
(
this_raw_y
)
>
0
:
...
@@ -174,7 +165,7 @@ def batch_processing(x_values,
...
@@ -174,7 +165,7 @@ def batch_processing(x_values,
post_process
=
post_process
)
post_process
=
post_process
)
predict_y_buckets
[
end
]
=
ne_y
[
inner_left_index
:
inner_right_index
]
predict_y_buckets
[
end
]
=
ne_y
[
inner_left_index
:
inner_right_index
]
return
train_x_buckets
,
train_y_buckets
,
predict_x_buckets
,
predict_y
_buckets
return
train_x_buckets
,
train_y_buckets
,
train_risk_buckets
,
predict_x_buckets
,
predict_y_buckets
,
predict_risk
_buckets
def
fetch_data_package
(
engine
:
SqlEngine
,
def
fetch_data_package
(
engine
:
SqlEngine
,
...
@@ -216,7 +207,7 @@ def fetch_data_package(engine: SqlEngine,
...
@@ -216,7 +207,7 @@ def fetch_data_package(engine: SqlEngine,
alpha_logger
.
info
(
"Loading data is finished"
)
alpha_logger
.
info
(
"Loading data is finished"
)
train_x_buckets
,
train_y_buckets
,
predict_x_buckets
,
predict_y
_buckets
=
batch_processing
(
train_x_buckets
,
train_y_buckets
,
train_risk_buckets
,
predict_x_buckets
,
predict_y_buckets
,
predict_risk
_buckets
=
batch_processing
(
x_values
,
x_values
,
y_values
,
y_values
,
dates
,
dates
,
...
@@ -231,8 +222,8 @@ def fetch_data_package(engine: SqlEngine,
...
@@ -231,8 +222,8 @@ def fetch_data_package(engine: SqlEngine,
ret
=
dict
()
ret
=
dict
()
ret
[
'x_names'
]
=
transformer
.
names
ret
[
'x_names'
]
=
transformer
.
names
ret
[
'settlement'
]
=
return_df
ret
[
'settlement'
]
=
return_df
ret
[
'train'
]
=
{
'x'
:
train_x_buckets
,
'y'
:
train_y_buckets
}
ret
[
'train'
]
=
{
'x'
:
train_x_buckets
,
'y'
:
train_y_buckets
,
'risk'
:
train_risk_buckets
}
ret
[
'predict'
]
=
{
'x'
:
predict_x_buckets
,
'y'
:
predict_y_buckets
}
ret
[
'predict'
]
=
{
'x'
:
predict_x_buckets
,
'y'
:
predict_y_buckets
,
'risk'
:
predict_risk_buckets
}
return
ret
return
ret
...
@@ -260,7 +251,7 @@ def fetch_train_phase(engine,
...
@@ -260,7 +251,7 @@ def fetch_train_phase(engine,
dateRule
=
BizDayConventions
.
Following
,
dateRule
=
BizDayConventions
.
Following
,
dateGenerationRule
=
DateGeneration
.
Backward
)
dateGenerationRule
=
DateGeneration
.
Backward
)
horizon
=
_map_horizon
(
frequency
)
horizon
=
map_freq
(
frequency
)
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
dates
=
dates
)
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
dates
=
dates
)
return_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
return_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
...
@@ -339,10 +330,10 @@ def fetch_predict_phase(engine,
...
@@ -339,10 +330,10 @@ def fetch_predict_phase(engine,
risk_df
=
risk_df
[[
'trade_date'
,
'code'
]
+
used_neutralized_risk
]
.
dropna
()
risk_df
=
risk_df
[[
'trade_date'
,
'code'
]
+
used_neutralized_risk
]
.
dropna
()
train_x
=
pd
.
merge
(
factor_df
,
risk_df
,
on
=
[
'trade_date'
,
'code'
])
train_x
=
pd
.
merge
(
factor_df
,
risk_df
,
on
=
[
'trade_date'
,
'code'
])
risk_exp
=
train_x
[
neutralized_risk
]
.
values
.
astype
(
float
)
risk_exp
=
train_x
[
neutralized_risk
]
.
values
.
astype
(
float
)
x_values
=
train_x
[
names
]
.
values
.
astype
(
float
)
else
:
else
:
train_x
=
factor_df
.
copy
()
train_x
=
factor_df
.
copy
()
risk_exp
=
None
risk_exp
=
None
x_values
=
train_x
[
names
]
.
values
.
astype
(
float
)
date_label
=
pd
.
DatetimeIndex
(
factor_df
.
trade_date
)
.
to_pydatetime
()
date_label
=
pd
.
DatetimeIndex
(
factor_df
.
trade_date
)
.
to_pydatetime
()
dates
=
np
.
unique
(
date_label
)
dates
=
np
.
unique
(
date_label
)
...
...
alphamind/model/linearmodel.py
View file @
4622cdf5
...
@@ -6,87 +6,75 @@ Created on 2017-5-10
...
@@ -6,87 +6,75 @@ Created on 2017-5-10
"""
"""
import
numpy
as
np
import
numpy
as
np
import
arrow
from
distutils.version
import
LooseVersion
from
distutils.version
import
LooseVersion
from
sklearn
import
__version__
as
sklearn_version
from
sklearn
import
__version__
as
sklearn_version
from
sklearn.linear_model
import
LinearRegression
as
LinearRegressionImpl
from
sklearn.linear_model
import
LinearRegression
as
LinearRegressionImpl
from
sklearn.linear_model
import
Lasso
from
sklearn.linear_model
import
Lasso
from
sklearn.linear_model
import
LogisticRegression
as
LogisticRegressionImpl
from
PyFin.api
import
pyFinAssert
from
PyFin.api
import
pyFinAssert
from
alphamind.model.modelbase
import
ModelBase
from
alphamind.model.modelbase
import
ModelBase
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
encode
from
alphamind.utilities
import
decode
class
ConstLinearModelImpl
(
object
):
def
__init__
(
self
,
weights
:
np
.
ndarray
=
None
):
self
.
weights
=
np
.
array
(
weights
)
.
flatten
()
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
pass
def
predict
(
self
,
x
:
np
.
ndarray
):
return
x
@
self
.
weights
class
ConstLinearModel
(
ModelBase
):
class
ConstLinearModel
(
ModelBase
):
def
__init__
(
self
,
def
__init__
(
self
,
features
:
list
=
None
,
features
:
list
=
None
,
weights
:
np
.
ndarray
=
None
):
weights
:
np
.
ndarray
=
None
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
)
if
features
is
not
None
and
weights
is
not
None
:
if
features
is
not
None
and
weights
is
not
None
:
pyFinAssert
(
len
(
features
)
==
len
(
weights
),
pyFinAssert
(
len
(
features
)
==
len
(
weights
),
ValueError
,
ValueError
,
"length of features is not equal to length of weights"
)
"length of features is not equal to length of weights"
)
self
.
weights
=
np
.
array
(
weights
)
.
flatten
()
self
.
impl
=
ConstLinearModelImpl
(
weights
)
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
pass
def
predict
(
self
,
x
):
return
x
@
self
.
weights
def
save
(
self
):
def
save
(
self
):
model_desc
=
super
()
.
save
()
model_desc
=
super
()
.
save
()
model_desc
[
'weight'
]
=
list
(
self
.
weights
)
model_desc
[
'weight'
]
=
list
(
self
.
impl
.
weights
)
return
model_desc
return
model_desc
@
classmethod
@
classmethod
def
load
(
cls
,
model_desc
:
dict
):
def
load
(
cls
,
model_desc
:
dict
):
obj_layout
=
cls
()
return
super
()
.
load
(
model_desc
)
obj_layout
.
features
=
model_desc
[
'features'
]
obj_layout
.
weights
=
np
.
array
(
model_desc
[
'weight'
])
@
property
return
obj_layout
def
weights
(
self
):
return
self
.
impl
.
weights
.
tolist
()
class
LinearRegression
(
ModelBase
):
class
LinearRegression
(
ModelBase
):
def
__init__
(
self
,
features
:
list
=
None
,
fit_intercept
:
bool
=
False
):
def
__init__
(
self
,
features
:
list
=
None
,
fit_intercept
:
bool
=
False
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
)
self
.
impl
=
LinearRegressionImpl
(
fit_intercept
=
fit_intercept
)
self
.
impl
=
LinearRegressionImpl
(
fit_intercept
=
fit_intercept
,
**
kwargs
)
self
.
trained_time
=
None
self
.
trained_time
=
None
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
self
.
impl
.
fit
(
x
,
y
)
self
.
trained_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
)
def
predict
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
return
self
.
impl
.
predict
(
x
)
def
save
(
self
)
->
dict
:
def
save
(
self
)
->
dict
:
model_desc
=
super
()
.
save
()
model_desc
=
super
()
.
save
()
model_desc
[
'internal_model'
]
=
self
.
impl
.
__class__
.
__module__
+
"."
+
self
.
impl
.
__class__
.
__name__
model_desc
[
'desc'
]
=
encode
(
self
.
impl
)
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'trained_time'
]
=
self
.
trained_time
model_desc
[
'weight'
]
=
self
.
impl
.
coef_
.
tolist
()
model_desc
[
'weight'
]
=
self
.
impl
.
coef_
.
tolist
()
return
model_desc
return
model_desc
def
score
(
self
)
->
float
:
return
self
.
impl
.
score
()
@
classmethod
@
classmethod
def
load
(
cls
,
model_desc
:
dict
):
def
load
(
cls
,
model_desc
:
dict
):
obj_layout
=
cls
()
obj_layout
=
super
()
.
load
(
model_desc
)
obj_layout
.
features
=
model_desc
[
'features'
]
obj_layout
.
trained_time
=
model_desc
[
'trained_time'
]
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'
.
format
(
'Loaded model may work incorrectly.'
.
format
(
sklearn_version
,
sklearn_version
,
model_desc
[
'sklearn_version'
]))
model_desc
[
'sklearn_version'
]))
obj_layout
.
impl
=
decode
(
model_desc
[
'desc'
])
return
obj_layout
return
obj_layout
@
property
@
property
...
@@ -96,42 +84,52 @@ class LinearRegression(ModelBase):
...
@@ -96,42 +84,52 @@ class LinearRegression(ModelBase):
class
LassoRegression
(
ModelBase
):
class
LassoRegression
(
ModelBase
):
def
__init__
(
self
,
alpha
,
features
:
list
=
None
,
fit_intercept
:
bool
=
False
):
def
__init__
(
self
,
alpha
=
0.01
,
features
:
list
=
None
,
fit_intercept
:
bool
=
False
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
)
self
.
impl
=
Lasso
(
alpha
=
alpha
,
fit_intercept
=
fit_intercept
)
self
.
impl
=
Lasso
(
alpha
=
alpha
,
fit_intercept
=
fit_intercept
,
**
kwargs
)
self
.
trained_time
=
None
self
.
trained_time
=
None
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
def
save
(
self
)
->
dict
:
self
.
impl
.
fit
(
x
,
y
)
model_desc
=
super
()
.
save
()
self
.
trained_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
)
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'weight'
]
=
self
.
impl
.
coef_
.
tolist
()
return
model_desc
@
classmethod
def
load
(
cls
,
model_desc
:
dict
):
obj_layout
=
super
()
.
load
(
model_desc
)
def
predict
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
return
self
.
impl
.
predict
(
x
)
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'
.
format
(
sklearn_version
,
model_desc
[
'sklearn_version'
]))
return
obj_layout
@
property
def
weights
(
self
):
return
self
.
impl
.
coef_
.
tolist
()
class
LogisticRegression
(
ModelBase
):
def
__init__
(
self
,
features
:
list
=
None
,
fit_intercept
:
bool
=
False
,
**
kwargs
):
super
()
.
__init__
(
features
)
self
.
impl
=
LogisticRegressionImpl
(
fit_intercept
=
fit_intercept
,
**
kwargs
)
def
save
(
self
)
->
dict
:
def
save
(
self
)
->
dict
:
model_desc
=
super
()
.
save
()
model_desc
=
super
()
.
save
()
model_desc
[
'internal_model'
]
=
self
.
impl
.
__class__
.
__module__
+
"."
+
self
.
impl
.
__class__
.
__name__
model_desc
[
'desc'
]
=
encode
(
self
.
impl
)
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'trained_time'
]
=
self
.
trained_time
model_desc
[
'weight'
]
=
self
.
impl
.
coef_
.
tolist
()
model_desc
[
'weight'
]
=
self
.
impl
.
coef_
.
tolist
()
return
model_desc
return
model_desc
def
score
(
self
)
->
float
:
return
self
.
impl
.
score
()
@
classmethod
@
classmethod
def
load
(
cls
,
model_desc
:
dict
):
def
load
(
cls
,
model_desc
:
dict
):
obj_layout
=
cls
(
alpha
=
0.
)
obj_layout
=
super
()
.
load
(
model_desc
)
obj_layout
.
features
=
model_desc
[
'features'
]
obj_layout
.
trained_time
=
model_desc
[
'trained_time'
]
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'
.
format
(
'Loaded model may work incorrectly.'
.
format
(
sklearn_version
,
sklearn_version
,
model_desc
[
'sklearn_version'
]))
model_desc
[
'sklearn_version'
]))
obj_layout
.
impl
=
decode
(
model_desc
[
'desc'
])
return
obj_layout
return
obj_layout
@
property
@
property
...
@@ -140,8 +138,8 @@ class LassoRegression(ModelBase):
...
@@ -140,8 +138,8 @@ class LassoRegression(ModelBase):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
import
pprint
import
pprint
ls
=
ConstLinearModel
([
'a'
,
'b'
],
np
.
array
([
0.5
,
0.5
]))
ls
=
ConstLinearModel
([
'a'
,
'b'
],
np
.
array
([
0.5
,
0.5
]))
x
=
np
.
array
([[
0.2
,
0.2
],
x
=
np
.
array
([[
0.2
,
0.2
],
...
...
alphamind/model/loader.py
View file @
4622cdf5
...
@@ -9,6 +9,11 @@ from alphamind.model.modelbase import ModelBase
...
@@ -9,6 +9,11 @@ from alphamind.model.modelbase import ModelBase
from
alphamind.model.linearmodel
import
ConstLinearModel
from
alphamind.model.linearmodel
import
ConstLinearModel
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.model.linearmodel
import
LassoRegression
from
alphamind.model.linearmodel
import
LassoRegression
from
alphamind.model.linearmodel
import
LogisticRegression
from
alphamind.model.treemodel
import
RandomForestRegressor
from
alphamind.model.treemodel
import
RandomForestClassifier
from
alphamind.model.treemodel
import
XGBRegressor
from
alphamind.model.treemodel
import
XGBClassifier
def
load_model
(
model_desc
:
dict
)
->
ModelBase
:
def
load_model
(
model_desc
:
dict
)
->
ModelBase
:
...
@@ -22,5 +27,15 @@ def load_model(model_desc: dict) -> ModelBase:
...
@@ -22,5 +27,15 @@ def load_model(model_desc: dict) -> ModelBase:
return
LinearRegression
.
load
(
model_desc
)
return
LinearRegression
.
load
(
model_desc
)
elif
'LassoRegression'
in
model_name_parts
:
elif
'LassoRegression'
in
model_name_parts
:
return
LassoRegression
.
load
(
model_desc
)
return
LassoRegression
.
load
(
model_desc
)
elif
'LogisticRegression'
in
model_name_parts
:
return
LogisticRegression
.
load
(
model_desc
)
elif
'RandomForestRegressor'
in
model_name_parts
:
return
RandomForestRegressor
.
load
(
model_desc
)
elif
'RandomForestClassifier'
in
model_name_parts
:
return
RandomForestClassifier
.
load
(
model_desc
)
elif
'XGBRegressor'
in
model_name_parts
:
return
XGBRegressor
.
load
(
model_desc
)
elif
'XGBClassifier'
in
model_name_parts
:
return
XGBClassifier
.
load
(
model_desc
)
else
:
else
:
raise
ValueError
(
'{0} is not currently supported in model loader.'
.
format
(
model_name
))
raise
ValueError
(
'{0} is not currently supported in model loader.'
.
format
(
model_name
))
alphamind/model/modelbase.py
View file @
4622cdf5
...
@@ -9,6 +9,8 @@ import abc
...
@@ -9,6 +9,8 @@ import abc
import
arrow
import
arrow
import
numpy
as
np
import
numpy
as
np
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
encode
from
alphamind.utilities
import
decode
class
ModelBase
(
metaclass
=
abc
.
ABCMeta
):
class
ModelBase
(
metaclass
=
abc
.
ABCMeta
):
...
@@ -16,14 +18,18 @@ class ModelBase(metaclass=abc.ABCMeta):
...
@@ -16,14 +18,18 @@ class ModelBase(metaclass=abc.ABCMeta):
def
__init__
(
self
,
features
:
list
=
None
):
def
__init__
(
self
,
features
:
list
=
None
):
if
features
is
not
None
:
if
features
is
not
None
:
self
.
features
=
list
(
features
)
self
.
features
=
list
(
features
)
self
.
impl
=
None
self
.
trained_time
=
None
@
abc
.
abstractmethod
def
fit
(
self
,
x
,
y
):
def
fit
(
self
,
x
,
y
):
pass
self
.
impl
.
fit
(
x
,
y
.
flatten
())
self
.
trained_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
)
@
abc
.
abstractmethod
def
predict
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
def
predict
(
self
,
x
)
->
np
.
ndarray
:
return
self
.
impl
.
predict
(
x
)
pass
def
score
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
)
->
float
:
return
self
.
impl
.
score
(
x
,
y
)
@
abc
.
abstractmethod
@
abc
.
abstractmethod
def
save
(
self
)
->
dict
:
def
save
(
self
)
->
dict
:
...
@@ -34,10 +40,17 @@ class ModelBase(metaclass=abc.ABCMeta):
...
@@ -34,10 +40,17 @@ class ModelBase(metaclass=abc.ABCMeta):
model_desc
=
dict
(
model_name
=
self
.
__class__
.
__module__
+
"."
+
self
.
__class__
.
__name__
,
model_desc
=
dict
(
model_name
=
self
.
__class__
.
__module__
+
"."
+
self
.
__class__
.
__name__
,
language
=
'python'
,
language
=
'python'
,
saved_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
),
saved_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
),
features
=
list
(
self
.
features
))
features
=
list
(
self
.
features
),
trained_time
=
self
.
trained_time
,
desc
=
encode
(
self
.
impl
),
internal_model
=
self
.
impl
.
__class__
.
__module__
+
"."
+
self
.
impl
.
__class__
.
__name__
)
return
model_desc
return
model_desc
@
abc
.
abstractclassmethod
@
abc
.
abstractclassmethod
def
load
(
cls
,
model_desc
:
dict
):
def
load
(
cls
,
model_desc
:
dict
):
pass
obj_layout
=
cls
()
obj_layout
.
features
=
model_desc
[
'features'
]
obj_layout
.
trained_time
=
model_desc
[
'trained_time'
]
obj_layout
.
impl
=
decode
(
model_desc
[
'desc'
])
return
obj_layout
alphamind/model/treemodel.py
View file @
4622cdf5
...
@@ -5,47 +5,154 @@ Created on 2017-12-4
...
@@ -5,47 +5,154 @@ Created on 2017-12-4
@author: cheng.li
@author: cheng.li
"""
"""
import
arrow
from
typing
import
List
import
numpy
as
np
from
distutils.version
import
LooseVersion
from
distutils.version
import
LooseVersion
from
sklearn
import
__version__
as
sklearn_version
from
sklearn
import
__version__
as
sklearn_version
from
sklearn.ensemble
import
RandomForestRegressor
as
RandomForestRegressorImpl
from
sklearn.ensemble
import
RandomForestRegressor
as
RandomForestRegressorImpl
from
sklearn.ensemble
import
RandomForestClassifier
as
RandomForestClassifierImpl
from
xgboost
import
__version__
as
xgbboot_version
from
xgboost
import
XGBRegressor
as
XGBRegressorImpl
from
xgboost
import
XGBClassifier
as
XGBClassifierImpl
from
alphamind.model.modelbase
import
ModelBase
from
alphamind.model.modelbase
import
ModelBase
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
alpha_logger
from
alphamind.utilities
import
encode
from
alphamind.utilities
import
decode
class
RandomForestRegressor
(
ModelBase
):
class
RandomForestRegressor
(
ModelBase
):
def
__init__
(
self
,
n_estimators
,
features
=
None
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
n_estimators
:
int
=
100
,
max_features
:
str
=
'auto'
,
features
:
List
=
None
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
)
self
.
impl
=
RandomForestRegressorImpl
(
n_estimators
,
*
args
,
**
kwargs
)
self
.
impl
=
RandomForestRegressorImpl
(
n_estimators
=
n_estimators
,
max_features
=
max_features
,
**
kwargs
)
self
.
trained_time
=
None
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
def
save
(
self
)
->
dict
:
self
.
impl
.
fit
(
x
,
y
)
model_desc
=
super
()
.
save
()
self
.
trained_time
=
arrow
.
now
()
.
format
(
"YYYY-MM-DD HH:mm:ss"
)
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'importances'
]
=
self
.
importances
return
model_desc
@
classmethod
def
load
(
cls
,
model_desc
:
dict
):
obj_layout
=
super
()
.
load
(
model_desc
)
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'
.
format
(
sklearn_version
,
model_desc
[
'sklearn_version'
]))
return
obj_layout
@
property
def
importances
(
self
):
return
self
.
impl
.
feature_importances_
.
tolist
()
def
predict
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
return
self
.
impl
.
predict
(
x
)
class
RandomForestClassifier
(
ModelBase
):
def
__init__
(
self
,
n_estimators
:
int
=
100
,
max_features
:
str
=
'auto'
,
features
:
List
=
None
,
**
kwargs
):
super
()
.
__init__
(
features
)
self
.
impl
=
RandomForestClassifierImpl
(
n_estimators
=
n_estimators
,
max_features
=
max_features
,
**
kwargs
)
self
.
trained_time
=
None
def
save
(
self
)
->
dict
:
def
save
(
self
)
->
dict
:
model_desc
=
super
()
.
save
()
model_desc
=
super
()
.
save
()
model_desc
[
'internal_model'
]
=
self
.
impl
.
__class__
.
__module__
+
"."
+
self
.
impl
.
__class__
.
__name__
model_desc
[
'desc'
]
=
encode
(
self
.
impl
)
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'sklearn_version'
]
=
sklearn_version
model_desc
[
'trained_time'
]
=
self
.
trained_time
model_desc
[
'importances'
]
=
self
.
importances
return
model_desc
@
classmethod
@
classmethod
def
load
(
cls
,
model_desc
:
dict
):
def
load
(
cls
,
model_desc
:
dict
):
obj_layout
=
cls
()
obj_layout
=
super
()
.
load
(
model_desc
)
obj_layout
.
features
=
model_desc
[
'features'
]
obj_layout
.
trained_time
=
model_desc
[
'trained_time'
]
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
if
LooseVersion
(
sklearn_version
)
<
LooseVersion
(
model_desc
[
'sklearn_version'
]):
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
alpha_logger
.
warning
(
'Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'
.
format
(
'Loaded model may work incorrectly.'
.
format
(
sklearn_version
,
sklearn_version
,
model_desc
[
'sklearn_version'
]))
model_desc
[
'sklearn_version'
]))
return
obj_layout
@
property
def
importances
(
self
):
return
self
.
impl
.
feature_importances_
.
tolist
()
obj_layout
.
impl
=
decode
(
model_desc
[
'desc'
])
class XGBRegressor(ModelBase):
    """Regressor wrapper around ``xgboost.XGBRegressor``.

    The wrapped estimator is stored in ``self.impl``; fitting, prediction and
    the generic part of (de)serialisation are inherited from ``ModelBase``.
    """

    def __init__(self,
                 n_estimators: int=100,
                 learning_rate: float=0.1,
                 max_depth: int=3,
                 features: List=None,
                 **kwargs):
        """Build the underlying xgboost regressor.

        :param n_estimators: forwarded to ``XGBRegressorImpl``.
        :param learning_rate: forwarded to ``XGBRegressorImpl``.
        :param max_depth: forwarded to ``XGBRegressorImpl``.
        :param features: feature names handed to ``ModelBase.__init__``.
        :param kwargs: any further keyword arguments for ``XGBRegressorImpl``.
        """
        super().__init__(features)
        self.impl = XGBRegressorImpl(n_estimators=n_estimators,
                                     learning_rate=learning_rate,
                                     max_depth=max_depth,
                                     **kwargs)

    def save(self) -> dict:
        """Return the base model description extended with the xgboost
        version in use and the fitted feature importances."""
        model_desc = super().save()
        # NOTE: the key spelling 'xgbboot_version' is kept as-is so that
        # descriptions saved earlier can still be loaded.
        model_desc['xgbboot_version'] = xgbboot_version
        model_desc['importances'] = self.importances
        return model_desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from a description produced by :meth:`save`.

        Logs a warning when the installed xgboost is older than the version
        recorded in ``model_desc``.
        """
        obj_layout = super().load(model_desc)

        # Compare the *xgboost* versions: the model was serialised by xgboost,
        # so the installed sklearn version is irrelevant for this check.
        if LooseVersion(xgbboot_version) < LooseVersion(model_desc['xgbboot_version']):
            alpha_logger.warning('Current xgboost version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(xgbboot_version,
                                                                             model_desc['xgbboot_version']))
        return obj_layout

    @property
    def importances(self):
        """Feature importances of the wrapped estimator as a plain list."""
        return self.impl.feature_importances_.tolist()
class XGBClassifier(ModelBase):
    """Classifier wrapper around ``xgboost.XGBClassifier``.

    The wrapped estimator is stored in ``self.impl``; fitting, prediction and
    the generic part of (de)serialisation are inherited from ``ModelBase``.
    """

    def __init__(self,
                 n_estimators: int=100,
                 learning_rate: float=0.1,
                 max_depth: int=3,
                 features: List=None,
                 **kwargs):
        """Build the underlying xgboost classifier.

        :param n_estimators: forwarded to ``XGBClassifierImpl``.
        :param learning_rate: forwarded to ``XGBClassifierImpl``.
        :param max_depth: forwarded to ``XGBClassifierImpl``.
        :param features: feature names handed to ``ModelBase.__init__``.
        :param kwargs: any further keyword arguments for ``XGBClassifierImpl``.
        """
        super().__init__(features)
        self.impl = XGBClassifierImpl(n_estimators=n_estimators,
                                      learning_rate=learning_rate,
                                      max_depth=max_depth,
                                      **kwargs)

    def save(self) -> dict:
        """Return the base model description extended with the xgboost
        version in use and the fitted feature importances."""
        model_desc = super().save()
        # NOTE: the key spelling 'xgbboot_version' is kept as-is so that
        # descriptions saved earlier can still be loaded.
        model_desc['xgbboot_version'] = xgbboot_version
        model_desc['importances'] = self.importances
        return model_desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from a description produced by :meth:`save`.

        Logs a warning when the installed xgboost is older than the version
        recorded in ``model_desc``.
        """
        obj_layout = super().load(model_desc)

        # Compare the *xgboost* versions: the model was serialised by xgboost,
        # so the installed sklearn version is irrelevant for this check.
        if LooseVersion(xgbboot_version) < LooseVersion(model_desc['xgbboot_version']):
            alpha_logger.warning('Current xgboost version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(xgbboot_version,
                                                                             model_desc['xgbboot_version']))
        return obj_layout

    @property
    def importances(self):
        """Feature importances of the wrapped estimator as a plain list."""
        return self.impl.feature_importances_.tolist()
alphamind/tests/model/test_linearmodel.py
View file @
4622cdf5
...
@@ -8,8 +8,11 @@ Created on 2017-9-4
...
@@ -8,8 +8,11 @@ Created on 2017-9-4
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
from
sklearn.linear_model
import
LinearRegression
as
LinearRegression2
from
sklearn.linear_model
import
LinearRegression
as
LinearRegression2
from
alphamind.model.loader
import
load_model
from
alphamind.model.linearmodel
import
ConstLinearModel
from
alphamind.model.linearmodel
import
ConstLinearModel
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.model.linearmodel
import
LinearRegression
from
sklearn.linear_model
import
LogisticRegression
as
LogisticRegression2
from
alphamind.model.linearmodel
import
LogisticRegression
class
TestLinearModel
(
unittest
.
TestCase
):
class
TestLinearModel
(
unittest
.
TestCase
):
...
@@ -17,7 +20,8 @@ class TestLinearModel(unittest.TestCase):
...
@@ -17,7 +20,8 @@ class TestLinearModel(unittest.TestCase):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
n
=
3
self
.
n
=
3
self
.
train_x
=
np
.
random
.
randn
(
1000
,
self
.
n
)
self
.
train_x
=
np
.
random
.
randn
(
1000
,
self
.
n
)
self
.
train_y
=
np
.
random
.
randn
(
1000
,
1
)
self
.
train_y
=
np
.
random
.
randn
(
1000
)
self
.
train_y_label
=
np
.
where
(
self
.
train_y
>
0.
,
1
,
0
)
self
.
predict_x
=
np
.
random
.
randn
(
10
,
self
.
n
)
self
.
predict_x
=
np
.
random
.
randn
(
10
,
self
.
n
)
def
test_const_linear_model
(
self
):
def
test_const_linear_model
(
self
):
...
@@ -36,7 +40,7 @@ class TestLinearModel(unittest.TestCase):
...
@@ -36,7 +40,7 @@ class TestLinearModel(unittest.TestCase):
weights
=
weights
)
weights
=
weights
)
desc
=
model
.
save
()
desc
=
model
.
save
()
new_model
=
ConstLinearModel
.
load
(
desc
)
new_model
=
load_model
(
desc
)
self
.
assertEqual
(
model
.
features
,
new_model
.
features
)
self
.
assertEqual
(
model
.
features
,
new_model
.
features
)
np
.
testing
.
assert_array_almost_equal
(
model
.
weights
,
new_model
.
weights
)
np
.
testing
.
assert_array_almost_equal
(
model
.
weights
,
new_model
.
weights
)
...
@@ -52,15 +56,44 @@ class TestLinearModel(unittest.TestCase):
...
@@ -52,15 +56,44 @@ class TestLinearModel(unittest.TestCase):
expected_y
=
expected_model
.
predict
(
self
.
predict_x
)
expected_y
=
expected_model
.
predict
(
self
.
predict_x
)
np
.
testing
.
assert_array_almost_equal
(
calculated_y
,
expected_y
)
np
.
testing
.
assert_array_almost_equal
(
calculated_y
,
expected_y
)
np
.
testing
.
assert_array_almost_equal
(
expected_model
.
coef_
,
model
.
weights
)
def
test_linear_regression_persistence
(
self
):
def
test_linear_regression_persistence
(
self
):
model
=
LinearRegression
([
'a'
,
'b'
,
'c'
],
fit_intercept
=
False
)
model
=
LinearRegression
([
'a'
,
'b'
,
'c'
],
fit_intercept
=
False
)
model
.
fit
(
self
.
train_x
,
self
.
train_y
)
model
.
fit
(
self
.
train_x
,
self
.
train_y
)
desc
=
model
.
save
()
desc
=
model
.
save
()
new_model
=
LinearRegression
.
load
(
desc
)
new_model
=
load_model
(
desc
)
calculated_y
=
new_model
.
predict
(
self
.
predict_x
)
calculated_y
=
new_model
.
predict
(
self
.
predict_x
)
expected_y
=
model
.
predict
(
self
.
predict_x
)
expected_y
=
model
.
predict
(
self
.
predict_x
)
np
.
testing
.
assert_array_almost_equal
(
calculated_y
,
expected_y
)
np
.
testing
.
assert_array_almost_equal
(
calculated_y
,
expected_y
)
np
.
testing
.
assert_array_almost_equal
(
new_model
.
weights
,
model
.
weights
)
def test_logistic_regression(self):
    """The wrapped logistic regression must agree with plain sklearn on
    both the predicted labels and the fitted coefficients."""
    feature_names = ['a', 'b', 'c']

    # Model under test.
    wrapped = LogisticRegression(feature_names, fit_intercept=False)
    wrapped.fit(self.train_x, self.train_y_label)
    calculated_y = wrapped.predict(self.predict_x)

    # Reference: sklearn's estimator fitted on the same data.
    reference = LogisticRegression2(fit_intercept=False)
    reference.fit(self.train_x, self.train_y_label)
    expected_y = reference.predict(self.predict_x)

    np.testing.assert_array_equal(calculated_y, expected_y)
    np.testing.assert_array_almost_equal(reference.coef_, wrapped.weights)
def test_logistic_regression_persistence(self):
    """A logistic regression must survive a save / load_model round trip
    with identical predictions and weights."""
    # Use the logistic model here: the original built a LinearRegression,
    # so the logistic persistence path was never actually tested.
    model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
    model.fit(self.train_x, self.train_y_label)

    desc = model.save()
    new_model = load_model(desc)

    calculated_y = new_model.predict(self.predict_x)
    expected_y = model.predict(self.predict_x)

    np.testing.assert_array_almost_equal(calculated_y, expected_y)
    np.testing.assert_array_almost_equal(new_model.weights, model.weights)
alphamind/tests/model/test_treemodel.py
0 → 100644
View file @
4622cdf5
# -*- coding: utf-8 -*-
"""
Created on 2018-1-5
@author: cheng.li
"""
import
unittest
import
numpy
as
np
from
alphamind.model.loader
import
load_model
from
alphamind.model.treemodel
import
RandomForestRegressor
from
alphamind.model.treemodel
import
RandomForestClassifier
from
alphamind.model.treemodel
import
XGBRegressor
from
alphamind.model.treemodel
import
XGBClassifier
class TestTreeModel(unittest.TestCase):
    """Persistence round-trip tests for the tree based models."""

    def _check_persistence(self, model_cls, classify):
        """Fit ``model_cls`` on random data, save it, reload it through
        ``load_model`` and check that features and predictions match.

        When ``classify`` is true the random target is turned into 0/1
        labels before fitting.
        """
        model = model_cls(features=list(range(10)))

        x = np.random.randn(1000, 10)
        y = np.random.randn(1000)
        if classify:
            y = np.where(y > 0, 1, 0)

        model.fit(x, y)

        restored = load_model(model.save())
        self.assertEqual(model.features, restored.features)

        sample_x = np.random.randn(100, 10)
        np.testing.assert_array_almost_equal(model.predict(sample_x),
                                             restored.predict(sample_x))

    def test_random_forest_regress_persistence(self):
        self._check_persistence(RandomForestRegressor, classify=False)

    def test_random_forest_classify_persistence(self):
        self._check_persistence(RandomForestClassifier, classify=True)

    def test_xgb_regress_persistence(self):
        self._check_persistence(XGBRegressor, classify=False)

    def test_xgb_classify_persistence(self):
        self._check_persistence(XGBClassifier, classify=True)
alphamind/tests/test_suite.py
View file @
4622cdf5
...
@@ -28,6 +28,7 @@ from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
...
@@ -28,6 +28,7 @@ from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
from
alphamind.tests.analysis.test_factoranalysis
import
TestFactorAnalysis
from
alphamind.tests.analysis.test_factoranalysis
import
TestFactorAnalysis
from
alphamind.tests.analysis.test_quantilieanalysis
import
TestQuantileAnalysis
from
alphamind.tests.analysis.test_quantilieanalysis
import
TestQuantileAnalysis
from
alphamind.tests.model.test_linearmodel
import
TestLinearModel
from
alphamind.tests.model.test_linearmodel
import
TestLinearModel
from
alphamind.tests.model.test_treemodel
import
TestTreeModel
from
alphamind.tests.model.test_loader
import
TestLoader
from
alphamind.tests.model.test_loader
import
TestLoader
from
alphamind.tests.execution.test_naiveexecutor
import
TestNaiveExecutor
from
alphamind.tests.execution.test_naiveexecutor
import
TestNaiveExecutor
from
alphamind.tests.execution.test_thresholdexecutor
import
TestThresholdExecutor
from
alphamind.tests.execution.test_thresholdexecutor
import
TestThresholdExecutor
...
@@ -54,6 +55,7 @@ if __name__ == '__main__':
...
@@ -54,6 +55,7 @@ if __name__ == '__main__':
TestFactorAnalysis
,
TestFactorAnalysis
,
TestQuantileAnalysis
,
TestQuantileAnalysis
,
TestLinearModel
,
TestLinearModel
,
TestTreeModel
,
TestLoader
,
TestLoader
,
TestNaiveExecutor
,
TestNaiveExecutor
,
TestThresholdExecutor
,
TestThresholdExecutor
,
...
...
alphamind/utilities.py
View file @
4622cdf5
...
@@ -16,6 +16,27 @@ import numba as nb
...
@@ -16,6 +16,27 @@ import numba as nb
alpha_logger
=
CustomLogger
(
'ALPHA_MIND'
,
'info'
)
alpha_logger
=
CustomLogger
(
'ALPHA_MIND'
,
'info'
)
def map_freq(freq):
    """Translate a frequency label into its horizon value.

    Fixed labels map as follows: ``'1d'`` -> 0, ``'1w'`` -> 4, ``'2w'`` -> 9,
    ``'3w'`` -> 14, ``'4w'`` -> 19, ``'1m'`` -> 21.  A label of the form
    ``'<n>b'`` maps to ``n - 1``.

    :param freq: frequency label (string).
    :return: the horizon as an integer.
    :raises ValueError: if ``freq`` is empty, not a string, or not one of
        the recognised labels.
    """
    fixed_horizons = {
        '1m': 21,
        '1w': 4,
        '2w': 9,
        '3w': 14,
        '4w': 19,
        '1d': 0,
    }

    # Guard first: an empty string would otherwise fail on ``freq[-1]`` with
    # an IndexError, and a non-string with a TypeError.
    if not isinstance(freq, str) or not freq:
        raise ValueError("Unrecognized freq: {0}".format(freq))

    if freq in fixed_horizons:
        return fixed_horizons[freq]

    if freq[-1] == "b":
        try:
            return int(freq[:-1]) - 1
        except ValueError:
            # Non-numeric prefix, e.g. "b" or "xb": report it like any other
            # unknown label instead of leaking the int() message.
            raise ValueError("Unrecognized freq: {0}".format(freq)) from None

    raise ValueError("Unrecognized freq: {0}".format(freq))
def
groupby
(
groups
):
def
groupby
(
groups
):
order
=
groups
.
argsort
()
order
=
groups
.
argsort
()
t
=
groups
[
order
]
t
=
groups
[
order
]
...
...
build_linux_dependencies.sh
View file @
4622cdf5
#!/bin/sh
#!/bin/sh
cd
alphamind/pfopt
cd
xgboost
git submodule init
git submodule update
./build_linux.sh
make
-j4
cd
python-package
python setup.py
install
if
[
$?
-ne
0
]
;
then
cd
../..
exit
1
fi
cd
../..
cd
alphamind/pfopt
./build_linux.sh
if
[
$?
-ne
0
]
;
then
if
[
$?
-ne
0
]
;
then
cd
../..
cd
../..
exit
1
exit
1
...
...
build_windows_dependencies.bat
View file @
4622cdf5
@echo off
@echo off
cd xgboost
git submodule init
git submodule update
mkdir build
cd build
cmake .. -G "Visual Studio 14 2015 Win64"
msbuild xgboost.sln /m /p:Configuration=Release /p:Platform=x64
if %errorlevel% neq 0 exit /b 1
cd ../python-package
python setup.py install
if %errorlevel% neq 0 exit /b 1
cd ../..
cd alphamind\pfopt
cd alphamind\pfopt
call build_windows.bat
call build_windows.bat
...
...
notebooks/candidate_prod_model_20171204.ipynb
0 → 100644
View file @
4622cdf5
This diff is collapsed.
Click to expand it.
notebooks/model_comparing.ipynb
0 → 100644
View file @
4622cdf5
This diff is collapsed.
Click to expand it.
notebooks/prod_model_20171117.ipynb
0 → 100644
View file @
4622cdf5
This diff is collapsed.
Click to expand it.
xgboost
@
bf436718
Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment