Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
6f2b65e5
Unverified
Commit
6f2b65e5
authored
Apr 13, 2018
by
iLampard
Committed by
GitHub
Apr 13, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #7 from alpha-miner/master
merge update
parents
3dd7d3f7
31805b28
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
133 additions
and
44 deletions
+133
-44
sqlengine.py
alphamind/data/engines/sqlengine.py
+53
-8
utilities.py
alphamind/data/engines/utilities.py
+7
-2
targetvolexecutor.py
alphamind/execution/targetvolexecutor.py
+2
-2
composer.py
alphamind/model/composer.py
+2
-1
data_preparing.py
alphamind/model/data_preparing.py
+35
-15
linearmodel.py
alphamind/model/linearmodel.py
+9
-8
modelbase.py
alphamind/model/modelbase.py
+15
-3
treemodel.py
alphamind/model/treemodel.py
+10
-5
No files found.
alphamind/data/engines/sqlengine.py
View file @
6f2b65e5
...
...
@@ -417,8 +417,7 @@ class SqlEngine(object):
df
=
pd
.
read_sql
(
query
,
self
.
engine
)
if
universe
.
is_filtered
:
codes
=
universe
.
query
(
self
,
start_date
,
end_date
,
dates
)
df
=
pd
.
merge
(
df
,
codes
,
how
=
'inner'
,
on
=
[
'trade_date'
,
'code'
])
df
=
pd
.
merge
(
df
,
universe_df
,
how
=
'inner'
,
on
=
[
'trade_date'
,
'code'
])
if
external_data
is
not
None
:
df
=
pd
.
merge
(
df
,
external_data
,
on
=
[
'trade_date'
,
'code'
])
.
dropna
()
...
...
@@ -435,6 +434,55 @@ class SqlEngine(object):
df
=
df
.
reset_index
()
return
pd
.
merge
(
df
,
universe_df
[[
'trade_date'
,
'code'
]],
how
=
'inner'
)
def
fetch_factor_range_forward
(
self
,
universe
:
Universe
,
factors
:
Union
[
Transformer
,
object
],
start_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
):
if
isinstance
(
factors
,
Transformer
):
transformer
=
factors
else
:
transformer
=
Transformer
(
factors
)
dependency
=
transformer
.
dependency
factor_cols
=
_map_factors
(
dependency
,
factor_tables
)
codes
=
universe
.
query
(
self
,
start_date
,
end_date
,
dates
)
total_codes
=
codes
.
code
.
unique
()
.
tolist
()
total_dates
=
codes
.
trade_date
.
astype
(
str
)
.
unique
()
.
tolist
()
big_table
=
Market
joined_tables
=
set
()
joined_tables
.
add
(
Market
.
__table__
.
name
)
for
t
in
set
(
factor_cols
.
values
()):
if
t
.
__table__
.
name
not
in
joined_tables
:
if
dates
is
not
None
:
big_table
=
outerjoin
(
big_table
,
t
,
and_
(
Market
.
trade_date
==
t
.
trade_date
,
Market
.
code
==
t
.
code
,
Market
.
trade_date
.
in_
(
dates
)))
else
:
big_table
=
outerjoin
(
big_table
,
t
,
and_
(
Market
.
trade_date
==
t
.
trade_date
,
Market
.
code
==
t
.
code
,
Market
.
trade_date
.
between
(
start_date
,
end_date
)))
joined_tables
.
add
(
t
.
__table__
.
name
)
stats
=
func
.
lag
(
list
(
factor_cols
.
keys
())[
0
],
-
1
)
.
over
(
partition_by
=
Market
.
code
,
order_by
=
Market
.
trade_date
)
.
label
(
'dx'
)
query
=
select
([
Market
.
trade_date
,
Market
.
code
,
stats
])
.
select_from
(
big_table
)
.
where
(
and_
(
Market
.
trade_date
.
in_
(
total_dates
),
Market
.
code
.
in_
(
total_codes
)
)
)
df
=
pd
.
read_sql
(
query
,
self
.
engine
)
.
sort_values
([
'trade_date'
,
'code'
])
return
df
def
fetch_benchmark
(
self
,
ref_date
:
str
,
benchmark
:
int
,
...
...
@@ -988,9 +1036,6 @@ if __name__ == '__main__':
universe
=
Universe
(
''
,
[
'zz800'
])
codes
=
engine
.
fetch_codes
(
ref_date
,
universe
)
dates
=
makeSchedule
(
'2010-01-01'
,
'2018-02-01'
,
'10b'
,
'china.sse'
)
# df = engine.fetch_factor_range(universe, DIFF('roe_q'), dates=dates)
risk_cov
,
risk_exposure
=
engine
.
fetch_risk_model
(
ref_date
,
codes
)
factor_data
=
engine
.
fetch_factor_range
(
universe
,
[
'roe_q'
],
dates
=
dates
)
risk_cov
,
risk_exposure
=
engine
.
fetch_risk_model_range
(
universe
,
dates
=
dates
)
dates
=
makeSchedule
(
'2018-01-01'
,
'2018-02-01'
,
'10b'
,
'china.sse'
)
factor_data
=
engine
.
fetch_factor_range_forward
(
universe
,
[
'roe_q'
],
dates
=
dates
)
print
(
factor_data
)
alphamind/data/engines/utilities.py
View file @
6f2b65e5
...
...
@@ -7,6 +7,7 @@ Created on 2017-12-25
from
typing
import
Iterable
from
typing
import
Dict
from
alphamind.data.dbmodel.models
import
Market
from
alphamind.data.dbmodel.models
import
RiskCovDay
from
alphamind.data.dbmodel.models
import
RiskCovShort
from
alphamind.data.dbmodel.models
import
RiskCovLong
...
...
@@ -22,7 +23,7 @@ from alphamind.data.dbmodel.models import RiskExposure
from
alphamind.data.engines.industries
import
INDUSTRY_MAPPING
factor_tables
=
[
RiskExposure
,
Uqer
,
Gogoal
,
Experimental
,
LegacyFactor
,
Tiny
]
factor_tables
=
[
Market
,
RiskExposure
,
Uqer
,
Gogoal
,
Experimental
,
LegacyFactor
,
Tiny
]
def
_map_risk_model_table
(
risk_model
:
str
)
->
tuple
:
...
...
@@ -44,13 +45,17 @@ def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict:
if
f
not
in
excluded
and
f
in
t
.
__table__
.
columns
:
factor_cols
[
t
.
__table__
.
columns
[
f
]]
=
t
break
if
not
factor_cols
:
raise
ValueError
(
f
"some factors in <{factors}> can't be find"
)
return
factor_cols
def
_map_industry_category
(
category
:
str
)
->
str
:
if
category
==
'sw'
:
return
'申万行业分类'
if
category
==
'sw_adj'
:
el
if
category
==
'sw_adj'
:
return
'申万行业分类修订'
elif
category
==
'zz'
:
return
'中证行业分类'
...
...
alphamind/execution/targetvolexecutor.py
View file @
6f2b65e5
...
...
@@ -16,8 +16,8 @@ class TargetVolExecutor(ExecutorBase):
def
__init__
(
self
,
window
=
30
,
target_vol
=
0.01
):
super
()
.
__init__
()
self
.
m_vol
=
MovingStandardDeviation
(
window
=
window
,
dependency
=
'return'
)
self
.
m_leverage
=
MovingAverage
(
window
=
window
,
dependency
=
'leverage'
)
self
.
m_vol
=
MovingStandardDeviation
(
window
,
'return'
)
self
.
m_leverage
=
MovingAverage
(
window
,
'leverage'
)
self
.
target_vol
=
target_vol
self
.
multiplier
=
1.
...
...
alphamind/model/composer.py
View file @
6f2b65e5
...
...
@@ -116,7 +116,8 @@ class DataMeta(object):
self
.
risk_model
,
self
.
pre_process
,
self
.
post_process
,
self
.
warm_start
)
self
.
warm_start
,
fit_target
=
alpha_model
.
fit_target
)
def
fetch_predict_data
(
self
,
ref_date
:
str
,
...
...
alphamind/model/data_preparing.py
View file @
6f2b65e5
...
...
@@ -60,7 +60,8 @@ def prepare_data(engine: SqlEngine,
frequency
:
str
,
universe
:
Universe
,
benchmark
:
int
,
warm_start
:
int
=
0
):
warm_start
:
int
=
0
,
fit_target
:
Union
[
Transformer
,
object
]
=
None
):
if
warm_start
>
0
:
p
=
Period
(
frequency
)
p
=
Period
(
length
=-
warm_start
*
p
.
length
(),
units
=
p
.
units
())
...
...
@@ -86,14 +87,22 @@ def prepare_data(engine: SqlEngine,
factors
=
transformer
,
dates
=
dates
)
.
sort_values
([
'trade_date'
,
'code'
])
alpha_logger
.
info
(
"factor data loading finished"
)
return_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
alpha_logger
.
info
(
"return data loading finished"
)
if
fit_target
is
None
:
target_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
else
:
one_more_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
-
1
],
frequency
)
target_df
=
engine
.
fetch_factor_range_forward
(
universe
,
factors
=
fit_target
,
dates
=
dates
+
[
one_more_date
])
target_df
=
target_df
[
target_df
.
trade_date
.
isin
(
dates
)]
target_df
=
target_df
.
groupby
(
'code'
)
.
apply
(
lambda
x
:
x
.
fillna
(
method
=
'pad'
))
alpha_logger
.
info
(
"fit target data loading finished"
)
industry_df
=
engine
.
fetch_industry_range
(
universe
,
dates
=
dates
)
alpha_logger
.
info
(
"industry data loading finished"
)
benchmark_df
=
engine
.
fetch_benchmark_range
(
benchmark
,
dates
=
dates
)
alpha_logger
.
info
(
"benchmark data loading finished"
)
df
=
pd
.
merge
(
factor_df
,
return
_df
,
on
=
[
'trade_date'
,
'code'
])
.
dropna
()
df
=
pd
.
merge
(
factor_df
,
target
_df
,
on
=
[
'trade_date'
,
'code'
])
.
dropna
()
df
=
pd
.
merge
(
df
,
benchmark_df
,
on
=
[
'trade_date'
,
'code'
],
how
=
'left'
)
df
=
pd
.
merge
(
df
,
industry_df
,
on
=
[
'trade_date'
,
'code'
])
df
[
'weight'
]
=
df
[
'weight'
]
.
fillna
(
0.
)
...
...
@@ -261,7 +270,8 @@ def fetch_train_phase(engine,
risk_model
:
str
=
'short'
,
pre_process
:
Iterable
[
object
]
=
None
,
post_process
:
Iterable
[
object
]
=
None
,
warm_start
:
int
=
0
)
->
dict
:
warm_start
:
int
=
0
,
fit_target
:
Union
[
Transformer
,
object
]
=
None
)
->
dict
:
if
isinstance
(
alpha_factors
,
Transformer
):
transformer
=
alpha_factors
else
:
...
...
@@ -281,7 +291,13 @@ def fetch_train_phase(engine,
horizon
=
map_freq
(
frequency
)
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
dates
=
dates
)
target_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
if
fit_target
is
None
:
target_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
else
:
one_more_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
-
1
],
frequency
)
target_df
=
engine
.
fetch_factor_range_forward
(
universe
,
factors
=
fit_target
,
dates
=
dates
+
[
one_more_date
])
target_df
=
target_df
[
target_df
.
trade_date
.
isin
(
dates
)]
target_df
=
target_df
.
groupby
(
'code'
)
.
apply
(
lambda
x
:
x
.
fillna
(
method
=
'pad'
))
df
=
pd
.
merge
(
factor_df
,
target_df
,
on
=
[
'trade_date'
,
'code'
])
.
dropna
()
...
...
@@ -336,7 +352,7 @@ def fetch_predict_phase(engine,
pre_process
:
Iterable
[
object
]
=
None
,
post_process
:
Iterable
[
object
]
=
None
,
warm_start
:
int
=
0
,
fillna
:
str
=
None
):
fillna
:
str
=
None
):
if
isinstance
(
alpha_factors
,
Transformer
):
transformer
=
alpha_factors
else
:
...
...
@@ -356,7 +372,8 @@ def fetch_predict_phase(engine,
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
dates
=
dates
)
if
fillna
:
factor_df
=
factor_df
.
groupby
(
'trade_date'
)
.
apply
(
lambda
x
:
x
.
fillna
(
x
.
median
()))
.
reset_index
(
drop
=
True
)
.
dropna
()
factor_df
=
factor_df
.
groupby
(
'trade_date'
)
.
apply
(
lambda
x
:
x
.
fillna
(
x
.
median
()))
.
reset_index
(
drop
=
True
)
.
dropna
()
else
:
factor_df
=
factor_df
.
dropna
()
...
...
@@ -416,13 +433,16 @@ def fetch_predict_phase(engine,
if
__name__
==
'__main__'
:
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
)
from
alphamind.api
import
risk_styles
,
industry_styles
,
standardize
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:we083826@localhost/alpha'
)
universe
=
Universe
(
'zz500'
,
[
'hs300'
,
'zz500'
])
neutralized_risk
=
[
'SIZE'
]
neutralized_risk
=
risk_styles
+
industry_styles
res
=
fetch_train_phase
(
engine
,
[
'ep_q'
],
'2012-01-05'
,
'5b'
,
universe
,
16
,
neutralized_risk
=
neutralized_risk
)
'2012-01-05'
,
'5b'
,
universe
,
16
,
neutralized_risk
=
neutralized_risk
,
post_process
=
[
standardize
],
fit_target
=
'closePrice'
)
print
(
res
)
alphamind/model/linearmodel.py
View file @
6f2b65e5
...
...
@@ -32,8 +32,9 @@ class ConstLinearModel(ModelBase):
def
__init__
(
self
,
features
=
None
,
weights
:
dict
=
None
):
super
()
.
__init__
(
features
)
weights
:
dict
=
None
,
fit_target
=
None
):
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
if
features
is
not
None
and
weights
is
not
None
:
pyFinAssert
(
len
(
features
)
==
len
(
weights
),
ValueError
,
...
...
@@ -57,8 +58,8 @@ class ConstLinearModel(ModelBase):
class
LinearRegression
(
ModelBase
):
def
__init__
(
self
,
features
=
None
,
fit_intercept
:
bool
=
False
,
**
kwargs
):
super
()
.
__init__
(
features
)
def
__init__
(
self
,
features
=
None
,
fit_intercept
:
bool
=
False
,
fit_target
=
None
,
**
kwargs
):
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
impl
=
LinearRegressionImpl
(
fit_intercept
=
fit_intercept
,
**
kwargs
)
def
save
(
self
)
->
dict
:
...
...
@@ -84,8 +85,8 @@ class LinearRegression(ModelBase):
class
LassoRegression
(
ModelBase
):
def
__init__
(
self
,
alpha
=
0.01
,
features
=
None
,
fit_intercept
:
bool
=
False
,
**
kwargs
):
super
()
.
__init__
(
features
)
def
__init__
(
self
,
alpha
=
0.01
,
features
=
None
,
fit_intercept
:
bool
=
False
,
fit_target
=
None
,
**
kwargs
):
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
impl
=
Lasso
(
alpha
=
alpha
,
fit_intercept
=
fit_intercept
,
**
kwargs
)
def
save
(
self
)
->
dict
:
...
...
@@ -111,8 +112,8 @@ class LassoRegression(ModelBase):
class
LogisticRegression
(
ModelBase
):
def
__init__
(
self
,
features
=
None
,
fit_intercept
:
bool
=
False
,
**
kwargs
):
super
()
.
__init__
(
features
)
def
__init__
(
self
,
features
=
None
,
fit_intercept
:
bool
=
False
,
fit_target
=
None
,
**
kwargs
):
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
impl
=
LogisticRegressionImpl
(
fit_intercept
=
fit_intercept
,
**
kwargs
)
def
save
(
self
)
->
dict
:
...
...
alphamind/model/modelbase.py
View file @
6f2b65e5
...
...
@@ -18,12 +18,17 @@ from alphamind.data.transformer import Transformer
class
ModelBase
(
metaclass
=
abc
.
ABCMeta
):
def
__init__
(
self
,
features
=
None
):
def
__init__
(
self
,
features
=
None
,
fit_target
=
None
):
if
features
is
not
None
:
self
.
formulas
=
Transformer
(
features
)
self
.
features
=
self
.
formulas
.
names
else
:
self
.
features
=
None
if
fit_target
is
not
None
:
self
.
fit_target
=
Transformer
(
fit_target
)
else
:
self
.
fit_target
=
None
self
.
impl
=
None
self
.
trained_time
=
None
...
...
@@ -31,7 +36,8 @@ class ModelBase(metaclass=abc.ABCMeta):
return
encode
(
self
.
impl
)
==
encode
(
rhs
.
impl
)
\
and
self
.
trained_time
==
rhs
.
trained_time
\
and
list_eq
(
self
.
features
,
rhs
.
features
)
\
and
encode
(
self
.
formulas
)
==
encode
(
rhs
.
formulas
)
and
encode
(
self
.
formulas
)
==
encode
(
rhs
.
formulas
)
\
and
encode
(
self
.
fit_target
)
==
encode
(
rhs
.
fit_target
)
def
fit
(
self
,
x
:
pd
.
DataFrame
,
y
:
np
.
ndarray
):
self
.
impl
.
fit
(
x
[
self
.
features
]
.
values
,
y
.
flatten
())
...
...
@@ -56,15 +62,21 @@ class ModelBase(metaclass=abc.ABCMeta):
trained_time
=
self
.
trained_time
,
desc
=
encode
(
self
.
impl
),
formulas
=
encode
(
self
.
formulas
),
fit_target
=
encode
(
self
.
fit_target
),
internal_model
=
self
.
impl
.
__class__
.
__module__
+
"."
+
self
.
impl
.
__class__
.
__name__
)
return
model_desc
@
abc
.
abstractclassmethod
@
classmethod
@
abc
.
abstractmethod
def
load
(
cls
,
model_desc
:
dict
):
obj_layout
=
cls
()
obj_layout
.
features
=
model_desc
[
'features'
]
obj_layout
.
formulas
=
decode
(
model_desc
[
'formulas'
])
obj_layout
.
trained_time
=
model_desc
[
'trained_time'
]
obj_layout
.
impl
=
decode
(
model_desc
[
'desc'
])
if
'fit_target'
in
model_desc
:
obj_layout
.
fit_target
=
decode
(
model_desc
[
'fit_target'
])
else
:
obj_layout
.
fit_target
=
None
return
obj_layout
alphamind/model/treemodel.py
View file @
6f2b65e5
...
...
@@ -27,8 +27,9 @@ class RandomForestRegressor(ModelBase):
n_estimators
:
int
=
100
,
max_features
:
str
=
'auto'
,
features
=
None
,
fit_target
=
None
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
impl
=
RandomForestRegressorImpl
(
n_estimators
=
n_estimators
,
max_features
=
max_features
,
**
kwargs
)
...
...
@@ -59,8 +60,9 @@ class RandomForestClassifier(ModelBase):
n_estimators
:
int
=
100
,
max_features
:
str
=
'auto'
,
features
=
None
,
fit_target
=
None
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
impl
=
RandomForestClassifierImpl
(
n_estimators
=
n_estimators
,
max_features
=
max_features
,
**
kwargs
)
...
...
@@ -92,9 +94,10 @@ class XGBRegressor(ModelBase):
learning_rate
:
float
=
0.1
,
max_depth
:
int
=
3
,
features
=
None
,
fit_target
=
None
,
n_jobs
:
int
=
1
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
impl
=
XGBRegressorImpl
(
n_estimators
=
n_estimators
,
learning_rate
=
learning_rate
,
max_depth
=
max_depth
,
...
...
@@ -128,9 +131,10 @@ class XGBClassifier(ModelBase):
learning_rate
:
float
=
0.1
,
max_depth
:
int
=
3
,
features
=
None
,
fit_target
=
None
,
n_jobs
:
int
=
1
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
impl
=
XGBClassifierImpl
(
n_estimators
=
n_estimators
,
learning_rate
=
learning_rate
,
max_depth
=
max_depth
,
...
...
@@ -171,10 +175,11 @@ class XGBTrainer(ModelBase):
subsample
=
1.
,
colsample_bytree
=
1.
,
features
=
None
,
fit_target
=
None
,
random_state
:
int
=
0
,
n_jobs
:
int
=
1
,
**
kwargs
):
super
()
.
__init__
(
features
)
super
()
.
__init__
(
features
=
features
,
fit_target
=
fit_target
)
self
.
params
=
{
'silent'
:
1
,
'objective'
:
objective
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment