Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
ed1f44d9
Commit
ed1f44d9
authored
Apr 13, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed bug and add y in predict data
parent
31805b28
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
65 additions
and
35 deletions
+65
-35
composer.py
alphamind/model/composer.py
+37
-28
data_preparing.py
alphamind/model/data_preparing.py
+28
-7
No files found.
alphamind/model/composer.py
View file @
ed1f44d9
...
...
@@ -8,6 +8,7 @@ Created on 2017-9-27
import
copy
import
bisect
from
typing
import
Iterable
import
numpy
as
np
import
pandas
as
pd
from
simpleutils.miscellaneous
import
list_eq
from
alphamind.model.modelbase
import
ModelBase
...
...
@@ -133,7 +134,8 @@ class DataMeta(object):
self
.
pre_process
,
self
.
post_process
,
self
.
warm_start
,
fillna
=
True
)
fillna
=
True
,
fit_target
=
alpha_model
.
fit_target
)
def
train_model
(
ref_date
:
str
,
...
...
@@ -186,6 +188,15 @@ class Composer(object):
codes
=
x
.
index
return
pd
.
DataFrame
(
model
.
predict
(
x_values
)
.
flatten
(),
index
=
codes
)
def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None) -> float:
    """Score the latest model available for ``ref_date``.

    Parameters
    ----------
    ref_date:
        Reference date passed to ``self._fetch_latest_model`` and, when
        data must be loaded, to ``self.data_meta.fetch_predict_data``.
    x:
        Features to score on. When ``None`` the ``['predict']['x']`` entry
        of the fetched predict data is used.
    y:
        Targets to score against. When ``None`` the ``['predict']['y']``
        entry of the fetched predict data is used.

    Returns
    -------
    Whatever ``model.score(x, y)`` returns.
    """
    model = self._fetch_latest_model(ref_date)

    # Fetch the predict data once if *either* input is missing. Previously
    # the fetch only happened when ``x`` was None, so passing ``x`` without
    # ``y`` hit an unbound ``predict_data`` local.
    if x is None or y is None:
        predict_data = self.data_meta.fetch_predict_data(ref_date, model)
        if x is None:
            x = predict_data['predict']['x']
        if y is None:
            y = predict_data['predict']['y']

    return model.score(x, y)
def
_fetch_latest_model
(
self
,
ref_date
)
->
ModelBase
:
if
self
.
is_updated
:
sorted_keys
=
self
.
sorted_keys
...
...
@@ -211,35 +222,33 @@ class Composer(object):
if
__name__
==
'__main__'
:
import
numpy
as
np
from
alphamind.data.standardize
import
standardize
from
alphamind.data.winsorize
import
winsorize_normal
from
alphamind.data.engines.sqlengine
import
industry_styles
from
alphamind.model.linearmodel
import
ConstLinearModel
data_source
=
"postgres+psycopg2://postgres:we083826@localhost/alpha"
alpha_model
=
ConstLinearModel
([
'EPS'
],
np
.
array
([
1.
]))
alpha_factors
=
[
'EPS'
]
freq
=
'1w'
universe
=
Universe
(
'zz500'
,
[
'zz500'
])
batch
=
4
neutralized_risk
=
[
'SIZE'
]
+
industry_styles
from
PyFin.api
import
LAST
from
alphamind.data.engines.sqlengine
import
risk_styles
,
industry_styles
from
alphamind.model.linearmodel
import
LinearRegression
universe
=
Universe
(
'custom'
,
[
'ashare_ex'
])
freq
=
'20b'
batch
=
0
neutralized_risk
=
risk_styles
+
industry_styles
risk_model
=
'short'
pre_process
=
[
winsorize_normal
,
standardize
]
pos_process
=
[
winsorize_normal
,
standardize
]
data_meta
=
DataMeta
(
freq
,
universe
,
batch
,
neutralized_risk
,
risk_model
,
pre_process
,
pos_process
,
post_process
=
[
standardize
]
warm_start
=
0
data_source
=
"postgres+psycopg2://postgres:we083826@localhost/alpha"
data_meta
=
DataMeta
(
freq
=
freq
,
universe
=
universe
,
batch
=
batch
,
neutralized_risk
=
neutralized_risk
,
risk_model
=
risk_model
,
pre_process
=
pre_process
,
post_process
=
post_process
,
warm_start
=
warm_start
,
data_source
=
data_source
)
composer
=
Composer
(
alpha_model
,
data_meta
)
alpha_model
=
LinearRegression
({
'roe_q'
:
LAST
(
'roe_q'
)},
fit_target
=
'roe_q'
)
composer
=
Composer
(
alpha_model
=
alpha_model
,
data_meta
=
data_meta
)
composer
.
train
(
'2017-09-20'
)
composer
.
train
(
'2017-09-22'
)
composer
.
train
(
'2017-09-25'
)
composer
.
predict
(
'2017-09-21'
)
ref_date
=
'2018-01-30'
composer
.
train
(
ref_date
)
res
=
composer
.
predict
(
ref_date
)
\ No newline at end of file
alphamind/model/data_preparing.py
View file @
ed1f44d9
...
...
@@ -310,10 +310,10 @@ def fetch_train_phase(engine,
if
dates
[
-
1
]
==
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
):
pyFinAssert
(
len
(
dates
)
>=
2
,
ValueError
,
"No previous data for training for the date {0}"
.
format
(
ref_date
))
end
=
dates
[
-
2
]
start
=
dates
[
-
batch
-
1
]
if
batch
<=
len
(
dates
)
-
1
else
dates
[
0
]
start
=
dates
[
-
batch
-
2
]
if
batch
<=
len
(
dates
)
-
2
else
dates
[
0
]
else
:
end
=
dates
[
-
1
]
start
=
dates
[
-
batch
]
if
batch
<=
len
(
dates
)
else
dates
[
0
]
start
=
dates
[
-
batch
-
1
]
if
batch
<=
len
(
dates
)
else
dates
[
0
]
index
=
(
date_label
>=
start
)
&
(
date_label
<=
end
)
this_raw_x
=
x_values
[
index
]
...
...
@@ -352,7 +352,8 @@ def fetch_predict_phase(engine,
pre_process
:
Iterable
[
object
]
=
None
,
post_process
:
Iterable
[
object
]
=
None
,
warm_start
:
int
=
0
,
fillna
:
str
=
None
):
fillna
:
str
=
None
,
fit_target
:
Union
[
Transformer
,
object
]
=
None
):
if
isinstance
(
alpha_factors
,
Transformer
):
transformer
=
alpha_factors
else
:
...
...
@@ -369,6 +370,8 @@ def fetch_predict_phase(engine,
dateRule
=
BizDayConventions
.
Following
,
dateGenerationRule
=
DateGeneration
.
Backward
)
horizon
=
map_freq
(
frequency
)
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
dates
=
dates
)
if
fillna
:
...
...
@@ -377,6 +380,14 @@ def fetch_predict_phase(engine,
else
:
factor_df
=
factor_df
.
dropna
()
if
fit_target
is
None
:
target_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
else
:
one_more_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
-
1
],
frequency
)
target_df
=
engine
.
fetch_factor_range_forward
(
universe
,
factors
=
fit_target
,
dates
=
dates
+
[
one_more_date
])
target_df
=
target_df
[
target_df
.
trade_date
.
isin
(
dates
)]
target_df
=
target_df
.
groupby
(
'code'
)
.
apply
(
lambda
x
:
x
.
fillna
(
method
=
'pad'
))
names
=
transformer
.
names
if
neutralized_risk
:
...
...
@@ -384,13 +395,15 @@ def fetch_predict_phase(engine,
used_neutralized_risk
=
list
(
set
(
neutralized_risk
)
.
difference
(
names
))
risk_df
=
risk_df
[[
'trade_date'
,
'code'
]
+
used_neutralized_risk
]
.
dropna
()
train_x
=
pd
.
merge
(
factor_df
,
risk_df
,
on
=
[
'trade_date'
,
'code'
])
train_x
=
pd
.
merge
(
train_x
,
target_df
,
on
=
[
'trade_date'
,
'code'
],
how
=
'left'
)
risk_exp
=
train_x
[
neutralized_risk
]
.
values
.
astype
(
float
)
else
:
train_x
=
factor_df
.
copy
(
)
train_x
=
pd
.
merge
(
factor_df
,
target_df
,
on
=
[
'trade_date'
,
'code'
],
how
=
'left'
)
risk_exp
=
None
x_values
=
train_x
[
names
]
.
values
.
astype
(
float
)
y_values
=
train_x
[
'dx'
]
.
values
.
astype
(
float
)
date_label
=
pd
.
DatetimeIndex
(
factor_df
.
trade_date
)
.
to_pydatetime
()
date_label
=
pd
.
DatetimeIndex
(
train_x
.
trade_date
)
.
to_pydatetime
()
dates
=
np
.
unique
(
date_label
)
if
dates
[
-
1
]
==
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
):
...
...
@@ -400,6 +413,7 @@ def fetch_predict_phase(engine,
left_index
=
bisect
.
bisect_left
(
date_label
,
start
)
right_index
=
bisect
.
bisect_right
(
date_label
,
end
)
this_raw_x
=
x_values
[
left_index
:
right_index
]
this_raw_y
=
y_values
[
left_index
:
right_index
]
sub_dates
=
date_label
[
left_index
:
right_index
]
if
risk_exp
is
not
None
:
...
...
@@ -412,10 +426,16 @@ def fetch_predict_phase(engine,
risk_factors
=
this_risk_exp
,
post_process
=
post_process
)
ne_y
=
factor_processing
(
this_raw_y
,
pre_process
=
pre_process
,
risk_factors
=
this_risk_exp
,
post_process
=
post_process
)
inner_left_index
=
bisect
.
bisect_left
(
sub_dates
,
end
)
inner_right_index
=
bisect
.
bisect_right
(
sub_dates
,
end
)
ne_x
=
ne_x
[
inner_left_index
:
inner_right_index
]
ne_y
=
ne_y
[
inner_left_index
:
inner_right_index
]
left_index
=
bisect
.
bisect_left
(
date_label
,
end
)
right_index
=
bisect
.
bisect_right
(
date_label
,
end
)
...
...
@@ -423,11 +443,12 @@ def fetch_predict_phase(engine,
codes
=
train_x
.
code
.
values
[
left_index
:
right_index
]
else
:
ne_x
=
None
ne_y
=
None
codes
=
None
ret
=
dict
()
ret
[
'x_names'
]
=
transformer
.
names
ret
[
'predict'
]
=
{
'x'
:
pd
.
DataFrame
(
ne_x
,
columns
=
transformer
.
names
),
'code'
:
codes
}
ret
[
'predict'
]
=
{
'x'
:
pd
.
DataFrame
(
ne_x
,
columns
=
transformer
.
names
),
'code'
:
codes
,
'y'
:
ne_y
.
flatten
()
}
return
ret
...
...
@@ -437,7 +458,7 @@ if __name__ == '__main__':
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:we083826@localhost/alpha'
)
universe
=
Universe
(
'zz500'
,
[
'hs300'
,
'zz500'
])
neutralized_risk
=
risk_styles
+
industry_styles
res
=
fetch_
train
_phase
(
engine
,
[
'ep_q'
],
res
=
fetch_
predict
_phase
(
engine
,
[
'ep_q'
],
'2012-01-05'
,
'5b'
,
universe
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment