Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
ed1f44d9
Commit
ed1f44d9
authored
Apr 13, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed bug and add y in predict data
parent
31805b28
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
65 additions
and
35 deletions
+65
-35
composer.py
alphamind/model/composer.py
+37
-28
data_preparing.py
alphamind/model/data_preparing.py
+28
-7
No files found.
alphamind/model/composer.py
View file @
ed1f44d9
...
@@ -8,6 +8,7 @@ Created on 2017-9-27
...
@@ -8,6 +8,7 @@ Created on 2017-9-27
import
copy
import
copy
import
bisect
import
bisect
from
typing
import
Iterable
from
typing
import
Iterable
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
simpleutils.miscellaneous
import
list_eq
from
simpleutils.miscellaneous
import
list_eq
from
alphamind.model.modelbase
import
ModelBase
from
alphamind.model.modelbase
import
ModelBase
...
@@ -133,7 +134,8 @@ class DataMeta(object):
...
@@ -133,7 +134,8 @@ class DataMeta(object):
self
.
pre_process
,
self
.
pre_process
,
self
.
post_process
,
self
.
post_process
,
self
.
warm_start
,
self
.
warm_start
,
fillna
=
True
)
fillna
=
True
,
fit_target
=
alpha_model
.
fit_target
)
def
train_model
(
ref_date
:
str
,
def
train_model
(
ref_date
:
str
,
...
@@ -186,6 +188,15 @@ class Composer(object):
...
@@ -186,6 +188,15 @@ class Composer(object):
codes
=
x
.
index
codes
=
x
.
index
return
pd
.
DataFrame
(
model
.
predict
(
x_values
)
.
flatten
(),
index
=
codes
)
return
pd
.
DataFrame
(
model
.
predict
(
x_values
)
.
flatten
(),
index
=
codes
)
def
score
(
self
,
ref_date
:
str
,
x
:
pd
.
DataFrame
=
None
,
y
:
np
.
ndarray
=
None
)
->
float
:
model
=
self
.
_fetch_latest_model
(
ref_date
)
if
x
is
None
:
predict_data
=
self
.
data_meta
.
fetch_predict_data
(
ref_date
,
model
)
x
=
predict_data
[
'predict'
][
'x'
]
if
y
is
None
:
y
=
predict_data
[
'predict'
][
'y'
]
return
model
.
score
(
x
,
y
)
def
_fetch_latest_model
(
self
,
ref_date
)
->
ModelBase
:
def
_fetch_latest_model
(
self
,
ref_date
)
->
ModelBase
:
if
self
.
is_updated
:
if
self
.
is_updated
:
sorted_keys
=
self
.
sorted_keys
sorted_keys
=
self
.
sorted_keys
...
@@ -211,35 +222,33 @@ class Composer(object):
...
@@ -211,35 +222,33 @@ class Composer(object):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
import
numpy
as
np
from
PyFin.api
import
LAST
from
alphamind.data.standardize
import
standardize
from
alphamind.data.engines.sqlengine
import
risk_styles
,
industry_styles
from
alphamind.data.winsorize
import
winsorize_normal
from
alphamind.model.linearmodel
import
LinearRegression
from
alphamind.data.engines.sqlengine
import
industry_styles
from
alphamind.model.linearmodel
import
ConstLinearModel
universe
=
Universe
(
'custom'
,
[
'ashare_ex'
])
freq
=
'20b'
data_source
=
"postgres+psycopg2://postgres:we083826@localhost/alpha"
batch
=
0
alpha_model
=
ConstLinearModel
([
'EPS'
],
np
.
array
([
1.
]))
neutralized_risk
=
risk_styles
+
industry_styles
alpha_factors
=
[
'EPS'
]
freq
=
'1w'
universe
=
Universe
(
'zz500'
,
[
'zz500'
])
batch
=
4
neutralized_risk
=
[
'SIZE'
]
+
industry_styles
risk_model
=
'short'
risk_model
=
'short'
pre_process
=
[
winsorize_normal
,
standardize
]
pre_process
=
[
winsorize_normal
,
standardize
]
pos_process
=
[
winsorize_normal
,
standardize
]
post_process
=
[
standardize
]
warm_start
=
0
data_meta
=
DataMeta
(
freq
,
data_source
=
"postgres+psycopg2://postgres:we083826@localhost/alpha"
universe
,
batch
,
data_meta
=
DataMeta
(
freq
=
freq
,
neutralized_risk
,
universe
=
universe
,
risk_model
,
batch
=
batch
,
pre_process
,
neutralized_risk
=
neutralized_risk
,
pos_process
,
risk_model
=
risk_model
,
pre_process
=
pre_process
,
post_process
=
post_process
,
warm_start
=
warm_start
,
data_source
=
data_source
)
data_source
=
data_source
)
composer
=
Composer
(
alpha_model
,
data_meta
)
alpha_model
=
LinearRegression
({
'roe_q'
:
LAST
(
'roe_q'
)},
fit_target
=
'roe_q'
)
composer
=
Composer
(
alpha_model
=
alpha_model
,
data_meta
=
data_meta
)
composer
.
train
(
'2017-09-20'
)
ref_date
=
'2018-01-30'
composer
.
train
(
'2017-09-22'
)
composer
.
train
(
ref_date
)
composer
.
train
(
'2017-09-25'
)
res
=
composer
.
predict
(
ref_date
)
composer
.
predict
(
'2017-09-21'
)
\ No newline at end of file
alphamind/model/data_preparing.py
View file @
ed1f44d9
...
@@ -310,10 +310,10 @@ def fetch_train_phase(engine,
...
@@ -310,10 +310,10 @@ def fetch_train_phase(engine,
if
dates
[
-
1
]
==
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
):
if
dates
[
-
1
]
==
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
):
pyFinAssert
(
len
(
dates
)
>=
2
,
ValueError
,
"No previous data for training for the date {0}"
.
format
(
ref_date
))
pyFinAssert
(
len
(
dates
)
>=
2
,
ValueError
,
"No previous data for training for the date {0}"
.
format
(
ref_date
))
end
=
dates
[
-
2
]
end
=
dates
[
-
2
]
start
=
dates
[
-
batch
-
1
]
if
batch
<=
len
(
dates
)
-
1
else
dates
[
0
]
start
=
dates
[
-
batch
-
2
]
if
batch
<=
len
(
dates
)
-
2
else
dates
[
0
]
else
:
else
:
end
=
dates
[
-
1
]
end
=
dates
[
-
1
]
start
=
dates
[
-
batch
]
if
batch
<=
len
(
dates
)
else
dates
[
0
]
start
=
dates
[
-
batch
-
1
]
if
batch
<=
len
(
dates
)
else
dates
[
0
]
index
=
(
date_label
>=
start
)
&
(
date_label
<=
end
)
index
=
(
date_label
>=
start
)
&
(
date_label
<=
end
)
this_raw_x
=
x_values
[
index
]
this_raw_x
=
x_values
[
index
]
...
@@ -352,7 +352,8 @@ def fetch_predict_phase(engine,
...
@@ -352,7 +352,8 @@ def fetch_predict_phase(engine,
pre_process
:
Iterable
[
object
]
=
None
,
pre_process
:
Iterable
[
object
]
=
None
,
post_process
:
Iterable
[
object
]
=
None
,
post_process
:
Iterable
[
object
]
=
None
,
warm_start
:
int
=
0
,
warm_start
:
int
=
0
,
fillna
:
str
=
None
):
fillna
:
str
=
None
,
fit_target
:
Union
[
Transformer
,
object
]
=
None
):
if
isinstance
(
alpha_factors
,
Transformer
):
if
isinstance
(
alpha_factors
,
Transformer
):
transformer
=
alpha_factors
transformer
=
alpha_factors
else
:
else
:
...
@@ -369,6 +370,8 @@ def fetch_predict_phase(engine,
...
@@ -369,6 +370,8 @@ def fetch_predict_phase(engine,
dateRule
=
BizDayConventions
.
Following
,
dateRule
=
BizDayConventions
.
Following
,
dateGenerationRule
=
DateGeneration
.
Backward
)
dateGenerationRule
=
DateGeneration
.
Backward
)
horizon
=
map_freq
(
frequency
)
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
dates
=
dates
)
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
dates
=
dates
)
if
fillna
:
if
fillna
:
...
@@ -377,6 +380,14 @@ def fetch_predict_phase(engine,
...
@@ -377,6 +380,14 @@ def fetch_predict_phase(engine,
else
:
else
:
factor_df
=
factor_df
.
dropna
()
factor_df
=
factor_df
.
dropna
()
if
fit_target
is
None
:
target_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
else
:
one_more_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
-
1
],
frequency
)
target_df
=
engine
.
fetch_factor_range_forward
(
universe
,
factors
=
fit_target
,
dates
=
dates
+
[
one_more_date
])
target_df
=
target_df
[
target_df
.
trade_date
.
isin
(
dates
)]
target_df
=
target_df
.
groupby
(
'code'
)
.
apply
(
lambda
x
:
x
.
fillna
(
method
=
'pad'
))
names
=
transformer
.
names
names
=
transformer
.
names
if
neutralized_risk
:
if
neutralized_risk
:
...
@@ -384,13 +395,15 @@ def fetch_predict_phase(engine,
...
@@ -384,13 +395,15 @@ def fetch_predict_phase(engine,
used_neutralized_risk
=
list
(
set
(
neutralized_risk
)
.
difference
(
names
))
used_neutralized_risk
=
list
(
set
(
neutralized_risk
)
.
difference
(
names
))
risk_df
=
risk_df
[[
'trade_date'
,
'code'
]
+
used_neutralized_risk
]
.
dropna
()
risk_df
=
risk_df
[[
'trade_date'
,
'code'
]
+
used_neutralized_risk
]
.
dropna
()
train_x
=
pd
.
merge
(
factor_df
,
risk_df
,
on
=
[
'trade_date'
,
'code'
])
train_x
=
pd
.
merge
(
factor_df
,
risk_df
,
on
=
[
'trade_date'
,
'code'
])
train_x
=
pd
.
merge
(
train_x
,
target_df
,
on
=
[
'trade_date'
,
'code'
],
how
=
'left'
)
risk_exp
=
train_x
[
neutralized_risk
]
.
values
.
astype
(
float
)
risk_exp
=
train_x
[
neutralized_risk
]
.
values
.
astype
(
float
)
else
:
else
:
train_x
=
factor_df
.
copy
(
)
train_x
=
pd
.
merge
(
factor_df
,
target_df
,
on
=
[
'trade_date'
,
'code'
],
how
=
'left'
)
risk_exp
=
None
risk_exp
=
None
x_values
=
train_x
[
names
]
.
values
.
astype
(
float
)
x_values
=
train_x
[
names
]
.
values
.
astype
(
float
)
y_values
=
train_x
[
'dx'
]
.
values
.
astype
(
float
)
date_label
=
pd
.
DatetimeIndex
(
factor_df
.
trade_date
)
.
to_pydatetime
()
date_label
=
pd
.
DatetimeIndex
(
train_x
.
trade_date
)
.
to_pydatetime
()
dates
=
np
.
unique
(
date_label
)
dates
=
np
.
unique
(
date_label
)
if
dates
[
-
1
]
==
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
):
if
dates
[
-
1
]
==
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
):
...
@@ -400,6 +413,7 @@ def fetch_predict_phase(engine,
...
@@ -400,6 +413,7 @@ def fetch_predict_phase(engine,
left_index
=
bisect
.
bisect_left
(
date_label
,
start
)
left_index
=
bisect
.
bisect_left
(
date_label
,
start
)
right_index
=
bisect
.
bisect_right
(
date_label
,
end
)
right_index
=
bisect
.
bisect_right
(
date_label
,
end
)
this_raw_x
=
x_values
[
left_index
:
right_index
]
this_raw_x
=
x_values
[
left_index
:
right_index
]
this_raw_y
=
y_values
[
left_index
:
right_index
]
sub_dates
=
date_label
[
left_index
:
right_index
]
sub_dates
=
date_label
[
left_index
:
right_index
]
if
risk_exp
is
not
None
:
if
risk_exp
is
not
None
:
...
@@ -412,10 +426,16 @@ def fetch_predict_phase(engine,
...
@@ -412,10 +426,16 @@ def fetch_predict_phase(engine,
risk_factors
=
this_risk_exp
,
risk_factors
=
this_risk_exp
,
post_process
=
post_process
)
post_process
=
post_process
)
ne_y
=
factor_processing
(
this_raw_y
,
pre_process
=
pre_process
,
risk_factors
=
this_risk_exp
,
post_process
=
post_process
)
inner_left_index
=
bisect
.
bisect_left
(
sub_dates
,
end
)
inner_left_index
=
bisect
.
bisect_left
(
sub_dates
,
end
)
inner_right_index
=
bisect
.
bisect_right
(
sub_dates
,
end
)
inner_right_index
=
bisect
.
bisect_right
(
sub_dates
,
end
)
ne_x
=
ne_x
[
inner_left_index
:
inner_right_index
]
ne_x
=
ne_x
[
inner_left_index
:
inner_right_index
]
ne_y
=
ne_y
[
inner_left_index
:
inner_right_index
]
left_index
=
bisect
.
bisect_left
(
date_label
,
end
)
left_index
=
bisect
.
bisect_left
(
date_label
,
end
)
right_index
=
bisect
.
bisect_right
(
date_label
,
end
)
right_index
=
bisect
.
bisect_right
(
date_label
,
end
)
...
@@ -423,11 +443,12 @@ def fetch_predict_phase(engine,
...
@@ -423,11 +443,12 @@ def fetch_predict_phase(engine,
codes
=
train_x
.
code
.
values
[
left_index
:
right_index
]
codes
=
train_x
.
code
.
values
[
left_index
:
right_index
]
else
:
else
:
ne_x
=
None
ne_x
=
None
ne_y
=
None
codes
=
None
codes
=
None
ret
=
dict
()
ret
=
dict
()
ret
[
'x_names'
]
=
transformer
.
names
ret
[
'x_names'
]
=
transformer
.
names
ret
[
'predict'
]
=
{
'x'
:
pd
.
DataFrame
(
ne_x
,
columns
=
transformer
.
names
),
'code'
:
codes
}
ret
[
'predict'
]
=
{
'x'
:
pd
.
DataFrame
(
ne_x
,
columns
=
transformer
.
names
),
'code'
:
codes
,
'y'
:
ne_y
.
flatten
()
}
return
ret
return
ret
...
@@ -437,7 +458,7 @@ if __name__ == '__main__':
...
@@ -437,7 +458,7 @@ if __name__ == '__main__':
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:we083826@localhost/alpha'
)
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:we083826@localhost/alpha'
)
universe
=
Universe
(
'zz500'
,
[
'hs300'
,
'zz500'
])
universe
=
Universe
(
'zz500'
,
[
'hs300'
,
'zz500'
])
neutralized_risk
=
risk_styles
+
industry_styles
neutralized_risk
=
risk_styles
+
industry_styles
res
=
fetch_
train
_phase
(
engine
,
[
'ep_q'
],
res
=
fetch_
predict
_phase
(
engine
,
[
'ep_q'
],
'2012-01-05'
,
'2012-01-05'
,
'5b'
,
'5b'
,
universe
,
universe
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment