Dr.李 / alpha-mind · Commits · 38fd0ff5

Commit 38fd0ff5 authored Sep 05, 2017 by Dr.李
added single phase data fetching

Parent: 8a405fdd
Showing 2 changed files with 204 additions and 36 deletions (+204 / -36):

  alphamind/examples/model_training.py   (+2 / -2)
  alphamind/model/data_preparing.py      (+202 / -34)
alphamind/examples/model_training.py (view file @ 38fd0ff5)
@@ -29,7 +29,7 @@ re-balance - 1 week
 training - every 4 week
 '''
-engine = SqlEngine('postgresql+psycopg2://postgres:we083826@192.168.0.101/alpha')
+engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
 universe = Universe('zz500', ['zz500'])
 neutralize_risk = ['SIZE'] + industry_styles
 portfolio_risk_neutralize = ['SIZE']
@@ -59,7 +59,7 @@ data_package = fetch_data_package(engine,
                                   neutralized_risk=neutralize_risk,
                                   pre_process=[winsorize_normal, standardize],
                                   post_process=[standardize],
-                                  warm_start=20)
+                                  warm_start=8)
 '''
 training phase: using Linear - regression from scikit-learn
alphamind/model/data_preparing.py (view file @ 38fd0ff5)
@@ -5,12 +5,14 @@ Created on 2017-8-24
 @author: cheng.li
 """

+import datetime as dt
 import numpy as np
 import pandas as pd
 from typing import Iterable
 from typing import Union
 from PyFin.api import makeSchedule
 from PyFin.api import BizDayConventions
+from PyFin.api import DateGeneration
 from PyFin.api import advanceDateByCalendar
 from PyFin.DateUtilities import Period
 from PyFin.Enums import TimeUnits
@@ -35,6 +37,31 @@ def _map_horizon(frequency: str) -> int:
         raise ValueError('{0} is an unrecognized frequency rule'.format(frequency))


+def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk):
+    if neutralized_risk:
+        risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
+        used_neutralized_risk = list(set(neutralized_risk).difference(names))
+        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
+        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
+        return_df = pd.merge(return_df, risk_df, on=['trade_date', 'code'])[['trade_date', 'code', 'dx']]
+        train_y = return_df.copy()
+        risk_exp = train_x[neutralized_risk].values.astype(float)
+        x_values = train_x[names].values.astype(float)
+        y_values = train_y[['dx']].values
+    else:
+        risk_exp = None
+        train_x = factor_df.copy()
+        train_y = return_df.copy()
+        x_values = train_x[names].values.astype(float)
+        y_values = train_y[['dx']].values
+
+    date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
+    dates = np.unique(date_label)
+
+    return return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y
+
+
 def prepare_data(engine: SqlEngine,
                  factors: Union[Transformer, Iterable[object]],
                  start_date: str,
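For reference, the merge logic that _merge_df factors out can be exercised with plain pandas. The snippet below is a self-contained toy sketch (the values and the tiny frames are made up; only the column names and the ['trade_date', 'code'] merge keys come from the diff): factor, risk and forward-return frames are joined, then the neutralization exposures and model inputs are pulled out as float arrays.

import pandas as pd

# toy frames standing in for engine.fetch_factor_range / fetch_risk_model_range / fetch_dx_return_range output
factor_df = pd.DataFrame({'trade_date': ['2017-09-04'] * 2, 'code': [1, 2], 'EPS': [0.5, 0.7]})
risk_df = pd.DataFrame({'trade_date': ['2017-09-04'] * 2, 'code': [1, 2], 'SIZE': [1.1, -0.4]})
return_df = pd.DataFrame({'trade_date': ['2017-09-04'] * 2, 'code': [1, 2], 'dx': [0.01, -0.02]})

# join on the same keys _merge_df uses
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
return_df = pd.merge(return_df, risk_df, on=['trade_date', 'code'])[['trade_date', 'code', 'dx']]

risk_exp = train_x[['SIZE']].values.astype(float)   # risk exposures used for neutralization
x_values = train_x[['EPS']].values.astype(float)    # factor matrix
y_values = return_df[['dx']].values                 # forward returns
print(risk_exp.shape, x_values.shape, y_values.shape)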
@@ -44,9 +71,16 @@ def prepare_data(engine: SqlEngine,
                  benchmark: int,
                  warm_start: int = 0):
     if warm_start > 0:
-        start_date = advanceDateByCalendar('china.sse', start_date, str(-warm_start) + 'b').strftime('%Y-%m-%d')
+        p = Period(frequency)
+        p = Period(length=-warm_start * p.length(), units=p.units())
+        start_date = advanceDateByCalendar('china.sse', start_date, p).strftime('%Y-%m-%d')

-    dates = makeSchedule(start_date, end_date, frequency, calendar='china.sse', dateRule=BizDayConventions.Following)
+    dates = makeSchedule(start_date,
+                         end_date,
+                         frequency,
+                         calendar='china.sse',
+                         dateRule=BizDayConventions.Following,
+                         dateGenerationRule=DateGeneration.Backward)

     horizon = _map_horizon(frequency)
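With this hunk, warm_start is expressed in re-balance periods rather than a fixed number of business days, which is why the example script could drop from warm_start=20 to warm_start=8. A minimal sketch of the date arithmetic, using only the PyFin calls that appear in this hunk (the concrete frequency and date are illustrative, assuming Finance-Python and its 'china.sse' calendar are available):

from PyFin.api import advanceDateByCalendar
from PyFin.DateUtilities import Period

frequency = '1w'   # illustrative weekly re-balance
warm_start = 8     # now interpreted as 8 re-balance periods, no longer 8 business days

p = Period(frequency)
p = Period(length=-warm_start * p.length(), units=p.units())   # roughly a Period of -8 weeks
start_date = advanceDateByCalendar('china.sse', '2017-09-04', p).strftime('%Y-%m-%d')
print(start_date)  # a date about eight weeks before the reference date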
@@ -155,27 +189,8 @@ def fetch_data_package(engine: SqlEngine,
                                               benchmark,
                                               warm_start)

-    if neutralized_risk:
-        risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
-        used_neutralized_risk = list(set(neutralized_risk).difference(transformer.names))
-        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
-        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
-        return_df = pd.merge(return_df, risk_df, on=['trade_date', 'code'])[['trade_date', 'code', 'dx']]
-        train_y = return_df.copy()
-        risk_exp = train_x[neutralized_risk].values.astype(float)
-        x_values = train_x[transformer.names].values.astype(float)
-        y_values = train_y[['dx']].values
-    else:
-        risk_exp = None
-        train_x = factor_df.copy()
-        train_y = return_df.copy()
-        x_values = train_x[transformer.names].values.astype(float)
-        y_values = train_y[['dx']].values
-
-    date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
-    dates = np.unique(date_label)
+    return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y = \
+        _merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)

     return_df['weight'] = train_x['weight']
     return_df['industry'] = train_x['industry']
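fetch_data_package now delegates the whole neutralization merge to _merge_df and just unpacks its eight return values. One detail shared with fetch_predict_phase below: risk columns that already appear among the factor names are dropped from the risk frame before the merge, so they are not fetched and joined twice. A self-contained illustration of that set-difference step (the factor and risk names here are hypothetical):

names = ['EPS', 'SIZE']                 # hypothetical alpha factor columns
neutralized_risk = ['SIZE', 'BETA']     # hypothetical risk columns to neutralize against
used_neutralized_risk = list(set(neutralized_risk).difference(names))
print(used_neutralized_risk)            # ['BETA'] -- 'SIZE' is already present in the factor frame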
@@ -205,18 +220,171 @@ def fetch_data_package(engine: SqlEngine,
     return ret


-if __name__ == '__main__':
-    from PyFin.api import MA
-
-    engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
-    universe = Universe('zz500', ['zz500'])
-    res = fetch_data_package(engine,
-                             MA(10, 'EPS'),
-                             '2012-01-01',
-                             '2012-04-01',
-                             '1m',
-                             universe,
-                             905,
-                             0)
-    print(res)
+def fetch_train_phase(engine,
+                      alpha_factors: Iterable[object],
+                      ref_date,
+                      frequency,
+                      universe,
+                      batch,
+                      neutralized_risk: Iterable[str] = None,
+                      risk_model: str = 'short',
+                      pre_process: Iterable[object] = None,
+                      post_process: Iterable[object] = None,
+                      warm_start: int = 0):
+    transformer = Transformer(alpha_factors)
+
+    p = Period(frequency)
+    p = Period(length=-(warm_start + batch + 1) * p.length(), units=p.units())
+    start_date = advanceDateByCalendar('china.sse', ref_date, p, BizDayConventions.Following)
+
+    dates = makeSchedule(start_date,
+                         ref_date,
+                         frequency,
+                         calendar='china.sse',
+                         dateRule=BizDayConventions.Following,
+                         dateGenerationRule=DateGeneration.Backward)
+
+    horizon = _map_horizon(frequency)
+
+    factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
+    return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
+
+    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
+
+    return_df, factor_df = df[['trade_date', 'code', 'dx']], \
+        df[['trade_date', 'code', 'isOpen'] + transformer.names]
+
+    return_df, dates, date_label, risk_exp, x_values, y_values, _, _ = \
+        _merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)
+
+    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
+        end = dates[-2]
+        start = dates[-batch - 1]
+    else:
+        end = dates[-1]
+        start = dates[-batch]
+
+    index = (date_label >= start) & (date_label <= end)
+    this_raw_x = x_values[index]
+    this_raw_y = y_values[index]
+
+    if risk_exp is not None:
+        this_risk_exp = risk_exp[index]
+    else:
+        this_risk_exp = None
+
+    ne_x = factor_processing(this_raw_x,
+                             pre_process=pre_process,
+                             risk_factors=this_risk_exp,
+                             post_process=post_process)
+
+    ne_y = factor_processing(this_raw_y,
+                             pre_process=pre_process,
+                             risk_factors=this_risk_exp,
+                             post_process=post_process)
+
+    ret = dict()
+    ret['x_names'] = transformer.names
+    ret['train'] = {'x': ne_x, 'y': ne_y}
+
+    return ret
+
+
+def fetch_predict_phase(engine,
+                        alpha_factors: Iterable[object],
+                        ref_date,
+                        frequency,
+                        universe,
+                        batch,
+                        neutralized_risk: Iterable[str] = None,
+                        risk_model: str = 'short',
+                        pre_process: Iterable[object] = None,
+                        post_process: Iterable[object] = None,
+                        warm_start: int = 0):
+    transformer = Transformer(alpha_factors)
+
+    p = Period(frequency)
+    p = Period(length=-(warm_start + batch) * p.length(), units=p.units())
+    start_date = advanceDateByCalendar('china.sse', ref_date, p, BizDayConventions.Following)
+
+    dates = makeSchedule(start_date,
+                         ref_date,
+                         frequency,
+                         calendar='china.sse',
+                         dateRule=BizDayConventions.Following,
+                         dateGenerationRule=DateGeneration.Backward)
+
+    factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates).dropna()
+    names = transformer.names
+
+    if neutralized_risk:
+        risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
+        used_neutralized_risk = list(set(neutralized_risk).difference(names))
+        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
+        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
+        risk_exp = train_x[neutralized_risk].values.astype(float)
+        x_values = train_x[names].values.astype(float)
+    else:
+        train_x = factor_df.copy()
+        risk_exp = None
+
+    date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
+    dates = np.unique(date_label)
+
+    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
+        end = dates[-1]
+        start = dates[-batch]
+
+        index = (date_label >= start) & (date_label <= end)
+        this_raw_x = x_values[index]
+        sub_dates = date_label[index]
+
+        if risk_exp is not None:
+            this_risk_exp = risk_exp[index]
+        else:
+            this_risk_exp = None
+
+        ne_x = factor_processing(this_raw_x,
+                                 pre_process=pre_process,
+                                 risk_factors=this_risk_exp,
+                                 post_process=post_process)
+
+        ne_x = ne_x[sub_dates == end]
+        codes = train_x.code.values[date_label == end]
+    else:
+        ne_x = None
+        codes = None
+
+    ret = dict()
+    ret['x_names'] = transformer.names
+    ret['predict'] = {'x': ne_x, 'code': codes}
+
+    return ret
+
+
+if __name__ == '__main__':
+    engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
+    universe = Universe('zz500', ['ashare_ex'])
+    neutralized_risk = ['SIZE']
+
+    res = fetch_train_phase(engine,
+                            ['EPS', 'CFinc1'],
+                            '2017-09-04',
+                            '2w',
+                            universe,
+                            4,
+                            warm_start=1,
+                            neutralized_risk=neutralized_risk)
+    print(res)
+
+    res = fetch_predict_phase(engine,
+                              ['EPS', 'CFinc1'],
+                              '2017-09-04',
+                              '2w',
+                              universe,
+                              4,
+                              warm_start=1,
+                              neutralized_risk=neutralized_risk)
+    print(res)
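The example script's comment ("training phase: using Linear - regression from scikit-learn") suggests how these two single-phase fetchers are meant to be combined. Below is a hedged sketch of that wiring, reusing the engine, universe and neutralized_risk objects from the __main__ block above; the scikit-learn model choice is an assumption taken from that comment, not something this commit implements.

from sklearn.linear_model import LinearRegression

train_data = fetch_train_phase(engine, ['EPS', 'CFinc1'], '2017-09-04', '2w',
                               universe, 4, warm_start=1, neutralized_risk=neutralized_risk)
predict_data = fetch_predict_phase(engine, ['EPS', 'CFinc1'], '2017-09-04', '2w',
                                   universe, 4, warm_start=1, neutralized_risk=neutralized_risk)

model = LinearRegression()
model.fit(train_data['train']['x'], train_data['train']['y'])   # neutralized factors vs. forward returns
predictions = model.predict(predict_data['predict']['x'])       # one row per code in predict_data['predict']['code']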