Dr.李 / alpha-mind / Commits / a1cc7865

Commit a1cc7865 (unverified)
Merge pull request #8 from alpha-miner/master

merge update

Authored by iLampard on Apr 16, 2018; committed by GitHub on Apr 16, 2018.
Parents: 45afaf12, 81538ae6
Showing 5 changed files with 435 additions and 37 deletions.
alphamind/data/engines/sqlengine.py                       +1    -1
alphamind/data/engines/utilities.py                       +1    -1
alphamind/model/composer.py                               +37   -28
alphamind/model/data_preparing.py                         +31   -7
notebooks/Example 11 - Long Short Strategy Model.ipynb    +365  -0
alphamind/data/engines/sqlengine.py

@@ -481,7 +481,7 @@ class SqlEngine(object):
                 )
             )
         df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code'])
-        return df
+        return pd.merge(df, codes[['trade_date', 'code']], how='inner')

     def fetch_benchmark(self,
                         ref_date: str,
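What changes here is only the return value: instead of handing back everything the query produced, the result is inner-merged against the requested (trade_date, code) pairs. A minimal pandas sketch of that filtering step, using made-up toy data in place of the real query result:

import pandas as pd

# Toy stand-ins for the query result and the requested universe snapshot.
df = pd.DataFrame({'trade_date': ['2018-01-02'] * 3,
                   'code': [1, 2, 3],
                   'factor': [0.1, 0.2, 0.3]})
codes = pd.DataFrame({'trade_date': ['2018-01-02', '2018-01-02'],
                      'code': [1, 3]})

# Old behaviour: return df as-is (all three codes).
# New behaviour: keep only rows whose (trade_date, code) appear in `codes`.
filtered = pd.merge(df, codes[['trade_date', 'code']], how='inner')
print(filtered)   # rows for codes 1 and 3 only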
alphamind/data/engines/utilities.py

@@ -47,7 +47,7 @@ def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict:
             break
     if not factor_cols:
-        raise ValueError(f"some factors in <{factors}> can't be find")
+        raise ValueError("some factors in <{0}> can't be find".format(factors))
     return factor_cols
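The removed line used an f-string, which requires Python 3.6+; the replacement builds the same message with str.format (presumably for compatibility with older interpreters, as the commit itself gives no rationale). A quick equivalence check on toy input:

factors = ['EPS', 'ROE']

# f-string form removed by this commit (only parses on Python >= 3.6):
msg_f = f"some factors in <{factors}> can't be find"
# str.format form added by this commit:
msg_fmt = "some factors in <{0}> can't be find".format(factors)

assert msg_f == msg_fmt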
alphamind/model/composer.py

@@ -8,6 +8,7 @@ Created on 2017-9-27
 import copy
 import bisect
 from typing import Iterable
+import numpy as np
 import pandas as pd
 from simpleutils.miscellaneous import list_eq
 from alphamind.model.modelbase import ModelBase
@@ -133,7 +134,8 @@ class DataMeta(object):
                                        self.pre_process,
                                        self.post_process,
                                        self.warm_start,
-                                       fillna=True)
+                                       fillna=True,
+                                       fit_target=alpha_model.fit_target)


 def train_model(ref_date: str,
@@ -186,6 +188,15 @@ class Composer(object):
             codes = x.index
         return pd.DataFrame(model.predict(x_values).flatten(), index=codes)

+    def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None) -> float:
+        model = self._fetch_latest_model(ref_date)
+        if x is None:
+            predict_data = self.data_meta.fetch_predict_data(ref_date, model)
+            x = predict_data['predict']['x']
+            if y is None:
+                y = predict_data['predict']['y']
+        return model.score(x, y)
+
     def _fetch_latest_model(self, ref_date) -> ModelBase:
         if self.is_updated:
             sorted_keys = self.sorted_keys
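The added Composer.score fetches the latest trained model for ref_date and, when x or y are not passed in, pulls them from the predict-phase data before delegating to the model's own score. Below is a toy mirror of that control flow with dummy stand-ins for the model and DataMeta; it is a sketch for illustration, not the library's API:

import numpy as np
import pandas as pd

class DummyModel:
    """Stand-in for an alpha-mind model; score() here is a toy R^2-like value."""
    def score(self, x, y):
        pred = x.values.sum(axis=1)
        ss_res = float(((y - pred) ** 2).sum())
        ss_tot = float(((y - y.mean()) ** 2).sum())
        return 1.0 - ss_res / ss_tot

class DummyDataMeta:
    """Stand-in for DataMeta: returns canned predict-phase data."""
    def fetch_predict_data(self, ref_date, model):
        x = pd.DataFrame({'f1': [0.1, 0.2, 0.3], 'f2': [0.0, 0.1, 0.1]})
        y = np.array([0.1, 0.3, 0.4])
        return {'predict': {'x': x, 'y': y}}

def score(data_meta, model, ref_date, x=None, y=None):
    # Mirrors the control flow of the Composer.score added above.
    if x is None:
        predict_data = data_meta.fetch_predict_data(ref_date, model)
        x = predict_data['predict']['x']
        if y is None:
            y = predict_data['predict']['y']
    return model.score(x, y)

print(score(DummyDataMeta(), DummyModel(), '2018-01-30'))   # 1.0 for this toy data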
@@ -211,35 +222,33 @@ class Composer(object):
 if __name__ == '__main__':
-    import numpy as np
-    from alphamind.data.standardize import standardize
-    from alphamind.data.winsorize import winsorize_normal
-    from alphamind.data.engines.sqlengine import industry_styles
-    from alphamind.model.linearmodel import ConstLinearModel
-
-    alpha_factors = ['EPS']
-    freq = '1w'
-    universe = Universe('zz500', ['zz500'])
-    batch = 4
-    neutralized_risk = ['SIZE'] + industry_styles
-    risk_model = 'short'
-    pre_process = [winsorize_normal, standardize]
-    pos_process = [winsorize_normal, standardize]
-    data_source = "postgres+psycopg2://postgres:we083826@localhost/alpha"
-
-    alpha_model = ConstLinearModel(['EPS'], np.array([1.]))
-    data_meta = DataMeta(freq,
-                         universe,
-                         batch,
-                         neutralized_risk,
-                         risk_model,
-                         pre_process,
-                         pos_process,
-                         data_source=data_source)
-    composer = Composer(alpha_model, data_meta)
-
-    composer.train('2017-09-20')
-    composer.train('2017-09-22')
-    composer.train('2017-09-25')
-    composer.predict('2017-09-21')
+    from PyFin.api import LAST
+    from alphamind.data.standardize import standardize
+    from alphamind.data.winsorize import winsorize_normal
+    from alphamind.data.engines.sqlengine import risk_styles, industry_styles
+    from alphamind.model.linearmodel import LinearRegression
+
+    freq = '20b'
+    universe = Universe('custom', ['ashare_ex'])
+    batch = 0
+    neutralized_risk = risk_styles + industry_styles
+    risk_model = 'short'
+    pre_process = [winsorize_normal, standardize]
+    post_process = [standardize]
+    warm_start = 0
+    data_source = "postgres+psycopg2://postgres:we083826@localhost/alpha"
+
+    data_meta = DataMeta(freq=freq,
+                         universe=universe,
+                         batch=batch,
+                         neutralized_risk=neutralized_risk,
+                         risk_model=risk_model,
+                         pre_process=pre_process,
+                         post_process=post_process,
+                         warm_start=warm_start,
+                         data_source=data_source)
+
+    alpha_model = LinearRegression({'roe_q': LAST('roe_q')}, fit_target='roe_q')
+    composer = Composer(alpha_model=alpha_model, data_meta=data_meta)
+
+    ref_date = '2018-01-30'
+    composer.train(ref_date)
+    res = composer.predict(ref_date)
\ No newline at end of file
alphamind/model/data_preparing.py

@@ -106,6 +106,7 @@ def prepare_data(engine: SqlEngine,
     df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
     df = pd.merge(df, industry_df, on=['trade_date', 'code'])
     df['weight'] = df['weight'].fillna(0.)
+    df.dropna(inplace=True)

     return dates, df[['trade_date', 'code', 'dx']], df[
         ['trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'] + transformer.names]
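The new dropna runs after the benchmark weights have been filled with 0., so codes outside the benchmark presumably survive while rows that still miss other values are discarded before the frame is split and returned. A toy illustration of that ordering (the columns here are a made-up subset):

import numpy as np
import pandas as pd

df = pd.DataFrame({'trade_date': ['2018-01-02'] * 3,
                   'code': [1, 2, 3],
                   'weight': [0.5, np.nan, np.nan],   # codes 2 and 3 are not in the benchmark
                   'dx': [0.01, 0.02, np.nan]})       # code 3 has a missing return

df['weight'] = df['weight'].fillna(0.)   # keep non-benchmark codes with weight 0
df.dropna(inplace=True)                  # drop rows that are still incomplete
print(df)                                # codes 1 and 2 remain; code 3 is dropped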
@@ -310,10 +311,10 @@ def fetch_train_phase(engine,
     if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
         pyFinAssert(len(dates) >= 2, ValueError, "No previous data for training for the date {0}".format(ref_date))
         end = dates[-2]
-        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
+        start = dates[-batch - 2] if batch <= len(dates) - 2 else dates[0]
     else:
         end = dates[-1]
-        start = dates[-batch] if batch <= len(dates) else dates[0]
+        start = dates[-batch - 1] if batch <= len(dates) else dates[0]

     index = (date_label >= start) & (date_label <= end)
     this_raw_x = x_values[index]
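In both branches the end of the training window is unchanged, but start now reaches one trading period further back. A self-contained toy calculation of the old and new window starts, with an invented date list and batch size:

import datetime as dt

# Made-up sorted trade dates and batch size, for illustration only.
dates = [dt.datetime(2018, 1, d) for d in (2, 3, 4, 5, 8, 9)]
batch = 3
ref_date = '2018-01-09'

if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
    end = dates[-2]
    old_start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    new_start = dates[-batch - 2] if batch <= len(dates) - 2 else dates[0]
else:
    end = dates[-1]
    old_start = dates[-batch] if batch <= len(dates) else dates[0]
    new_start = dates[-batch - 1] if batch <= len(dates) else dates[0]

print(old_start.date(), new_start.date(), end.date())
# 2018-01-04 2018-01-03 2018-01-08 -> the window now begins one period earlier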
@@ -352,7 +353,8 @@ def fetch_predict_phase(engine,
                         pre_process: Iterable[object] = None,
                         post_process: Iterable[object] = None,
                         warm_start: int = 0,
-                        fillna: str = None):
+                        fillna: str = None,
+                        fit_target: Union[Transformer, object] = None):
     if isinstance(alpha_factors, Transformer):
         transformer = alpha_factors
     else:
@@ -369,6 +371,8 @@ def fetch_predict_phase(engine,
                               dateRule=BizDayConventions.Following,
                               dateGenerationRule=DateGeneration.Backward)

+    horizon = map_freq(frequency)
+
     factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)

     if fillna:
@@ -377,6 +381,14 @@ def fetch_predict_phase(engine,
     else:
         factor_df = factor_df.dropna()

+    if fit_target is None:
+        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
+    else:
+        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
+        target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
+        target_df = target_df[target_df.trade_date.isin(dates)]
+        target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
+
     names = transformer.names

     if neutralized_risk:
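When fit_target is supplied, the target is fetched one extra period forward and then pad-filled per code, so each stock carries its last observed value into dates where the forward observation is missing. The pad step in isolation on toy data (the engine calls themselves need a database and are not reproduced; the 'roe_q' column name is taken from the composer.py example above):

import numpy as np
import pandas as pd

target_df = pd.DataFrame({'code': [1, 1, 2, 2],
                          'trade_date': ['2018-01-02', '2018-01-03'] * 2,
                          'roe_q': [0.10, np.nan, 0.20, 0.25]})

# Forward-fill within each code, as in the new fetch_predict_phase branch.
filled = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
print(filled)   # code 1 carries 0.10 into 2018-01-03; code 2 is unchanged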
@@ -384,13 +396,17 @@ def fetch_predict_phase(engine,
         used_neutralized_risk = list(set(neutralized_risk).difference(names))
         risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
         train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
+        train_x = pd.merge(train_x, target_df, on=['trade_date', 'code'], how='left')
         risk_exp = train_x[neutralized_risk].values.astype(float)
     else:
-        train_x = factor_df.copy()
+        train_x = pd.merge(factor_df, target_df, on=['trade_date', 'code'], how='left')
         risk_exp = None

+    train_x.dropna(inplace=True)
     x_values = train_x[names].values.astype(float)
+    y_values = train_x[['dx']].values.astype(float)

-    date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
+    date_label = pd.DatetimeIndex(train_x.trade_date).to_pydatetime()
     dates = np.unique(date_label)

     if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
@@ -400,6 +416,7 @@ def fetch_predict_phase(engine,
         left_index = bisect.bisect_left(date_label, start)
         right_index = bisect.bisect_right(date_label, end)
         this_raw_x = x_values[left_index:right_index]
+        this_raw_y = y_values[left_index:right_index]
         sub_dates = date_label[left_index:right_index]

         if risk_exp is not None:
@@ -412,10 +429,16 @@ def fetch_predict_phase(engine,
                                  risk_factors=this_risk_exp,
                                  post_process=post_process)
+        ne_y = factor_processing(this_raw_y,
+                                 pre_process=pre_process,
+                                 risk_factors=this_risk_exp,
+                                 post_process=post_process)

         inner_left_index = bisect.bisect_left(sub_dates, end)
         inner_right_index = bisect.bisect_right(sub_dates, end)
         ne_x = ne_x[inner_left_index:inner_right_index]
+        ne_y = ne_y[inner_left_index:inner_right_index]

         left_index = bisect.bisect_left(date_label, end)
         right_index = bisect.bisect_right(date_label, end)
@@ -423,11 +446,12 @@ def fetch_predict_phase(engine,
         codes = train_x.code.values[left_index:right_index]
     else:
         ne_x = None
+        ne_y = None
         codes = None

     ret = dict()
     ret['x_names'] = transformer.names
-    ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes}
+    ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes, 'y': ne_y.flatten()}

     return ret
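The predict payload now carries the realised target under 'y' alongside the features, which is what the new Composer.score shown earlier consumes. A sketch of reading a dictionary of that shape; the values are mocked, since the real call needs a live engine:

import numpy as np
import pandas as pd

# Mocked return value with the shape produced by fetch_predict_phase after this commit.
ret = {'x_names': ['roe_q'],
       'predict': {'x': pd.DataFrame({'roe_q': [0.12, 0.08]}),
                   'code': np.array([1, 2]),
                   'y': np.array([0.11, 0.09])}}

x = ret['predict']['x']         # neutralised factor matrix for the prediction date
codes = ret['predict']['code']  # securities aligned with the rows of x
y = ret['predict']['y']         # realised fit target, newly added by this commit
print(x.assign(code=codes, target=y))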
@@ -437,7 +461,7 @@ if __name__ == '__main__':
     engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha')
     universe = Universe('zz500', ['hs300', 'zz500'])
     neutralized_risk = risk_styles + industry_styles
-    res = fetch_train_phase(engine, ['ep_q'],
+    res = fetch_predict_phase(engine, ['ep_q'],
                             '2012-01-05',
                             '5b',
                             universe,
notebooks/Example 11 - Long Short Strategy Model.ipynb (new file, 0 → 100644)

This diff is collapsed.