Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
8c79003a
Commit
8c79003a
authored
May 03, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added strategy
parent
69504bfe
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
224 additions
and
164 deletions
+224
-164
composer.py
alphamind/model/composer.py
+8
-5
linearmodel.py
alphamind/model/linearmodel.py
+1
-1
constraints.py
alphamind/portfolio/constraints.py
+1
-1
strategy.py
alphamind/strategy/strategy.py
+214
-157
No files found.
alphamind/model/composer.py
View file @
8c79003a
...
@@ -20,6 +20,7 @@ from alphamind.data.winsorize import winsorize_normal
...
@@ -20,6 +20,7 @@ from alphamind.data.winsorize import winsorize_normal
from
alphamind.data.rank
import
rank
from
alphamind.data.rank
import
rank
from
alphamind.data.standardize
import
standardize
from
alphamind.data.standardize
import
standardize
from
alphamind.model.loader
import
load_model
from
alphamind.model.loader
import
load_model
from
alphamind.model.linearmodel
import
ConstLinearModel
PROCESS_MAPPING
=
{
PROCESS_MAPPING
=
{
'winsorize_normal'
:
winsorize_normal
,
'winsorize_normal'
:
winsorize_normal
,
...
@@ -144,11 +145,13 @@ def train_model(ref_date: str,
...
@@ -144,11 +145,13 @@ def train_model(ref_date: str,
x_values
:
pd
.
DataFrame
=
None
,
x_values
:
pd
.
DataFrame
=
None
,
y_values
:
pd
.
DataFrame
=
None
):
y_values
:
pd
.
DataFrame
=
None
):
base_model
=
copy
.
deepcopy
(
alpha_model
)
base_model
=
copy
.
deepcopy
(
alpha_model
)
if
x_values
is
None
:
train_data
=
data_meta
.
fetch_train_data
(
ref_date
,
alpha_model
)
if
not
isinstance
(
alpha_model
,
ConstLinearModel
):
x_values
=
train_data
[
'train'
][
'x'
]
if
x_values
is
None
:
y_values
=
train_data
[
'train'
][
'y'
]
train_data
=
data_meta
.
fetch_train_data
(
ref_date
,
alpha_model
)
base_model
.
fit
(
x_values
,
y_values
)
x_values
=
train_data
[
'train'
][
'x'
]
y_values
=
train_data
[
'train'
][
'y'
]
base_model
.
fit
(
x_values
,
y_values
)
return
base_model
return
base_model
...
...
alphamind/model/linearmodel.py
View file @
8c79003a
...
@@ -22,7 +22,7 @@ class ConstLinearModelImpl(object):
...
@@ -22,7 +22,7 @@ class ConstLinearModelImpl(object):
self
.
weights
=
weights
.
flatten
()
self
.
weights
=
weights
.
flatten
()
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
def
fit
(
self
,
x
:
np
.
ndarray
,
y
:
np
.
ndarray
):
pass
raise
NotImplementedError
(
"Const linear model doesn't offer fit methodology"
)
def
predict
(
self
,
x
:
np
.
ndarray
):
def
predict
(
self
,
x
:
np
.
ndarray
):
return
x
@
self
.
weights
return
x
@
self
.
weights
...
...
alphamind/portfolio/constraints.py
View file @
8c79003a
...
@@ -108,7 +108,7 @@ class LinearConstraints(object):
...
@@ -108,7 +108,7 @@ class LinearConstraints(object):
backbone
:
np
.
ndarray
=
None
):
backbone
:
np
.
ndarray
=
None
):
pyFinAssert
(
len
(
bounds
)
==
cons_mat
.
shape
[
1
],
"Number of bounds should be same as number of col of cons_mat"
)
pyFinAssert
(
len
(
bounds
)
==
cons_mat
.
shape
[
1
],
"Number of bounds should be same as number of col of cons_mat"
)
self
.
names
=
list
(
bounds
.
keys
(
))
self
.
names
=
list
(
set
(
bounds
.
keys
())
.
intersection
(
set
(
cons_mat
.
columns
)
))
self
.
bounds
=
bounds
self
.
bounds
=
bounds
self
.
cons_mat
=
cons_mat
self
.
cons_mat
=
cons_mat
self
.
backbone
=
backbone
self
.
backbone
=
backbone
...
...
alphamind/strategy/strategy.py
View file @
8c79003a
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
"""
Created on 201
7-9-14
Created on 201
8-5-3
@author: cheng.li
@author: cheng.li
"""
"""
import
datetime
as
dt
import
copy
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
import
alphamind.data
as
data_module
from
PyFin.api
import
makeSchedule
import
alphamind.model
as
model_module
from
alphamind.utilities
import
map_freq
from
alphamind.data.engines.universe
import
Universe
from
alphamind.utilities
import
alpha_logger
from
alphamind.model.modelbase
import
ModelBase
from
alphamind.model.composer
import
train_model
from
alphamind.data.engines.sqlengine
import
industry_styles
from
alphamind.portfolio.constraints
import
LinearConstraints
from
alphamind.portfolio.constraints
import
BoundaryType
from
alphamind.portfolio.constraints
import
create_box_bounds
from
alphamind.execution.naiveexecutor
import
NaiveExecutor
from
alphamind.data.engines.sqlengine
import
risk_styles
from
alphamind.data.engines.sqlengine
import
risk_styles
from
alphamind.model.data_preparing
import
fetch_data_package
from
alphamind.data.engines.sqlengine
import
industry_styles
from
alphamind.model.data_preparing
import
fetch_predict_phase
from
alphamind.data.engines.sqlengine
import
macro_styles
from
alphamind.model.data_preparing
import
fetch_train_phase
from
alphamind.data.processing
import
factor_processing
from
alphamind.analysis.factoranalysis
import
er_portfolio_analysis
def
load_process
(
names
:
list
)
->
list
:
return
[
getattr
(
data_module
,
name
)
for
name
in
names
]
def
load_neutralize_risks
(
names
:
list
)
->
list
:
all_styles
=
risk_styles
+
industry_styles
+
macro_styles
risks
=
[
]
total_risk_names
=
[
'benchmark'
,
'total'
]
for
name
in
names
:
b_type
=
[]
if
name
==
'industry_styles'
:
l_val
=
[]
risks
.
extend
(
industry_styles
)
u_val
=
[]
elif
name
==
'risk_styles'
:
risks
.
extend
(
risk_styles
)
else
:
risks
.
append
(
name
)
return
risks
for
name
in
total_risk_names
:
if
name
==
'benchmark'
:
b_type
.
append
(
BoundaryType
.
RELATIVE
)
l_val
.
append
(
0.8
)
u_val
.
append
(
1.0
)
else
:
b_type
.
append
(
BoundaryType
.
RELATIVE
)
l_val
.
append
(
1.0
)
u_val
.
append
(
1.0
)
bounds
=
create_box_bounds
(
total_risk_names
,
b_type
,
l_val
,
u_val
)
def
load_model_meta
(
name
:
str
)
->
ModelBase
:
return
getattr
(
model_module
,
name
)
class
RunningSetting
(
object
):
def
load_universe
(
universe
:
list
)
->
Universe
:
def
__init__
(
self
,
return
Universe
(
universe
[
0
],
universe
[
1
])
universe
,
start_date
,
end_date
,
freq
,
benchmark
=
905
,
industry_cat
=
'sw_adj'
,
industry_level
=
1
,
rebalance_method
=
'risk_neutral'
,
**
kwargs
):
self
.
universe
=
universe
self
.
dates
=
makeSchedule
(
start_date
,
end_date
,
freq
,
'china.sse'
)
self
.
dates
=
[
d
.
strftime
(
'
%
Y-
%
m-
%
d'
)
for
d
in
self
.
dates
]
self
.
benchmark
=
benchmark
self
.
horizon
=
map_freq
(
freq
)
self
.
executor
=
NaiveExecutor
()
self
.
industry_cat
=
industry_cat
self
.
industry_level
=
industry_level
self
.
rebalance_method
=
rebalance_method
self
.
more_opts
=
kwargs
class
Strategy
(
object
):
class
Strategy
(
object
):
def
__init__
(
self
,
def
__init__
(
self
,
data_source
,
alpha_model
,
strategy_desc
:
dict
,
data_meta
,
cache_start_date
=
None
,
running_setting
):
cache_end_date
=
None
):
self
.
alpha_model
=
alpha_model
self
.
data_source
=
data_source
self
.
data_meta
=
data_meta
self
.
strategy_name
=
strategy_desc
[
'strategy_name'
]
self
.
running_setting
=
running_setting
self
.
pre_process
=
load_process
(
strategy_desc
[
'data_process'
][
'pre_process'
])
self
.
engine
=
self
.
data_meta
.
engine
self
.
post_process
=
load_process
(
strategy_desc
[
'data_process'
][
'pre_process'
])
self
.
neutralize_risk
=
load_neutralize_risks
(
strategy_desc
[
'data_process'
][
'neutralize_risk'
])
def
run
(
self
):
self
.
risk_model
=
strategy_desc
[
'risk_model'
]
alpha_logger
.
info
(
"starting backting ..."
)
self
.
model_type
=
load_model_meta
(
strategy_desc
[
'alpha_model'
])
self
.
parameters
=
strategy_desc
[
'parameters'
]
total_factors
=
self
.
engine
.
fetch_factor_range
(
self
.
running_setting
.
universe
,
self
.
features
=
strategy_desc
[
'features'
]
self
.
alpha_model
.
formulas
,
self
.
model
=
self
.
model_type
(
features
=
self
.
features
,
**
self
.
parameters
)
dates
=
self
.
running_setting
.
dates
)
alpha_logger
.
info
(
"alpha factor data loading finished ..."
)
self
.
is_const_model
=
isinstance
(
self
.
model
,
model_module
.
ConstLinearModel
)
total_industry
=
self
.
engine
.
fetch_industry_matrix_range
(
universe
,
if
self
.
is_const_model
:
dates
=
self
.
running_setting
.
dates
,
self
.
weights
=
strategy_desc
[
'weights'
]
category
=
self
.
running_setting
.
industry_cat
,
level
=
self
.
running_setting
.
industry_level
)
self
.
freq
=
strategy_desc
[
'freq'
]
alpha_logger
.
info
(
"industry data loading finished ..."
)
self
.
universe
=
load_universe
(
strategy_desc
[
'universe'
])
self
.
benchmark
=
strategy_desc
[
'benchmark'
]
total_benchmark
=
self
.
engine
.
fetch_benchmark_range
(
dates
=
self
.
running_setting
.
dates
,
benchmark
=
self
.
running_setting
.
benchmark
)
self
.
batch
=
strategy_desc
[
'batch'
]
alpha_logger
.
info
(
"benchmark data loading finished ..."
)
self
.
warm_start
=
strategy_desc
[
'warm_start'
]
total_risk_cov
,
total_risk_exposure
=
self
.
engine
.
fetch_risk_model_range
(
if
cache_start_date
and
cache_end_date
:
universe
,
self
.
cached_data
=
fetch_data_package
(
self
.
data_source
,
dates
=
self
.
running_setting
.
dates
,
self
.
features
,
risk_model
=
self
.
data_meta
.
risk_model
cache_start_date
,
)
cache_end_date
,
alpha_logger
.
info
(
"risk_model data loading finished ..."
)
self
.
freq
,
self
.
universe
,
total_returns
=
self
.
engine
.
fetch_dx_return_range
(
self
.
running_setting
.
universe
,
self
.
benchmark
,
dates
=
self
.
running_setting
.
dates
,
self
.
warm_start
,
horizon
=
self
.
running_setting
.
horizon
,
self
.
batch
,
offset
=
1
)
self
.
neutralize_risk
,
alpha_logger
.
info
(
"returns data loading finished ..."
)
self
.
risk_model
,
self
.
pre_process
,
total_data
=
pd
.
merge
(
total_factors
,
total_industry
,
on
=
[
'trade_date'
,
'code'
])
self
.
post_process
)
total_data
=
pd
.
merge
(
total_data
,
total_benchmark
,
on
=
[
'trade_date'
,
'code'
],
how
=
'left'
)
total_data
.
fillna
({
'weight'
:
0.
},
inplace
=
True
)
# some cached data to fast processing
total_data
=
pd
.
merge
(
total_data
,
total_returns
,
on
=
[
'trade_date'
,
'code'
])
settlement_data
=
self
.
cached_data
[
'settlement'
]
total_data
=
pd
.
merge
(
total_data
,
total_risk_exposure
,
on
=
[
'trade_date'
,
'code'
])
.
fillna
(
total_data
.
median
())
self
.
settle_dfs
=
settlement_data
.
set_index
(
'code'
)
.
groupby
(
'trade_date'
)
total_data_groups
=
total_data
.
groupby
(
'trade_date'
)
self
.
scheduled_dates
=
set
(
k
.
strftime
(
'
%
Y-
%
m-
%
d'
)
for
k
in
self
.
cached_data
[
'train'
][
'x'
]
.
keys
())
else
:
rets
=
[]
self
.
cached_data
=
None
turn_overs
=
[]
self
.
scheduled_dates
=
None
executor
=
copy
.
deepcopy
(
self
.
running_setting
.
executor
)
positions
=
pd
.
DataFrame
()
def
cached_dates
(
self
):
return
sorted
(
self
.
scheduled_dates
)
for
ref_date
,
this_data
in
total_data_groups
:
new_model
=
train_model
(
ref_date
.
strftime
(
'
%
Y-
%
m-
%
d'
),
self
.
alpha_model
,
self
.
data_meta
)
def
model_train
(
self
,
ref_date
:
str
):
codes
=
this_data
.
code
.
values
.
tolist
()
if
not
self
.
is_const_model
:
if
self
.
cached_data
and
ref_date
in
self
.
scheduled_dates
:
if
self
.
running_setting
.
rebalance_method
==
'tv'
:
ref_date
=
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
)
risk_cov
=
total_risk_cov
[
total_risk_cov
.
trade_date
==
ref_date
]
ne_x
=
self
.
cached_data
[
'train'
][
'x'
][
ref_date
]
sec_cov
=
self
.
_generate_sec_cov
(
this_data
,
risk_cov
)
ne_y
=
self
.
cached_data
[
'train'
][
'y'
][
ref_date
]
else
:
else
:
data
=
fetch_train_phase
(
self
.
data_source
,
sec_cov
=
None
self
.
features
,
ref_date
,
benchmark_w
=
this_data
.
weight
.
values
self
.
freq
,
is_in_benchmark
=
(
benchmark_w
>
0.
)
.
astype
(
float
)
.
reshape
((
-
1
,
1
))
self
.
universe
,
constraints_exp
=
np
.
concatenate
([
is_in_benchmark
,
self
.
batch
,
np
.
ones_like
(
is_in_benchmark
)],
self
.
neutralize_risk
,
axis
=
1
)
self
.
risk_model
,
constraints_exp
=
pd
.
DataFrame
(
constraints_exp
,
columns
=
[
'benchmark'
,
'total'
])
self
.
pre_process
,
constraints
=
LinearConstraints
(
bounds
,
constraints_exp
,
benchmark_w
)
self
.
post_process
,
self
.
warm_start
)
lbound
=
np
.
maximum
(
0.
,
benchmark_w
-
0.02
)
ubound
=
0.02
+
benchmark_w
ne_x
=
data
[
'train'
][
'x'
]
ne_y
=
data
[
'train'
][
'y'
]
features
=
new_model
.
features
self
.
model
.
fit
(
ne_x
,
ne_y
)
raw_factors
=
this_data
[
features
]
.
values
new_factors
=
factor_processing
(
raw_factors
,
def
model_predict
(
self
,
ref_date
:
str
)
->
pd
.
DataFrame
:
pre_process
=
self
.
data_meta
.
pre_process
,
if
self
.
cached_data
and
ref_date
in
self
.
scheduled_dates
:
risk_factors
=
self
.
data_meta
.
neutralized_risk
,
ref_date
=
dt
.
datetime
.
strptime
(
ref_date
,
'
%
Y-
%
m-
%
d'
)
post_process
=
self
.
data_meta
.
post_process
)
ne_x
=
self
.
cached_data
[
'predict'
][
'x'
][
ref_date
]
settlement_data
=
self
.
cached_data
[
'settlement'
]
er
=
new_model
.
predict
(
pd
.
DataFrame
(
new_factors
,
columns
=
features
))
codes
=
settlement_data
.
loc
[
settlement_data
.
trade_date
==
ref_date
,
'code'
]
.
values
else
:
alpha_logger
.
info
(
'{0} re-balance: {1} codes'
.
format
(
ref_date
,
len
(
er
)))
data
=
fetch_predict_phase
(
self
.
data_source
,
target_pos
,
_
=
er_portfolio_analysis
(
er
,
self
.
features
,
this_data
.
industry_name
.
values
,
ref_date
,
None
,
self
.
freq
,
constraints
,
self
.
universe
,
False
,
self
.
batch
,
benchmark_w
,
self
.
neutralize_risk
,
method
=
self
.
running_setting
.
rebalance_method
,
self
.
risk_model
,
lbound
=
lbound
,
self
.
pre_process
,
ubound
=
ubound
,
self
.
post_process
,
target_vol
=
0.05
,
self
.
warm_start
)
cov
=
sec_cov
)
ne_x
=
data
[
'predict'
][
'x'
]
target_pos
[
'code'
]
=
codes
codes
=
data
[
'predict'
][
'code'
]
target_pos
[
'trade_date'
]
=
ref_date
target_pos
[
'benchmark_weight'
]
=
benchmark_w
prediction
=
self
.
model
.
predict
(
ne_x
)
.
flatten
()
target_pos
[
'dx'
]
=
this_data
.
dx
.
values
return
pd
.
DataFrame
({
'prediction'
:
prediction
,
'code'
:
codes
})
turn_over
,
executed_pos
=
executor
.
execute
(
target_pos
=
target_pos
)
def
settlement
(
self
,
ref_date
:
str
,
prediction
:
pd
.
DataFrame
)
->
float
:
ret
=
executed_pos
.
weight
.
values
@
(
np
.
exp
(
this_data
.
dx
.
values
)
-
1.
)
settlement_data
=
self
.
settle_dfs
.
get_group
(
ref_date
)[[
'dx'
,
'weight'
]]
rets
.
append
(
np
.
log
(
1.
+
ret
))
executor
.
set_current
(
executed_pos
)
turn_overs
.
append
(
turn_over
)
positions
=
positions
.
append
(
target_pos
)
trade_dates
=
positions
.
trade_date
.
unique
()
ret_df
=
pd
.
DataFrame
({
'returns'
:
rets
,
'turn_over'
:
turn_overs
},
index
=
trade_dates
)
index_return
=
self
.
engine
.
fetch_dx_return_index_range
(
self
.
running_setting
.
benchmark
,
dates
=
self
.
running_setting
.
dates
,
horizon
=
self
.
running_setting
.
horizon
,
offset
=
1
)
.
set_index
(
'trade_date'
)
ret_df
[
'benchmark_returns'
]
=
index_return
[
'dx'
]
ret_df
.
loc
[
advanceDateByCalendar
(
'china.sse'
,
ret_df
.
index
[
-
1
],
freq
)]
=
0.
ret_df
=
ret_df
.
shift
(
1
)
ret_df
.
iloc
[
0
]
=
0.
ret_df
[
'excess_return'
]
=
ret_df
[
'returns'
]
-
ret_df
[
'benchmark_returns'
]
return
ret_df
,
positions
@
staticmethod
def
_generate_sec_cov
(
current_data
,
risk_cov
):
risk_exposure
=
current_data
[
all_styles
]
.
values
risk_cov
=
risk_cov
[
all_styles
]
.
values
special_risk
=
current_data
[
'srisk'
]
.
values
sec_cov
=
risk_exposure
@
risk_cov
@
risk_exposure
.
T
/
10000
+
np
.
diag
(
special_risk
**
2
)
/
10000
return
sec_cov
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
import
json
from
matplotlib
import
pyplot
as
plt
import
pprint
from
PyFin.api
import
*
from
alphamind.data.engines.sqlengine
import
SqlEngine
from
alphamind.api
import
Universe
from
PyFin.api
import
makeSchedule
from
alphamind.api
import
ConstLinearModel
from
alphamind.api
import
DataMeta
engine
=
SqlEngine
()
start_date
=
'2010-01-01'
start_date
=
'2017-06-01'
end_date
=
'2018-04-19'
end_date
=
'2017-09-14'
freq
=
'10b'
neutralized_risk
=
None
with
open
(
"sample_strategy.json"
,
'r'
)
as
fp
:
universe
=
Universe
(
"custom"
,
[
'zz800'
])
strategy_desc
=
json
.
load
(
fp
)
strategy
=
Strategy
(
engine
,
strategy_desc
,
start_date
,
end_date
)
factor
=
'RVOL'
alpha_factors
=
{
'f01'
:
CSQuantiles
(
LAST
(
factor
),
groups
=
'sw1_adj'
)}
dates
=
strategy
.
cached_dates
()
weights
=
{
'f01'
:
1.
}
print
(
dates
)
alpha_model
=
ConstLinearModel
(
features
=
alpha_factors
,
weights
=
weights
)
for
date
in
dates
:
data_meta
=
DataMeta
(
freq
=
freq
,
strategy
.
model_train
(
date
)
universe
=
universe
,
prediction
=
strategy
.
model_predict
(
date
)
batch
=
1
)
strategy
.
settlement
(
date
,
prediction
)
\ No newline at end of file
running_setting
=
RunningSetting
(
universe
,
start_date
,
end_date
,
freq
,
rebalance_method
=
'tv'
)
strategy
=
Strategy
(
alpha_model
,
data_meta
,
running_setting
)
ret_df
,
positions
=
strategy
.
run
()
ret_df
[
'excess_return'
]
.
cumsum
()
.
plot
()
plt
.
title
(
f
"{factor}"
)
plt
.
show
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment