Dr.李 / alpha-mind · Commits

Commit 910ae0a4, authored Jan 31, 2018 by Dr.李
Parent: 57c969bd

    update example

Showing 2 changed files, with 266 additions and 191 deletions (+266 −191):

    alphamind/examples/factor_analysis_example.py   +215 −174
    alphamind/examples/factor_res_analysis.py       +51  −17
alphamind/examples/factor_analysis_example.py
View file @ 910ae0a4
@@ -20,12 +20,10 @@ plt.style.use('ggplot')
 Back test parameter settings
 """
-start_date = '2011-01-01'
-end_date = '2018-01-11'
-benchmark_code = 300
-universe_name = ['zz800']
-universe = Universe('custom', universe_name)
-frequency = '5b'
+start_date = '2010-01-01'
+end_date = '2018-01-26'
+frequency = '10b'
 method = 'risk_neutral'
 industry_lower = 1.
 industry_upper = 1.
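
The frequency string feeds both map_freq (the forward-return horizon) and makeSchedule (the rebalance dates). Assuming the 'Nb' suffix means "every N business days", a rough stand-in for the schedule can be sketched with pandas; alpha-mind's own makeSchedule additionally respects the 'china.sse' holiday calendar, which plain business days do not, so treat this only as an approximation:

    # Minimal sketch, assuming '10b' means "every 10 business days".
    # Uses a plain weekday calendar, not the 'china.sse' holiday calendar.
    import pandas as pd

    def approx_schedule(start_date: str, end_date: str, frequency: str) -> list:
        n = int(frequency.rstrip('b'))                # '10b' -> 10
        bdays = pd.bdate_range(start_date, end_date)  # weekday calendar only
        return list(bdays[::n])                       # every n-th business day

    print(approx_schedule('2010-01-01', '2010-03-01', '10b')[:3])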
@@ -33,179 +31,222 @@ neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
 constraint_risk = ['SIZE', 'LEVERAGE'] + industry_styles
 size_risk_lower = 0
 size_risk_upper = 0
-turn_over_target_base = 0.25
+turn_over_target_base = 0.30
 benchmark_total_lower = 0.8
 benchmark_total_upper = 1.0
 horizon = map_freq(frequency)
 executor = NaiveExecutor()
-engine = SqlEngine('postgres+psycopg2://postgres:we083826@192.168.0.102/alpha')
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name
=
[
'alpha_factor'
]
factor_name
=
'SalesCostRatio'
base1
=
LAST
(
'roe_q'
)
base2
=
CSRes
(
LAST
(
'ep_q'
),
'roe_q'
)
simple_expression
=
DIFF
(
CSRes
(
CSRes
(
LAST
(
factor_name
),
base1
),
base2
))
const_features
=
{
alpha_name
[
0
]:
simple_expression
}
const_weights
=
np
.
array
([
1.
])
const_model
=
ConstLinearModel
(
features
=
alpha_name
,
weights
=
const_weights
)
ref_dates
=
makeSchedule
(
start_date
,
end_date
,
frequency
,
'china.sse'
)
const_model_factor_data
=
engine
.
fetch_data_range
(
universe
,
const_features
,
dates
=
ref_dates
,
benchmark
=
benchmark_code
)[
'factor'
]
.
dropna
()
horizon
=
map_freq
(
frequency
)
rets
=
[]
turn_overs
=
[]
leverags
=
[]
previous_pos
=
pd
.
DataFrame
()
index_dates
=
[]
factor_groups
=
const_model_factor_data
.
groupby
(
'trade_date'
)
for
i
,
value
in
enumerate
(
factor_groups
):
date
=
value
[
0
]
data
=
value
[
1
]
ref_date
=
date
.
strftime
(
'
%
Y-
%
m-
%
d'
)
index_dates
.
append
(
date
)
total_data
=
data
.
fillna
(
data
[
alpha_name
]
.
median
())
alpha_logger
.
info
(
'{0}: {1}'
.
format
(
date
,
len
(
total_data
)))
risk_exp
=
total_data
[
neutralize_risk
]
.
values
.
astype
(
float
)
industry
=
total_data
.
industry_code
.
values
benchmark_w
=
total_data
.
weight
.
values
constraint_exp
=
total_data
[
constraint_risk
]
.
values
risk_exp_expand
=
np
.
concatenate
((
constraint_exp
,
np
.
ones
((
len
(
risk_exp
),
1
))),
axis
=
1
)
.
astype
(
float
)
risk_names
=
constraint_risk
+
[
'total'
]
risk_target
=
risk_exp_expand
.
T
@
benchmark_w
lbound
=
np
.
maximum
(
0.
,
benchmark_w
-
0.02
)
# np.zeros(len(total_data))
ubound
=
0.02
+
benchmark_w
is_in_benchmark
=
(
benchmark_w
>
0.
)
.
astype
(
float
)
risk_exp_expand
=
np
.
concatenate
((
risk_exp_expand
,
is_in_benchmark
.
reshape
((
-
1
,
1
))),
axis
=
1
)
.
astype
(
float
)
risk_names
.
append
(
'benchmark_total'
)
constraint
=
Constraints
(
risk_exp_expand
,
risk_names
)
for
j
,
name
in
enumerate
(
risk_names
):
if
name
==
'total'
:
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
j
],
upper_bound
=
risk_target
[
j
])
elif
name
==
'SIZE'
:
base_target
=
abs
(
risk_target
[
j
])
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
j
]
+
base_target
*
size_risk_lower
,
upper_bound
=
risk_target
[
j
]
+
base_target
*
size_risk_upper
)
elif
name
==
'benchmark_total'
:
base_target
=
benchmark_w
.
sum
()
constraint
.
set_constraints
(
name
,
lower_bound
=
benchmark_total_lower
*
base_target
,
upper_bound
=
benchmark_total_upper
*
base_target
)
def
factor_analysis
(
engine
,
factor_name
,
universe
,
benchmark_code
,
positive
=
True
):
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name
=
[
factor_name
+
'_'
+
(
'pos'
if
positive
else
'neg'
)]
base1
=
LAST
(
'Alpha60'
)
base2
=
CSRes
(
'roe_q'
,
base1
)
base3
=
CSRes
(
CSRes
(
'ep_q'
,
base1
),
base2
)
simple_expression
=
CSRes
(
CSRes
(
CSRes
(
LAST
(
factor_name
),
base1
),
base2
),
base3
)
if
not
positive
:
simple_expression
=
-
simple_expression
const_features
=
{
alpha_name
[
0
]:
simple_expression
}
const_weights
=
np
.
array
([
1.
])
const_model
=
ConstLinearModel
(
features
=
alpha_name
,
weights
=
const_weights
)
ref_dates
=
makeSchedule
(
start_date
,
end_date
,
frequency
,
'china.sse'
)
const_model_factor_data
=
engine
.
fetch_data_range
(
universe
,
const_features
,
dates
=
ref_dates
,
benchmark
=
benchmark_code
)[
'factor'
]
.
dropna
()
horizon
=
map_freq
(
frequency
)
rets
=
[]
turn_overs
=
[]
leverags
=
[]
previous_pos
=
pd
.
DataFrame
()
index_dates
=
[]
factor_groups
=
const_model_factor_data
.
groupby
(
'trade_date'
)
for
i
,
value
in
enumerate
(
factor_groups
):
date
=
value
[
0
]
data
=
value
[
1
]
index_dates
.
append
(
date
)
total_data
=
data
.
fillna
(
data
[
alpha_name
]
.
median
())
alpha_logger
.
info
(
'{0}: {1}'
.
format
(
date
,
len
(
total_data
)))
risk_exp
=
total_data
[
neutralize_risk
]
.
values
.
astype
(
float
)
industry
=
total_data
.
industry_code
.
values
benchmark_w
=
total_data
.
weight
.
values
constraint_exp
=
total_data
[
constraint_risk
]
.
values
risk_exp_expand
=
np
.
concatenate
((
constraint_exp
,
np
.
ones
((
len
(
risk_exp
),
1
))),
axis
=
1
)
.
astype
(
float
)
risk_names
=
constraint_risk
+
[
'total'
]
risk_target
=
risk_exp_expand
.
T
@
benchmark_w
lbound
=
np
.
maximum
(
0.
,
benchmark_w
-
0.02
)
# np.zeros(len(total_data))
ubound
=
0.02
+
benchmark_w
is_in_benchmark
=
(
benchmark_w
>
0.
)
.
astype
(
float
)
risk_exp_expand
=
np
.
concatenate
((
risk_exp_expand
,
is_in_benchmark
.
reshape
((
-
1
,
1
))),
axis
=
1
)
.
astype
(
float
)
risk_names
.
append
(
'benchmark_total'
)
constraint
=
Constraints
(
risk_exp_expand
,
risk_names
)
for
j
,
name
in
enumerate
(
risk_names
):
if
name
==
'total'
:
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
j
],
upper_bound
=
risk_target
[
j
])
elif
name
==
'SIZE'
:
base_target
=
abs
(
risk_target
[
j
])
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
j
]
+
base_target
*
size_risk_lower
,
upper_bound
=
risk_target
[
j
]
+
base_target
*
size_risk_upper
)
elif
name
==
'benchmark_total'
:
base_target
=
benchmark_w
.
sum
()
constraint
.
set_constraints
(
name
,
lower_bound
=
benchmark_total_lower
*
base_target
,
upper_bound
=
benchmark_total_upper
*
base_target
)
else
:
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
j
]
*
industry_lower
,
upper_bound
=
risk_target
[
j
]
*
industry_upper
)
factor_values
=
factor_processing
(
total_data
[
alpha_name
]
.
values
,
pre_process
=
[
winsorize_normal
,
standardize
],
risk_factors
=
risk_exp
,
post_process
=
[
winsorize_normal
,
standardize
])
# const linear model
er
=
const_model
.
predict
(
factor_values
)
codes
=
total_data
[
'code'
]
.
values
if
previous_pos
.
empty
:
current_position
=
None
turn_over_target
=
None
else
:
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
j
]
*
industry_lower
,
upper_bound
=
risk_target
[
j
]
*
industry_upper
)
factor_values
=
factor_processing
(
total_data
[
alpha_name
]
.
values
,
pre_process
=
[
winsorize_normal
,
standardize
],
risk_factors
=
risk_exp
,
post_process
=
[
winsorize_normal
,
standardize
])
# const linear model
er
=
const_model
.
predict
(
factor_values
)
codes
=
total_data
[
'code'
]
.
values
if
previous_pos
.
empty
:
current_position
=
None
turn_over_target
=
None
else
:
previous_pos
.
set_index
(
'code'
,
inplace
=
True
)
remained_pos
=
previous_pos
.
loc
[
codes
]
remained_pos
.
fillna
(
0.
,
inplace
=
True
)
turn_over_target
=
turn_over_target_base
current_position
=
remained_pos
.
weight
.
values
try
:
target_pos
,
_
=
er_portfolio_analysis
(
er
,
industry
,
None
,
constraint
,
False
,
benchmark_w
,
method
=
method
,
turn_over_target
=
turn_over_target
,
current_position
=
current_position
,
lbound
=
lbound
,
ubound
=
ubound
)
except
ValueError
:
alpha_logger
.
info
(
'{0} full re-balance'
.
format
(
date
))
target_pos
,
_
=
er_portfolio_analysis
(
er
,
industry
,
None
,
constraint
,
False
,
benchmark_w
,
method
=
method
,
lbound
=
lbound
,
ubound
=
ubound
)
target_pos
[
'code'
]
=
total_data
[
'code'
]
.
values
turn_over
,
executed_pos
=
executor
.
execute
(
target_pos
=
target_pos
)
executed_codes
=
executed_pos
.
code
.
tolist
()
dx_returns
=
engine
.
fetch_dx_return
(
date
,
executed_codes
,
horizon
=
horizon
,
offset
=
1
)
result
=
pd
.
merge
(
executed_pos
,
total_data
[[
'code'
,
'weight'
]],
on
=
[
'code'
],
how
=
'inner'
)
result
=
pd
.
merge
(
result
,
dx_returns
,
on
=
[
'code'
])
leverage
=
result
.
weight_x
.
abs
()
.
sum
()
ret
=
result
.
weight_x
.
values
@
(
np
.
exp
(
result
.
dx
.
values
)
-
1.
)
rets
.
append
(
np
.
log
(
1.
+
ret
))
executor
.
set_current
(
executed_pos
)
turn_overs
.
append
(
turn_over
)
leverags
.
append
(
leverage
)
previous_pos
=
executed_pos
alpha_logger
.
info
(
'{0} is finished'
.
format
(
date
))
ret_df
=
pd
.
DataFrame
({
'returns'
:
rets
,
'turn_over'
:
turn_overs
,
'leverage'
:
leverage
},
index
=
index_dates
)
# index return
index_return
=
engine
.
fetch_dx_return_index_range
(
benchmark_code
,
start_date
,
end_date
,
horizon
=
horizon
,
offset
=
1
)
.
set_index
(
'trade_date'
)
ret_df
[
'index'
]
=
index_return
[
'dx'
]
ret_df
.
loc
[
advanceDateByCalendar
(
'china.sse'
,
ref_dates
[
-
1
],
frequency
)]
=
0.
ret_df
=
ret_df
.
shift
(
1
)
ret_df
.
iloc
[
0
]
=
0.
ret_df
[
'tc_cost'
]
=
ret_df
.
turn_over
*
0.002
ret_df
[
'returns'
]
=
ret_df
[
'returns'
]
-
ret_df
[
'index'
]
*
ret_df
[
'leverage'
]
ret_df
[[
'returns'
,
'tc_cost'
]]
.
cumsum
()
.
plot
(
figsize
=
(
12
,
6
),
title
=
'Fixed frequency rebalanced: {0} for {1}'
.
format
(
frequency
,
factor_name
),
secondary_y
=
'tc_cost'
)
plt
.
show
()
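
Both the removed script above and the added factor_analysis function below build the same constraint set: the for j, name in enumerate(risk_names) loop pins each named exposure of the candidate portfolio, risk_exp_expand.T @ w, to a band around the benchmark's exposure risk_target = risk_exp_expand.T @ benchmark_w. A minimal numpy sketch of that bookkeeping, with shapes and values invented for illustration (Constraints and the optimizer itself are alpha-mind's own):

    # Each column of risk_exp_expand is one named exposure; the optimizer
    # must keep risk_exp_expand.T @ w inside [lower, upper] for each name.
    # The numbers below are made up for the example.
    import numpy as np

    risk_exp_expand = np.array([[ 0.5, 1., 1.],   # cols: SIZE, total, benchmark_total
                                [-0.2, 1., 1.],
                                [ 0.9, 1., 0.],   # last col: 1 if stock is in benchmark
                                [ 0.1, 1., 0.]])
    benchmark_w = np.array([0.3, 0.4, 0.0, 0.0])

    risk_target = risk_exp_expand.T @ benchmark_w  # the benchmark's exposures
    w = np.array([0.25, 0.45, 0.05, 0.0])          # some candidate portfolio

    exposures = risk_exp_expand.T @ w
    for name, target, value in zip(['SIZE', 'total', 'benchmark_total'],
                                   risk_target, exposures):
        print('{0}: target {1:.3f}, portfolio {2:.3f}'.format(name, target, value))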
+def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True):
+    """
+    Model phase: we need 1 constant linear model and one linear regression model
+    """
+    alpha_name = [factor_name + '_' + ('pos' if positive else 'neg')]
+    base1 = LAST('Alpha60')
+    base2 = CSRes('roe_q', base1)
+    base3 = CSRes(CSRes('ep_q', base1), base2)
+    simple_expression = CSRes(CSRes(CSRes(LAST(factor_name), base1), base2), base3)
+
+    if not positive:
+        simple_expression = -simple_expression
+
+    const_features = {alpha_name[0]: simple_expression}
+    const_weights = np.array([1.])
+
+    const_model = ConstLinearModel(features=alpha_name, weights=const_weights)
+
+    ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
+
+    const_model_factor_data = engine.fetch_data_range(universe,
+                                                      const_features,
+                                                      dates=ref_dates,
+                                                      benchmark=benchmark_code)['factor'].dropna()
+
+    horizon = map_freq(frequency)
+
+    rets = []
+    turn_overs = []
+    leverags = []
+    previous_pos = pd.DataFrame()
+    index_dates = []
+
+    factor_groups = const_model_factor_data.groupby('trade_date')
+
+    for i, value in enumerate(factor_groups):
+        date = value[0]
+        data = value[1]
+        index_dates.append(date)
+
+        total_data = data.fillna(data[alpha_name].median())
+        alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
+
+        risk_exp = total_data[neutralize_risk].values.astype(float)
+        industry = total_data.industry_code.values
+        benchmark_w = total_data.weight.values
+        constraint_exp = total_data[constraint_risk].values
+        risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
+
+        risk_names = constraint_risk + ['total']
+        risk_target = risk_exp_expand.T @ benchmark_w
+
+        lbound = np.maximum(0., benchmark_w - 0.02)  # np.zeros(len(total_data))
+        ubound = 0.02 + benchmark_w
+
+        is_in_benchmark = (benchmark_w > 0.).astype(float)
+        risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
+        risk_names.append('benchmark_total')
+
+        constraint = Constraints(risk_exp_expand, risk_names)
+
+        for j, name in enumerate(risk_names):
+            if name == 'total':
+                constraint.set_constraints(name, lower_bound=risk_target[j], upper_bound=risk_target[j])
+            elif name == 'SIZE':
+                base_target = abs(risk_target[j])
+                constraint.set_constraints(name,
+                                           lower_bound=risk_target[j] + base_target * size_risk_lower,
+                                           upper_bound=risk_target[j] + base_target * size_risk_upper)
+            elif name == 'benchmark_total':
+                base_target = benchmark_w.sum()
+                constraint.set_constraints(name,
+                                           lower_bound=benchmark_total_lower * base_target,
+                                           upper_bound=benchmark_total_upper * base_target)
+            else:
+                constraint.set_constraints(name,
+                                           lower_bound=risk_target[j] * industry_lower,
+                                           upper_bound=risk_target[j] * industry_upper)
+
+        factor_values = factor_processing(total_data[alpha_name].values,
+                                          pre_process=[winsorize_normal, standardize],
+                                          risk_factors=risk_exp,
+                                          post_process=[winsorize_normal, standardize])
+
+        # const linear model
+        er = const_model.predict(factor_values)
+
+        codes = total_data['code'].values
+
+        if previous_pos.empty:
+            current_position = None
+            turn_over_target = None
+        else:
+            previous_pos.set_index('code', inplace=True)
+            remained_pos = previous_pos.loc[codes]
+            remained_pos.fillna(0., inplace=True)
+            turn_over_target = turn_over_target_base
+            current_position = remained_pos.weight.values
+
+        try:
+            target_pos, _ = er_portfolio_analysis(er,
+                                                  industry,
+                                                  None,
+                                                  constraint,
+                                                  False,
+                                                  benchmark_w,
+                                                  method=method,
+                                                  turn_over_target=turn_over_target,
+                                                  current_position=current_position,
+                                                  lbound=lbound,
+                                                  ubound=ubound)
+        except ValueError:
+            alpha_logger.info('{0} full re-balance'.format(date))
+            target_pos, _ = er_portfolio_analysis(er,
+                                                  industry,
+                                                  None,
+                                                  constraint,
+                                                  False,
+                                                  benchmark_w,
+                                                  method=method,
+                                                  lbound=lbound,
+                                                  ubound=ubound)
+
+        target_pos['code'] = total_data['code'].values
+
+        turn_over, executed_pos = executor.execute(target_pos=target_pos)
+
+        executed_codes = executed_pos.code.tolist()
+        dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
+
+        result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
+        result = pd.merge(result, dx_returns, on=['code'])
+
+        leverage = result.weight_x.abs().sum()
+
+        ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
+        rets.append(np.log(1. + ret))
+
+        executor.set_current(executed_pos)
+        turn_overs.append(turn_over)
+        leverags.append(leverage)
+
+        previous_pos = executed_pos
+        alpha_logger.info('{0} is finished'.format(date))
+
+    ret_df = pd.DataFrame({'returns': rets,
+                           'turn_over': turn_overs,
+                           'leverage': leverags}, index=index_dates)
+
+    # index return
+    index_return = engine.fetch_dx_return_index_range(benchmark_code,
+                                                      start_date,
+                                                      end_date,
+                                                      horizon=horizon,
+                                                      offset=1).set_index('trade_date')
+    ret_df['index'] = index_return['dx']
+
+    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
+    ret_df = ret_df.shift(1)
+    ret_df.iloc[0] = 0.
+    ret_df['tc_cost'] = ret_df.turn_over * 0.002
+    ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
+
+    return alpha_name[0], ret_df
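
The chained CSRes calls at the top of factor_analysis orthogonalize the raw factor against Alpha60, roe_q, and ep_q in sequence. Assuming CSRes(y, x) denotes the residual of a cross-sectional least-squares regression of y on x across the stocks of one trade date (the natural reading of the expression language; the intercept handling below is my assumption), one residualization step looks like:

    # Sketch of one cross-sectional residualization step. Chaining such
    # calls, as simple_expression does above, strips the target factor of
    # its exposure to each base factor in turn.
    import numpy as np

    def cs_residual(y: np.ndarray, x: np.ndarray) -> np.ndarray:
        X = np.column_stack([np.ones_like(x), x])     # intercept + regressor
        beta, *_ = np.linalg.lstsq(X, y, rcond=None)  # cross-sectional OLS
        return y - X @ beta                           # the orthogonal part

    rng = np.random.default_rng(0)
    base = rng.normal(size=500)                 # e.g. Alpha60 values on one date
    target = 0.8 * base + rng.normal(size=500)  # raw factor, correlated with base
    residual = cs_residual(target, base)
    print(np.corrcoef(residual, base)[0, 1])    # ~0 after residualization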
+def worker_func_positive(factor_name):
+    from alphamind.api import SqlEngine, Universe
+    engine = SqlEngine()
+    benchmark_code = 300
+    universe_name = ['zz800']
+    universe = Universe('custom', universe_name)
+    return factor_analysis(engine, factor_name, universe, benchmark_code, positive=True)
+
+
+def worker_func_negative(factor_name):
+    from alphamind.api import SqlEngine, Universe
+    engine = SqlEngine()
+    benchmark_code = 300
+    universe_name = ['zz800']
+    universe = Universe('custom', universe_name)
+    return factor_analysis(engine, factor_name, universe, benchmark_code, positive=False)
+
+
+if __name__ == '__main__':
+    from dask.distributed import Client
+    client = Client('10.63.6.176:8786')
+
+    engine = SqlEngine()
+    df = engine.fetch_factor_coverage()
+    df = df[df.universe == 'zz800'].groupby('factor').mean()
+    df = df[df.coverage >= 0.98]
+
+    tasks = client.map(worker_func_positive, df.index.tolist())
+    res1 = client.gather(tasks)
+
+    tasks = client.map(worker_func_negative, df.index.tolist())
+    res2 = client.gather(tasks)
+
+    factor_df = pd.DataFrame()
+
+    for f_name, df in res1:
+        factor_df[f_name] = df['returns']
+
+    for f_name, df in res2:
+        factor_df[f_name] = df['returns']
+
+    # ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
+    #                                              title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
+    #                                                  frequency, factor_name, benchmark_code),
+    #                                              secondary_y='tc_cost')
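
Both changed files fan the per-factor backtests out with dask.distributed, mapping a worker function over the factor list and gathering the results. The pattern below is standard dask usage on a throwaway local cluster, with a toy task standing in for the backtest, for readers without access to the 10.63.6.176:8786 scheduler used above:

    # The same client.map / client.gather pattern on a local cluster.
    # Client() with no address spins up an in-process scheduler and workers.
    from dask.distributed import Client

    def toy_task(factor_name):
        # stand-in for factor_analysis / worker_func_positive
        return factor_name, len(factor_name)

    if __name__ == '__main__':
        client = Client()                       # local cluster
        tasks = client.map(toy_task, ['roe_q', 'ep_q', 'Alpha60'])
        results = client.gather(tasks)          # list of (name, value) pairs
        print(results)
        client.close()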
alphamind/examples/factor_res_analysis.py
View file @ 910ae0a4

@@ -13,6 +13,7 @@ from alphamind.api import *
 def factor_residue_analysis(start_date,
                             end_date,
+                            factor_name,
                             factor,
                             freq,
                             universe,
@@ -26,10 +27,8 @@ def factor_residue_analysis(start_date,
                          tenor=freq,
                          calendar='china.sse')
-    alpha_factor_name = factor + '_res'
-    base1 = LAST('roe_q')
-    base2 = CSRes(LAST('ep_q'), 'roe_q')
-    alpha_factor = {alpha_factor_name: CSRes(CSRes(LAST(factor), base1), base2)}
+    alpha_factor_name = factor_name + '_res'
+    alpha_factor = {alpha_factor_name: factor}
     factor_all_data = engine.fetch_data_range(universe,
                                               alpha_factor,
                                               dates=dates)['factor']
@@ -74,19 +73,54 @@
     return df
 
-engine = SqlEngine()
-df = engine.fetch_factor_coverage().groupby('factor').mean()
-df = df[df.coverage >= 0.98]
-universe = Universe('custom', ['zz800'])
-factor_df = pd.DataFrame()
-
-for i, factor in enumerate(df.index):
-    res = factor_residue_analysis('2011-01-01',
-                                  '2018-01-05',
-                                  factor,
-                                  '5b',
-                                  universe,
-                                  engine)
-    factor_df[factor] = res['$top1 - bottom1$']
-    alpha_logger.info('{0}: {1} is done'.format(i + 1, factor))
+def factor_analysis(f_name):
+    from alphamind.api import SqlEngine, Universe, alpha_logger
+    engine = SqlEngine()
+    universe = Universe('custom', ['zz800'])
+
+    base1 = LAST('Alpha60')
+    base2 = CSRes('roe_q', base1)
+    base3 = CSRes(CSRes('ep_q', base1), base2)
+    factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
+
+    res = factor_residue_analysis('2010-01-01',
+                                  '2018-01-26',
+                                  f_name,
+                                  factor,
+                                  '10b',
+                                  universe,
+                                  engine)
+    alpha_logger.info('{0} is done'.format(f_name))
+    return f_name, res
+
+
+if __name__ == '__main__':
+    from dask.distributed import Client
+    client = Client('10.63.6.176:8786')
+
+    engine = SqlEngine()
+    df = engine.fetch_factor_coverage()
+    df = df[df.universe == 'zz800'].groupby('factor').mean()
+    df = df[df.coverage >= 0.98]
+    universe = Universe('custom', ['zz800'])
+    factor_df = pd.DataFrame()
+
+    tasks = client.map(factor_analysis, df.index.tolist())
+    res = client.gather(tasks)
+
+    for f_name, df in res:
+        factor_df[f_name] = df['$top1 - bottom1$']
+
+    # for i, f_name in enumerate(df.index):
+    #     base1 = LAST('Alpha60')
+    #     base2 = CSRes('roe_q', base1)
+    #     base3 = CSRes(CSRes('ep_q', base1), base2)
+    #     factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
+    #     res = factor_residue_analysis('2010-01-01',
+    #                                   '2018-01-22',
+    #                                   f_name,
+    #                                   factor,
+    #                                   '10b',
+    #                                   universe,
+    #                                   engine)
+    #     factor_df[f_name] = res['$top1 - bottom1$']
+    #     alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
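
factor_residue_analysis is consumed through its '$top1 - bottom1$' column which, going by the name alone, is the return spread between the top and bottom factor quantiles. A hedged pandas sketch of such a statistic for one cross section (the bucket count, weighting, and column names are illustrative assumptions, not the function's actual internals):

    # Sketch of a top-minus-bottom quantile spread for one cross section,
    # assuming five factor quantiles and equal weighting within each bucket.
    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(42)
    cross_section = pd.DataFrame({'factor': rng.normal(size=500),
                                  'next_ret': rng.normal(scale=0.02, size=500)})

    cross_section['quantile'] = pd.qcut(cross_section['factor'], 5,
                                        labels=[1, 2, 3, 4, 5])
    mean_ret = cross_section.groupby('quantile', observed=True)['next_ret'].mean()
    spread = mean_ret.loc[5] - mean_ret.loc[1]   # i.e. 'top1 - bottom1'
    print(spread)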