Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
e007f467
Commit
e007f467
authored
Sep 15, 2018
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
removed unmaintained examples
parent
f10450af
Changes
12
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
0 additions
and
1763 deletions
+0
-1763
combined_model_training.py
alphamind/examples/combined_model_training.py
+0
-314
combined_model_training_daily.py
alphamind/examples/combined_model_training_daily.py
+0
-232
example_101.py
alphamind/examples/example_101.py
+0
-115
factor_analysis_example.py
alphamind/examples/factor_analysis_example.py
+0
-237
factor_res_analysis.py
alphamind/examples/factor_res_analysis.py
+0
-126
filter_example.py
alphamind/examples/filter_example.py
+0
-86
formula_expression.py
alphamind/examples/formula_expression.py
+0
-153
model_training.py
alphamind/examples/model_training.py
+0
-168
model_zoo.py
alphamind/examples/model_zoo.py
+0
-104
plot_quantile_res.py
alphamind/examples/plot_quantile_res.py
+0
-95
quantile_analysis_example.py
alphamind/examples/quantile_analysis_example.py
+0
-95
train_one_day_model.py
alphamind/examples/train_one_day_model.py
+0
-38
No files found.
alphamind/examples/combined_model_training.py
deleted
100644 → 0
View file @
f10450af
This diff is collapsed.
Click to expand it.
alphamind/examples/combined_model_training_daily.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Daily re-balanced back test combining a constant linear model with a rolling
bi-weekly linear-regression model.

Created on 2017-11-8
@author: cheng.li
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *

plt.style.use('ggplot')

"""
Back test parameter settings
"""
start_date = '2017-01-01'
end_date = '2017-11-06'
benchmark_code = 905
universe_name = 'zz500'
universe = Universe(universe_name, [universe_name])
frequency = '2w'          # training / data-package frequency
batch = 4
method = 'risk_neutral'
use_rank = 100
industry_lower = 1.
industry_upper = 1.
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles
horizon = map_freq(frequency)
executor = NaiveExecutor()

"""
Model phase: we need 1 constant linear model and one linear regression model
"""
const_features = ["IVR", "eps_q", "DivP", "CFinc1", "BDTO"]
const_weights = np.array([0.05, 0.2, 0.075, 0.15, 0.05])

const_model = ConstLinearModel(features=const_features,
                               weights=const_weights)

linear_model_features = {'eps': LAST('eps_q'),
                         'roe': LAST('roe_q'),
                         'bdto': LAST('BDTO'),
                         'cfinc1': LAST('CFinc1'),
                         'chv': LAST('CHV'),
                         'ivr': LAST('IVR'),
                         'val': LAST('VAL'),
                         'grev': LAST('GREV')}

"""
Data phase
"""
engine = SqlEngine()

linear_model_factor_data = fetch_data_package(engine,
                                              alpha_factors=linear_model_features,
                                              start_date=start_date,
                                              end_date=end_date,
                                              frequency=frequency,
                                              universe=universe,
                                              benchmark=benchmark_code,
                                              batch=batch,
                                              neutralized_risk=neutralize_risk,
                                              pre_process=[winsorize_normal, standardize],
                                              post_process=[winsorize_normal, standardize],
                                              warm_start=batch)

train_x = linear_model_factor_data['train']['x']
train_y = linear_model_factor_data['train']['y']
ref_dates = sorted(train_x.keys())

predict_x = linear_model_factor_data['predict']['x']
predict_y = linear_model_factor_data['predict']['y']
settlement = linear_model_factor_data['settlement']
linear_model_features = linear_model_factor_data['x_names']

const_model_factor_data = engine.fetch_data_range(universe,
                                                  const_features,
                                                  dates=ref_dates,
                                                  benchmark=benchmark_code)['factor']
const_return_data = engine.fetch_dx_return_range(universe,
                                                 dates=ref_dates,
                                                 horizon=horizon)

"""
Training phase
"""
models_series = pd.Series()

for ref_date in ref_dates:
    x = train_x[ref_date]
    y = train_y[ref_date].flatten()

    model = LinearRegression(linear_model_features, fit_intercept=False)
    model.fit(x, y)
    models_series.loc[ref_date] = model
    alpha_logger.info('trade_date: {0} training finished'.format(ref_date))

"""
Predicting and rebalance phase
"""
frequency = '1d'          # re-balance daily while the models stay bi-weekly
horizon = map_freq(frequency)

dates = makeSchedule(start_date,
                     end_date,
                     tenor=frequency,
                     calendar='china.sse',
                     dateGenerationRule=DateGeneration.Forward)

total_factors = {'eps': LAST('eps_q'),
                 'roe': LAST('roe_q'),
                 'bdto': LAST('BDTO'),
                 'cfinc1': LAST('CFinc1'),
                 'chv': LAST('CHV'),
                 'ivr': LAST('IVR'),
                 'val': LAST('VAL'),
                 'grev': LAST('GREV'),
                 'divp': LAST('DivP')}

all_data = engine.fetch_data_range(universe, total_factors, dates=dates, benchmark=905)
factor_all_data = all_data['factor']
factor_groups = factor_all_data.groupby('trade_date')

rets = []
turn_overs = []
leverags = []
index_dates = []  # FIX: collect the dates actually traded so the result frame aligns

for i, value in enumerate(factor_groups):
    date = value[0]
    data = value[1]

    # FIX: no trained model exists before the first training date; the original
    # model lookup would have raised an IndexError here.
    if date < ref_dates[0]:
        continue

    codes = data.code.tolist()
    ref_date = date.strftime('%Y-%m-%d')

    # FIX: `returns` was referenced without ever being defined (NameError);
    # fetch the forward return for the day's universe explicitly.
    returns = engine.fetch_dx_return(date, codes, horizon=horizon)
    total_data = pd.merge(data, returns, on=['code']).dropna()
    alpha_logger.info('{0}: {1}'.format(date, len(total_data)))

    risk_exp = total_data[neutralize_risk].values.astype(float)
    industry = total_data.industry_code.values
    dx_return = total_data.dx.values
    benchmark_w = total_data.weight.values

    constraint_exp = total_data[constraint_risk].values
    risk_exp_expand = np.concatenate((constraint_exp,
                                      np.ones((len(risk_exp), 1))),
                                     axis=1).astype(float)
    risk_names = constraint_risk + ['total']
    risk_target = risk_exp_expand.T @ benchmark_w

    lbound = np.zeros(len(total_data))
    ubound = 0.01 + benchmark_w

    constraint = Constraints(risk_exp_expand, risk_names)
    # FIX: the inner loop used to shadow the outer loop variable `i`.
    for j, name in enumerate(risk_names):
        if name == 'total' or name == 'SIZE':
            # size exposure and total budget are matched to the benchmark exactly
            constraint.set_constraints(name,
                                       lower_bound=risk_target[j],
                                       upper_bound=risk_target[j])
        else:
            constraint.set_constraints(name,
                                       lower_bound=risk_target[j] * industry_lower,
                                       upper_bound=risk_target[j] * industry_upper)

    factor_values = factor_processing(total_data[const_features].values,
                                      pre_process=[winsorize_normal, standardize],
                                      risk_factors=risk_exp,
                                      post_process=[winsorize_normal, standardize])

    # const linear model
    er1 = const_model.predict(factor_values)

    # linear regression model: use the latest model and prediction snapshot
    # available on or before the trade date.
    latest_model_date = models_series.index[models_series.index <= date][-1]
    model = models_series[latest_model_date]
    # FIX: `predict_x` is keyed by the bi-weekly training dates, not the daily
    # re-balance dates; index with the matching model date instead of `date`.
    x = predict_x[latest_model_date]
    er2 = model.predict(x)

    # combine model: z-score-like scaling by each signal's own std before summing
    er1_table = pd.DataFrame({'er1': er1 / er1.std(),
                              'code': total_data.code.values})
    er2_table = pd.DataFrame({'er2': er2 / er2.std(),
                              'code': settlement.loc[settlement.trade_date == latest_model_date, 'code'].values})
    er_table = pd.merge(er1_table, er2_table, on=['code'], how='left').fillna(0)
    er = (er_table.er1 + er_table.er2).values

    target_pos, _ = er_portfolio_analysis(er,
                                          industry,
                                          dx_return,
                                          constraint,
                                          False,
                                          benchmark_w,
                                          method=method,
                                          use_rank=use_rank)
    target_pos['code'] = total_data['code'].values

    turn_over, executed_pos = executor.execute(target_pos=target_pos)
    executed_codes = executed_pos.code.tolist()
    dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)

    result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
    result = pd.merge(result, dx_returns, on=['code'])

    leverage = result.weight_x.abs().sum()
    # weight_x: executed weights; weight_y: benchmark weights scaled to leverage
    ret = (result.weight_x - result.weight_y * leverage / result.weight_y.sum()).values @ np.exp(result.dx.values)
    rets.append(ret)
    executor.set_current(executed_pos)
    turn_overs.append(turn_over)
    leverags.append(leverage)
    index_dates.append(date)

    alpha_logger.info('{0} is finished'.format(date))

# FIX: the frame used to be built with the scalar `leverage` (last loop value)
# instead of the collected `leverags` list, and indexed by the bi-weekly
# `ref_dates`, whose length does not match the daily return series.
ret_df = pd.DataFrame({'returns': rets,
                       'turn_over': turn_overs,
                       'leverage': leverags},
                      index=index_dates)
ret_df.loc[advanceDateByCalendar('china.sse', index_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002

ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
                                             title='Fixed frequency rebalanced: {0}'.format(frequency),
                                             secondary_y='tc_cost')
plt.show()
alphamind/examples/example_101.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Back test of a rolling linear-regression factor model on the zz800 universe.

Created on 2017-12-30
@author: cheng.li
"""
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PyFin.api import *
from alphamind.api import *

engine = SqlEngine()

# ---- back-test configuration -------------------------------------------------
start_date = '2017-01-01'
end_date = '2017-12-25'
universe = Universe('custom', ['zz800'])
neutralize_risk = ['SIZE'] + industry_styles
factors = [CSRes(LAST('closePrice') / LAST('openPrice'), LAST('turnoverVol')),
           LAST('lowestPrice')]
benchmark = 300
build_type = 'risk_neutral'
freq = '5b'
horizon = map_freq(freq)

# ---- data --------------------------------------------------------------------
factors_data = fetch_data_package(engine,
                                  alpha_factors=factors,
                                  start_date=start_date,
                                  end_date=end_date,
                                  frequency=freq,
                                  universe=universe,
                                  benchmark=benchmark,
                                  batch=1,
                                  neutralized_risk=neutralize_risk,
                                  pre_process=[winsorize_normal, standardize],
                                  post_process=[winsorize_normal, standardize])

x_names = factors_data['x_names']
train_x = factors_data['train']['x']
train_y = factors_data['train']['y']
ref_dates = sorted(train_x.keys())
predict_x = factors_data['predict']['x']
settlement = factors_data['settlement']

# settlement frame carries one row per (trade_date, code)
benchmark_w = settlement['weight'].values
industry_names = settlement['industry'].values
realized_r = settlement['dx'].values
risk_exp = settlement[neutralize_risk].values

"""
Training phase
"""
models_series = pd.Series()

for date in ref_dates:
    features = train_x[date]
    targets = train_y[date].flatten()
    model = LinearRegression(fit_intercept=False, features=x_names)
    model.fit(features, targets)
    models_series.loc[date] = model
    alpha_logger.info('trade_date: {0} training finished'.format(date))

"""
Predicting and re-balance phase
"""
index_dates = []
final_res = np.zeros(len(ref_dates))

for i, date in enumerate(ref_dates):
    this_date_x = predict_x[date]
    mask = settlement.trade_date == date
    this_benchmark_w = benchmark_w[mask]
    this_industry_names = industry_names[mask]
    this_realized_r = realized_r[mask]

    # linear regression model
    model = models_series[date]
    predict_y = model.predict(this_date_x)

    # set constraint: match every neutralized risk exposure to the benchmark
    this_risk_exp = risk_exp[mask]
    lbound = np.zeros(len(this_date_x))
    ubound = 0.02 * np.ones(len(this_date_x))

    cons = Constraints()
    cons.add_exposure(neutralize_risk, this_risk_exp)
    risk_target = this_risk_exp.T @ this_benchmark_w
    for k, name in enumerate(neutralize_risk):
        cons.set_constraints(name, risk_target[k], risk_target[k])

    weights, analysis = er_portfolio_analysis(predict_y,
                                              this_industry_names,
                                              this_realized_r,
                                              constraints=cons,
                                              detail_analysis=True,
                                              benchmark=this_benchmark_w,
                                              method=build_type)
    final_res[i] = analysis['er']['total']
    alpha_logger.info('trade_date: {0} predicting finished'.format(date))

# Plot the cumulative returns
df = pd.Series(final_res, index=ref_dates)
df.sort_index(inplace=True)
df.cumsum().plot()
plt.title('Factors model {1} ({0})'.format(build_type,
                                           models_series.iloc[0].__class__.__name__))
plt.show()
\ No newline at end of file
alphamind/examples/factor_analysis_example.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Back test of a constant linear multi-factor model with turnover-targeted
re-balancing against the zz500 benchmark.

Created on 2017-11-8
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt

plt.style.use('ggplot')

"""
Back test parameter settings
"""
start_date = '2010-01-01'
end_date = '2018-02-27'

freq = '10b'
industry_lower = 1.
industry_upper = 1.
neutralized_risk = industry_styles
industry_name = 'sw'
industry_level = 1
turn_over_target_base = 2.0
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
batch = 0
horizon = map_freq(freq)
weight_gap = 0.01
universe = Universe("custom", ['zz800'])
# NOTE(review): credentials are hard-coded and the dialect is the legacy
# 'postgres+psycopg2' alias (elsewhere 'postgresql+psycopg2' is used) —
# confirm this still resolves with the SQLAlchemy version in use.
data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
benchmark_code = 905
offset = 1

executor = NaiveExecutor()
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
engine = SqlEngine(data_source)

alpha_factors = {
    'f01': LAST('ep_q'),
    'f02': LAST('roe_q'),
    'f03': LAST('market_confidence_25d'),
    'f04': LAST('ILLIQUIDITY'),
    'f05': LAST('cfinc1_q'),
    'f06': LAST('CFO2EV'),
    'f07': LAST('IVR'),
    'f08': LAST('con_pe_rolling_order'),
    'f09': LAST('con_pb_rolling_order'),
}

# negative weights short the consensus-ranking factors
weights = dict(f01=1.,
               f02=1.,
               f03=0.25,
               f04=0.25,
               f05=0.25,
               f06=0.25,
               f07=0.25,
               f08=-0.25,
               f09=-0.25)

alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)


def train_worker(ref_date):
    # build the data meta locally so the worker is self-contained when mapped out
    data_meta = DataMeta(freq=freq,
                         universe=universe,
                         batch=batch,
                         neutralized_risk=neutralized_risk,
                         risk_model='short',
                         pre_process=[winsorize_normal, standardize],
                         post_process=[winsorize_normal, standardize, rank],
                         warm_start=0,
                         data_source=data_source)
    return train_model(ref_date, alpha_model, data_meta)


def predict_worker(params):
    # params is a (ref_date, model) pair so the function can be mapped over lists
    data_meta = DataMeta(freq=freq,
                         universe=universe,
                         batch=batch,
                         neutralized_risk=neutralized_risk,
                         risk_model='short',
                         pre_process=[winsorize_normal, standardize],
                         post_process=[winsorize_normal, standardize, rank],
                         warm_start=0,
                         data_source=data_source)
    ref_date, model = params
    er = predict_by_model(ref_date, model, data_meta)
    return er


# one expected-return series per schedule date, all from the constant model
predicts = [predict_worker((d.strftime('%Y-%m-%d'), alpha_model)) for d in ref_dates]

# rebalance
industry_names = industry_list(industry_name, industry_level)
constraint_risk = ['SIZE', 'SIZENL', 'BETA'] + industry_names
total_risk_names = constraint_risk + ['benchmark', 'total']

b_type = []
l_val = []
u_val = []

previous_pos = pd.DataFrame()
rets = []
turn_overs = []
leverags = []

# style-risk exposures are pinned to the benchmark; industries and the
# benchmark membership are bounded relative to it
for name in total_risk_names:
    if name == 'benchmark':
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(benchmark_total_lower)
        u_val.append(benchmark_total_upper)
    elif name in {'SIZE', 'SIZENL', 'BETA'}:
        b_type.append(BoundaryType.ABSOLUTE)
        l_val.append(0.0)
        u_val.append(0.0)
    else:
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(industry_lower)
        u_val.append(industry_upper)

bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)

industry_total = engine.fetch_industry_matrix_range(universe,
                                                    dates=ref_dates,
                                                    category=industry_name,
                                                    level=industry_level)
benchmark_total = engine.fetch_benchmark_range(dates=ref_dates,
                                               benchmark=benchmark_code)
risk_total = engine.fetch_risk_model_range(universe, dates=ref_dates)[1]

for i, ref_date in enumerate(ref_dates):
    ref_date = ref_date.strftime('%Y-%m-%d')

    industry_matrix = industry_total[industry_total.trade_date == ref_date]
    benchmark_w = benchmark_total[benchmark_total.trade_date == ref_date]
    risk_matrix = risk_total[risk_total.trade_date == ref_date]

    # codes outside the benchmark get zero weight via the left join
    res = pd.merge(industry_matrix, benchmark_w, on=['code'], how='left').fillna(0.)
    res = pd.merge(res, risk_matrix, on=['code'])
    res = res.dropna()
    codes = res.code.values.tolist()

    benchmark_w = res.weight.values
    is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1))

    total_risk_exp = np.concatenate([res[constraint_risk].values.astype(float),
                                     is_in_benchmark,
                                     np.ones_like(is_in_benchmark)],
                                    axis=1)
    total_risk_exp = pd.DataFrame(total_risk_exp, columns=total_risk_names)
    constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)

    lbound = np.maximum(0., benchmark_w - weight_gap)  # np.zeros(len(total_data))
    ubound = weight_gap + benchmark_w

    if previous_pos.empty:
        # first iteration: nothing held yet, no turnover cap
        current_position = None
        turn_over_target = None
    else:
        previous_pos.set_index('code', inplace=True)
        remained_pos = previous_pos.loc[codes]
        remained_pos.fillna(0., inplace=True)
        turn_over_target = turn_over_target_base
        current_position = remained_pos.weight.values

    er = predicts[i].loc[codes].values

    # try the turnover-constrained problem first; fall back to a full
    # re-balance when the optimizer reports it infeasible
    try:
        alpha_logger.info('{0} partial re-balance: {1}'.format(ref_date, len(er)))
        target_pos, _ = er_portfolio_analysis(er,
                                              industry_matrix.industry_name.values,
                                              None,
                                              constraints,
                                              False,
                                              benchmark_w,
                                              method='risk_neutral',
                                              turn_over_target=turn_over_target,
                                              current_position=current_position,
                                              lbound=lbound,
                                              ubound=ubound)
    except ValueError:
        alpha_logger.info('{0} full re-balance: {1}'.format(ref_date, len(er)))
        target_pos, _ = er_portfolio_analysis(er,
                                              industry_matrix.industry_name.values,
                                              None,
                                              constraints,
                                              False,
                                              benchmark_w,
                                              method='risk_neutral',
                                              lbound=lbound,
                                              ubound=ubound)

    target_pos['code'] = codes

    turn_over, executed_pos = executor.execute(target_pos=target_pos)
    executed_codes = executed_pos.code.tolist()
    dx_returns = engine.fetch_dx_return(ref_date,
                                        executed_codes,
                                        horizon=horizon,
                                        offset=offset)

    result = pd.merge(executed_pos, dx_returns, on=['code'])
    leverage = result.weight.abs().sum()
    ret = result.weight.values @ (np.exp(result.dx.values) - 1.)
    rets.append(np.log(1. + ret))
    executor.set_current(executed_pos)
    turn_overs.append(turn_over)
    leverags.append(leverage)
    previous_pos = executed_pos
    alpha_logger.info('{0} is finished'.format(ref_date))

ret_df = pd.DataFrame({'returns': rets,
                       'turn_over': turn_overs,
                       'leverage': leverags},
                      index=ref_dates)

# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code,
                                                  start_date,
                                                  end_date,
                                                  horizon=horizon,
                                                  offset=offset).set_index('trade_date')
ret_df['index'] = index_return['dx']

ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], freq)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
# hedge out the (leveraged) index move to get the active return
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']

ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
                                             title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, 905),
                                             secondary_y='tc_cost')
ret_df[['returns', 'tc_cost']][-30:].cumsum().plot(figsize=(12, 6),
                                                   title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, 905),
                                                   secondary_y='tc_cost')
plt.show()
alphamind/examples/factor_res_analysis.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import
numpy
as
np
import
pandas
as
pd
from
PyFin.api
import
*
from
alphamind.api
import
*
def factor_residue_analysis(start_date,
                            end_date,
                            factor_name,
                            factor,
                            freq,
                            universe,
                            engine):
    """Run a per-date quantile back test for a residualized factor.

    Fetches *factor* over [start_date, end_date] at frequency *freq*,
    neutralizes it against size/leverage/industry exposures date by date and
    returns a DataFrame with one column per quantile plus a
    '$top1 - bottom1$' long/short spread column.
    """
    neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
    n_bins = 5
    horizon = map_freq(freq)
    schedule = makeSchedule(start_date,
                            end_date,
                            tenor=freq,
                            calendar='china.sse')

    alpha_factor_name = factor_name + '_res'
    alpha_factor = {alpha_factor_name: factor}
    factor_all_data = engine.fetch_data_range(universe,
                                              alpha_factor,
                                              dates=schedule)['factor']
    return_all_data = engine.fetch_dx_return_range(universe,
                                                   dates=schedule,
                                                   horizon=horizon)

    factor_groups = factor_all_data.groupby('trade_date')
    return_groups = return_all_data.groupby('trade_date')

    final_res = np.zeros((len(factor_groups.groups), n_bins))
    index_dates = []

    for pos, (date, frame) in enumerate(factor_groups):
        data = frame[['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
        returns = return_groups.get_group(date)
        total_data = pd.merge(data, returns, on=['code']).dropna()
        risk_exp = total_data[neutralize_risk].values.astype(float)
        dx_return = total_data.dx.values
        index_dates.append(date)
        try:
            er = factor_processing(total_data[[alpha_factor_name]].values,
                                   pre_process=[winsorize_normal, standardize],
                                   risk_factors=risk_exp,
                                   post_process=[winsorize_normal, standardize])
            res = er_quantile_analysis(er,
                                       n_bins=n_bins,
                                       dx_return=dx_return)
        except Exception as e:
            # keep the back test running; a failed date contributes zeros
            print(e)
            res = np.zeros(n_bins)
        final_res[pos] = res

    df = pd.DataFrame(final_res, index=index_dates)
    # anchor the series at zero one business day before the first schedule date
    df.loc[advanceDateByCalendar('china.sse', schedule[0], '-1d')] = 0.
    df.sort_index(inplace=True)
    df['$top1 - bottom1$'] = df[4] - df[0]
    return df
def factor_analysis(f_name):
    """Residualize factor *f_name* against Alpha60 / roe_q / ep_q and run the
    quantile analysis over 2010-01-01 .. 2018-01-26 on the zz800 universe.

    Returns the (factor name, result frame) pair so callers can fan the
    function out over many factors.
    """
    from alphamind.api import SqlEngine, Universe, alpha_logger

    engine = SqlEngine()
    universe = Universe('custom', ['zz800'])

    # orthogonalization chain: each base is residualized against the earlier ones
    base1 = LAST('Alpha60')
    base2 = CSRes('roe_q', base1)
    base3 = CSRes(CSRes('ep_q', base1), base2)
    factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)

    res = factor_residue_analysis('2010-01-01',
                                  '2018-01-26',
                                  f_name,
                                  factor,
                                  '10b',
                                  universe,
                                  engine)
    alpha_logger.info('{0} is done'.format(f_name))
    return f_name, res
if __name__ == '__main__':
    from dask.distributed import Client

    client = Client('10.63.6.13:8786')
    engine = SqlEngine()

    # keep only factors with near-complete coverage on zz800
    df = engine.fetch_factor_coverage()
    df = df[df.universe == 'zz800'].groupby('factor').mean()
    df = df[df.coverage >= 0.98]

    universe = Universe('custom', ['zz800'])
    factor_df = pd.DataFrame()

    # distribute one analysis task per surviving factor across the dask cluster
    tasks = client.map(factor_analysis, df.index.tolist())
    res = client.gather(tasks)

    # collect each factor's long/short spread into one wide frame
    for f_name, res_df in res:
        factor_df[f_name] = res_df['$top1 - bottom1$']
alphamind/examples/filter_example.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Back test that filters out the most-discussed names (top xueqiu-sentiment
percentile) before building a risk-neutral portfolio.

Created on 2017-9-5
@author: cheng.li
"""
import pandas as pd
import numpy as np
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt

plt.style.use('ggplot')

# external sentiment feed, indexed by trade date and sorted for the transformer
sentiment_df = pd.read_csv('d:/xueqiu.csv',
                           parse_dates=['trade_date']) \
    .sort_values(['trade_date', 'code']) \
    .set_index('trade_date')

engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
index_name = 'zz500'
benchmark = 905
universe = Universe(index_name, [index_name])
neutralized_risk = ['SIZE'] + industry_styles
expression = MA(5, ['post'])   # 5-period moving average of the 'post' count
n_bins = 5
frequency = '1w'

new_factor_df = expression.transform(sentiment_df,
                                     name='xueqiu',
                                     category_field='code').reset_index()

factors = ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted']
weights = np.array([0.015881607,
                    -0.015900173,
                    -0.001792638,
                    0.014277867,
                    0.034129344,
                    0.019044573,
                    0.042747382,
                    0.048765746])

start_date = '2016-01-01'
end_date = '2017-09-03'

dates = makeSchedule(start_date, end_date, frequency, 'china.sse')

total_data = engine.fetch_data_range(universe,
                                     factors,
                                     dates=dates,
                                     benchmark=benchmark)
return_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=4)

# join raw factors, the sentiment factor and forward returns on (date, code)
settle_df = total_data['factor']
settle_df = pd.merge(settle_df, new_factor_df, on=['trade_date', 'code'])
settle_df = pd.merge(settle_df, return_data, on=['trade_date', 'code'])
settle_df.dropna(inplace=True)
settle_df.set_index('trade_date', inplace=True)

dates = settle_df.index.unique()
final_res = np.zeros(len(dates))

for i, date in enumerate(dates):
    risk_exp = settle_df.loc[date, neutralized_risk].values
    raw_factor = settle_df.loc[date, factors].values @ weights
    dx_return = settle_df.loc[date, 'dx'].values
    benchmark_w = settle_df.loc[date, 'weight'].values

    neutralized_factor = factor_processing(raw_factor.reshape((-1, 1)),
                                           pre_process=[winsorize_normal, standardize],
                                           risk_factors=risk_exp,
                                           post_process=[standardize])

    # mark the hottest 5% of names (by sentiment) untradable
    is_tradable = settle_df.loc[date, 'isOpen'].values.copy()
    xueqiu_values = settle_df.loc[date, 'xueqiu'].values
    top_p = np.percentile(xueqiu_values, 95)
    is_tradable[xueqiu_values > top_p] = False

    industry = settle_df.loc[date, 'industry'].values

    # single budget constraint: total weight equals the benchmark's total
    constraints = Constraints(np.ones((len(is_tradable), 1)), ['total'])
    constraints.set_constraints('total',
                                benchmark_w.sum(),
                                benchmark_w.sum())

    res = er_portfolio_analysis(neutralized_factor,
                                industry,
                                dx_return=dx_return,
                                method='risk_neutral',
                                constraints=constraints,
                                is_tradable=is_tradable,
                                benchmark=benchmark_w)
    final_res[i] = res[1]['er']['total']
    print('{0} is finished'.format(date))
alphamind/examples/formula_expression.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Quantile analysis of a residualized factor expression and of its one-period
difference, plotted side by side.

Created on 2017-8-23
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt

plt.style.use('ggplot')

import datetime as dt

start = dt.datetime.now()

universe = Universe('custom', ['zz800'])
factor_name = 'Beta20'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
# residualize the factor against both bases
simple_expression = CSRes(CSRes(LAST(factor_name), base1), base2)
alpha_factor_name = factor_name + '_res'

# end of formula definition

engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
freq = '5b'
n_bins = 5
horizon = map_freq(freq)
start_date = '2012-01-01'
end_date = '2018-01-05'


def _quantile_analysis_frame(alpha_factor_name, expression, dates):
    """Fetch *expression* on *dates*, neutralize it per date and return the
    per-date quantile-return frame (one column per bin), anchored at zero one
    business day before the first schedule date.

    This replaces two verbatim copies of the same loop in the original script.
    """
    alpha_factor = {alpha_factor_name: expression}
    factor_all_data = engine.fetch_data_range(universe,
                                              alpha_factor,
                                              dates=dates)['factor']
    return_all_data = engine.fetch_dx_return_range(universe,
                                                   dates=dates,
                                                   horizon=horizon)
    factor_groups = factor_all_data.groupby('trade_date')
    return_groups = return_all_data.groupby('trade_date')

    final_res = np.zeros((len(factor_groups.groups), n_bins))
    index_dates = []

    for i, (date, frame) in enumerate(factor_groups):
        data = frame[['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
        returns = return_groups.get_group(date)
        total_data = pd.merge(data, returns, on=['code']).dropna()
        risk_exp = total_data[neutralize_risk].values.astype(float)
        dx_return = total_data.dx.values
        index_dates.append(date)
        try:
            er = factor_processing(total_data[[alpha_factor_name]].values,
                                   pre_process=[winsorize_normal, standardize],
                                   risk_factors=risk_exp,
                                   post_process=[winsorize_normal, standardize])
            res = er_quantile_analysis(er,
                                       n_bins=n_bins,
                                       dx_return=dx_return)
        except Exception as e:
            # a failed date contributes a zero row instead of aborting the run
            print(e)
            res = np.zeros(n_bins)
        final_res[i] = res

    df = pd.DataFrame(final_res, index=index_dates)
    df.loc[advanceDateByCalendar('china.sse', dates[0], '-1d')] = 0.
    df.sort_index(inplace=True)
    return df


fig, axes = plt.subplots(1, 2, figsize=(18, 6))

dates = makeSchedule(start_date,
                     end_date,
                     tenor=freq,
                     calendar='china.sse')
df = _quantile_analysis_frame(alpha_factor_name, simple_expression, dates)
df.cumsum().plot(ax=axes[0],
                 title='Quantile Analysis for {0}'.format(alpha_factor_name))

# =================================================================== #
# NOTE(review): the original script reassigned start_date to one business day
# before dates[0] here, so the second schedule starts one day earlier than the
# first. Behavior is preserved deliberately — confirm whether it was intended.
start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')

alpha_factor_name = alpha_factor_name + '_1w_diff'
dates = makeSchedule(start_date,
                     end_date,
                     tenor=freq,
                     calendar='china.sse')
df = _quantile_analysis_frame(alpha_factor_name, DIFF(simple_expression), dates)
df.cumsum().plot(ax=axes[1],
                 title='Quantile Analysis for {0}'.format(alpha_factor_name))

plt.show()
print(dt.datetime.now() - start)
\ No newline at end of file
alphamind/examples/model_training.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Created on 2017-8-24
@author: cheng.li

Example back-test: train a linear alpha model on a rolling window and
evaluate it on each re-balance date with a risk-neutral portfolio build.
"""

import numpy as np
import pandas as pd
import copy
from sklearn.linear_model import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.svm import NuSVR
from alphamind.api import *
from PyFin.api import *
from matplotlib import pyplot as plt

plt.style.use('ggplot')

'''
Settings:

universe     - zz500
neutralize   - all industries
benchmark    - zz500
base factors - all the risk styles
quantiles    - 5

start_date   - 2012-01-01
end_date     - 2017-08-01
re-balance   - 2 week
training     - every 8 week
'''

engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('zz500', ['zz500'])
neutralize_risk = industry_styles
portfolio_risk_neutralize = []          # no style-risk neutralization in this run
portfolio_industry_neutralize = True    # match benchmark industry exposures
# Raw alpha factors fed to the model (LAST = latest available value).
alpha_factors = {'eps': LAST('eps_q'),
                 'roe': LAST('roe_q'),
                 'bdto': LAST('BDTO'),
                 'cfinc1': LAST('CFinc1'),
                 'chv': LAST('CHV'),
                 'rvol': LAST('RVOL'),
                 'val': LAST('VAL'),
                 'grev': LAST('GREV'),
                 'droeafternonorecurring': LAST('DROEAfterNonRecurring')}
benchmark = 905                         # index code, presumably zz500 -- verify
n_bins = 5
frequency = '2w'                        # re-balance frequency
batch = 8                               # training window, in re-balance periods
start_date = '2012-01-01'
end_date = '2017-11-05'

method = 'risk_neutral'
use_rank = 100

'''
fetch data from target data base and do the corresponding data processing
'''
data_package = fetch_data_package(engine,
                                  alpha_factors=alpha_factors,
                                  start_date=start_date,
                                  end_date=end_date,
                                  frequency=frequency,
                                  universe=universe,
                                  benchmark=benchmark,
                                  batch=batch,
                                  neutralized_risk=neutralize_risk,
                                  pre_process=[winsorize_normal, standardize],
                                  post_process=[winsorize_normal, standardize],
                                  warm_start=batch)

'''
training phase: using Linear - regression from scikit-learn
'''
train_x = data_package['train']['x']
train_y = data_package['train']['y']

dates = sorted(train_x.keys())

# One fitted model per training date, keyed by date.
model_df = pd.Series()

features = data_package['x_names']
for train_date in dates:
    # NOTE(review): this is alphamind's LinearRegression wrapper (takes the
    # feature list), not sklearn's -- the star-import shadows it.  Confirm.
    model = LinearRegression(features, fit_intercept=False)
    x = train_x[train_date]
    y = train_y[train_date]
    model.fit(x, y)
    model_df.loc[train_date] = model
    alpha_logger.info('trade_date: {0} training finished'.format(train_date))

'''
predicting phase: using trained model on the re-balance dates (optimizing with risk neutral)
'''
predict_x = data_package['predict']['x']
settlement = data_package['settlement']

industry_dummies = pd.get_dummies(settlement['industry'].values)
risk_styles = settlement[portfolio_risk_neutralize].values

final_res = np.zeros(len(dates))

for i, predict_date in enumerate(dates):
    model = model_df[predict_date]
    x = predict_x[predict_date]
    cons = Constraints()
    index = settlement.trade_date == predict_date
    benchmark_w = settlement[index]['weight'].values
    realized_r = settlement[index]['dx'].values
    industry_names = settlement[index]['industry'].values
    is_tradable = settlement[index]['isOpen'].values

    # Total-weight constraint: portfolio weight sums to the benchmark's.
    cons.add_exposure(['total'], np.ones((len(is_tradable), 1)))
    cons.set_constraints('total', benchmark_w.sum(), benchmark_w.sum())

    if portfolio_industry_neutralize:
        # Pin each industry exposure to the benchmark's exposure.
        ind_exp = industry_dummies[index]

        risk_tags = ind_exp.columns
        cons.add_exposure(risk_tags, ind_exp.values)
        benchmark_exp = benchmark_w @ ind_exp.values

        for k, name in enumerate(risk_tags):
            cons.set_constraints(name, benchmark_exp[k], benchmark_exp[k])

    if portfolio_risk_neutralize:
        # Pin each configured style-risk exposure to the benchmark's.
        risk_exp = risk_styles[index]

        risk_tags = np.array(portfolio_risk_neutralize)
        cons.add_exposure(risk_tags, risk_exp)

        benchmark_exp = benchmark_w @ risk_exp
        for k, name in enumerate(risk_tags):
            cons.set_constraints(name, benchmark_exp[k], benchmark_exp[k])

    predict_y = model.predict(x)
    # NOTE(review): deliberately(?) marks every stock tradable, overriding the
    # isOpen flags fetched above -- confirm this is intended for the study.
    is_tradable[:] = True
    weights, analysis = er_portfolio_analysis(predict_y,
                                              industry_names,
                                              realized_r,
                                              constraints=cons,
                                              detail_analysis=True,
                                              benchmark=benchmark_w,
                                              is_tradable=is_tradable,
                                              method=method,
                                              use_rank=use_rank)

    # Excess return scaled by total benchmark weight.
    final_res[i] = analysis['er']['total'] / benchmark_w.sum()

    alpha_logger.info('trade_date: {0} predicting finished'.format(predict_date))

# Returns realized over the period *after* each prediction date, so shift the
# index forward by one re-balance period.
last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)

df = pd.Series(final_res, index=dates[1:] + [last_date])
df.sort_index(inplace=True)
df.cumsum().plot()
plt.title('Prod factors model {1} ({0})'.format(method, model.__class__.__name__))
plt.show()
alphamind/examples/model_zoo.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Created on 2017-9-5
@author: cheng.li

Compare three treatments of the same raw factor (plain cross-sectional
quantiles, industry-grouped quantiles, industry-neutralized percentiles)
by IC and by a simple long/short quantile portfolio.
"""

import math
import pandas as pd
import numpy as np
from PyFin.api import *
from alphamind.api import *

factor = 'ROE'                          # default factor (overridden per task below)
universe = Universe('custom', ['zz800'])
start_date = '2010-01-01'
end_date = '2018-04-26'
freq = '10b'                            # re-balance every 10 business days
category = 'sw_adj'                     # industry classification scheme
level = 1                               # industry classification level

horizon = map_freq(freq)                # forward-return horizon implied by freq
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
def factor_analysis(factor):
    """Evaluate one raw factor under three transformations.

    For each re-balance date the factor is expressed three ways:
      f1 - cross-sectional quantiles of the raw factor,
      f2 - quantiles computed within 'sw1_adj' industry groups,
      f3 - raw factor neutralized against industry + COUNTRY exposures,
           then mapped to percentiles.
    Each variant is scored by (a) a long/short portfolio (long the top 20%,
    short the bottom 20%) and (b) its correlation with forward returns.

    Returns a dict with keys:
      'ic'     - (mean, standard error) of per-date IC for f1/f2/f3,
      'ret'    - (mean, standard error) of per-date long/short returns,
      'factor' - the input factor name (so the caller can key results).
    """
    engine = SqlEngine()
    factors = {'f1': CSQuantiles(factor),
               'f2': CSQuantiles(factor, groups='sw1_adj'),
               'f3': LAST(factor)}

    total_factor = engine.fetch_factor_range(universe, factors, dates=ref_dates)
    _, risk_exp = engine.fetch_risk_model_range(universe, dates=ref_dates)
    industry = engine.fetch_industry_range(universe,
                                           dates=ref_dates,
                                           category=category,
                                           level=level)
    # offset=1: returns start one period after the factor observation date.
    rets = engine.fetch_dx_return_range(universe,
                                        horizon=horizon,
                                        offset=1,
                                        dates=ref_dates)

    # Assemble one table keyed by (trade_date, code); dropna keeps only rows
    # with factor, risk, industry and return all present.
    total_factor = pd.merge(total_factor,
                            industry[['trade_date', 'code', 'industry']],
                            on=['trade_date', 'code'])
    total_factor = pd.merge(total_factor, risk_exp, on=['trade_date', 'code'])
    total_factor = pd.merge(total_factor, rets, on=['trade_date', 'code']).dropna()

    df_ret = pd.DataFrame(columns=['f1', 'f2', 'f3'])
    df_ic = pd.DataFrame(columns=['f1', 'f2', 'f3'])

    total_factor_groups = total_factor.groupby('trade_date')

    for date, this_factors in total_factor_groups:
        raw_factors = this_factors['f3'].values
        industry_exp = this_factors[industry_styles + ['COUNTRY']].values.astype(float)
        # Neutralize the raw factor against industry/country exposure, then
        # rank-transform the residual to percentiles.
        processed_values = factor_processing(raw_factors,
                                             pre_process=[],
                                             risk_factors=industry_exp,
                                             post_process=[percentile])
        # NOTE(review): assigning into a groupby chunk may trigger pandas'
        # SettingWithCopyWarning; it works here because only this local frame
        # is read afterwards.
        this_factors['f3'] = processed_values

        factor_values = this_factors[['f1', 'f2', 'f3']].values

        # Long the top quintile, short the bottom quintile, equal gross weight.
        positions = (factor_values >= 0.8) * 1.
        positions[factor_values <= 0.2] = -1
        positions /= np.abs(positions).sum(axis=0)

        # Per-variant portfolio return for this date (dot of returns x weights).
        ret_values = this_factors.dx.values @ positions
        df_ret.loc[date] = ret_values

        # First row of the correlation matrix: corr(dx, f1/f2/f3).
        ic_values = this_factors[['dx', 'f1', 'f2', 'f3']].corr().values[0, 1:]
        df_ic.loc[date] = ic_values

    print(f"{factor} is finished")

    # Standard error uses sqrt(number of dates) in both cases.
    return {'ic': (df_ic.mean(axis=0), df_ic.std(axis=0) / math.sqrt(len(df_ic))),
            'ret': (df_ret.mean(axis=0), df_ret.std(axis=0) / math.sqrt(len(df_ic))),
            'factor': factor}
if __name__ == '__main__':
    from dask.distributed import Client

    # Driver: fan `factor_analysis` out over a dask cluster and tabulate the
    # per-factor IC and long/short-return statistics.
    #
    # FIX: create the client *before* entering the try block.  Previously
    # `client = Client(...)` was the first statement inside `try`, so a failed
    # connection raised NameError on `client` in the `finally` clause, masking
    # the original connection error.
    client = Client("10.63.6.176:8786")
    try:
        # Result tables: one row per factor; columns are (mean|std) x variant,
        # where the variants correspond to f1/f2/f3 from factor_analysis.
        cols = pd.MultiIndex.from_product([['mean', 'std'],
                                           ['raw', 'peer', 'neutralized']])
        factors_ret = pd.DataFrame(columns=cols)
        factors_ic = pd.DataFrame(columns=cols)

        factors = ['ep_q',
                   'roe_q',
                   'SGRO',
                   'GREV',
                   'IVR',
                   'ILLIQUIDITY',
                   'con_target_price',
                   'con_pe_rolling_order',
                   'DividendPaidRatio']

        # Submit one task per factor and block until all results are back.
        l = client.map(factor_analysis, factors)
        results = client.gather(l)

        for res in results:
            factor = res['factor']
            factors_ret.loc[factor, 'mean'] = res['ret'][0].values
            factors_ret.loc[factor, 'std'] = res['ret'][1].values
            factors_ic.loc[factor, 'mean'] = res['ic'][0].values
            factors_ic.loc[factor, 'std'] = res['ic'][1].values

        print(factors_ret)
    finally:
        # Always release the cluster connection.
        client.close()
alphamind/examples/plot_quantile_res.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li

Quantile back-test of production factors on the zz500 universe, plotting
cumulative quantile returns and saving one figure per factor.
"""

import datetime as dt
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt

start = dt.datetime.now()  # wall-clock timer for the whole run

engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('custom', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles  # exposures to neutralize against
n_bins = 5                                    # quantile buckets
factor_weights = np.array([1.])               # single-factor combination weight
# Re-balance frequency of the back-test schedule.
freq = '1w'

# Forward-return horizon (trading days) implied by the re-balance frequency.
# Equivalent to the original if/elif chain -- and, like it, leaves `horizon`
# unset for any label outside the table.
_freq_to_horizon = {'1m': 21, '1w': 4, '1d': 0}
if freq in _freq_to_horizon:
    horizon = _freq_to_horizon[freq]
start_date = '2012-01-01'
end_date = '2012-08-01'

# Re-balance dates on the SSE calendar, rolling to the following business day.
dates = makeSchedule(start_date,
                     end_date,
                     tenor=freq,
                     calendar='china.sse',
                     dateRule=BizDayConventions.Following)

prod_factors = ['EPS']

all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)

factor_all_data = all_data['factor']

total_df = pd.DataFrame()  # NOTE(review): filled nowhere below -- leftover

for factor in prod_factors:
    factors = [factor]
    # One row of quantile returns per re-balance date.
    final_res = np.zeros((len(dates), n_bins))

    factor_groups = factor_all_data.groupby('trade_date')
    return_groups = return_all_data.groupby('trade_date')
    for i, value in enumerate(factor_groups):
        date = value[0]
        data = value[1][['code', factor, 'isOpen', 'weight'] + neutralize_risk]
        codes = data.code.tolist()  # NOTE(review): unused below
        ref_date = value[0].strftime('%Y-%m-%d')  # NOTE(review): unused below
        returns = return_groups.get_group(date)
        # Join factor values with realized forward returns for this date.
        total_data = pd.merge(data, returns, on=['code']).dropna()
        print('{0}: {1}'.format(date, len(data)))
        risk_exp = total_data[neutralize_risk].values.astype(float)
        dx_return = total_data.dx.values
        benchmark = total_data.weight.values

        f_data = total_data[factors]
        try:
            res = quantile_analysis(f_data,
                                    factor_weights,
                                    dx_return,
                                    risk_exp=risk_exp,
                                    n_bins=n_bins,
                                    benchmark=benchmark)
        except Exception as e:
            # Best-effort: record a zero row for dates that fail.
            print(e)
            res = np.zeros(n_bins)

        # Normalize by total benchmark weight on the date.
        final_res[i] = res / benchmark.sum()

    df = pd.DataFrame(final_res, index=dates)

    # Anchor the cumulative curves at zero one week before the first date.
    start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
    df.loc[start_date] = 0.
    df.sort_index(inplace=True)
    # NOTE(review): `df` is rebound to the Axes object returned by .plot().
    df = df.cumsum().plot()
    plt.title('{0} weekly re-balance'.format(factors[0]))
    plt.savefig('{0}_big_universe_20170814.png'.format(factors[0]))
    print('{0} is finished'.format(factor))

print(dt.datetime.now() - start)
plt.show()
\ No newline at end of file
alphamind/examples/quantile_analysis_example.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li

Quantile evaluation of every column in the LegacyFactor table over the
zz500 universe; results are appended per date and written to CSV.
"""

import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.dbmodel.models import Tiny
from alphamind.data.dbmodel.models import LegacyFactor

engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('custom', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles  # exposures to neutralize against
n_bins = 24                                   # quantile buckets
factor_weights = np.array([1.])               # single-factor combination weight
# Portfolio re-balance frequency.
freq = '1w'

# Derive the forward-return horizon (trading days) from the frequency label.
# Behaves exactly like the original if/elif ladder: known labels set
# `horizon`, any other label leaves it undefined.
for _label, _days in (('1m', 21), ('1w', 4), ('1d', 0)):
    if freq == _label:
        horizon = _days
start_date = '2016-04-01'
end_date = '2017-08-16'

dates = makeSchedule(start_date,
                     end_date,
                     tenor=freq,
                     calendar='china.sse')

# Collect every column of the LegacyFactor table except the key columns;
# each remaining column is treated as one candidate factor.
col_names = set()

factor_tables = [LegacyFactor]

for t in factor_tables:
    for c in t.__table__.columns:
        col_names.add(c.name)

col_names = col_names.difference(set(['trade_date', 'code']))
prod_factors = list(col_names)

all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)

factor_all_data = all_data['factor']

total_df = pd.DataFrame()

factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')

for date, factor_data in factor_groups:
    ref_date = date.strftime('%Y-%m-%d')  # NOTE(review): unused below
    returns = return_groups.get_group(date)
    # One row of quantile returns per factor on this date.
    final_res = np.zeros((len(prod_factors), n_bins))
    this_date_data = factor_data[['code', 'isOpen', 'weight'] + prod_factors + neutralize_risk]
    this_date_data = pd.merge(this_date_data, returns, on=['code'])
    codes = this_date_data.code.tolist()  # NOTE(review): unused below

    for i, factor in enumerate(prod_factors):
        factors = [factor]
        # Per-factor dropna so one factor's missing values don't shrink the
        # sample for the others.
        total_data = this_date_data[['code', 'isOpen', 'weight', 'dx']
                                    + factors + neutralize_risk].dropna()
        risk_exp = total_data[neutralize_risk].values.astype(float)
        dx_return = total_data.dx.values
        benchmark = total_data.weight.values

        f_data = total_data[factors]
        try:
            res = quantile_analysis(f_data,
                                    factor_weights,
                                    dx_return,
                                    risk_exp=risk_exp,
                                    n_bins=n_bins,
                                    benchmark=benchmark)
        except Exception as e:
            # Best-effort: record a zero row for factors that fail.
            print(e)
            res = np.zeros(n_bins)

        # Normalize by total benchmark weight on the date.
        final_res[i] = res / benchmark.sum()

    df = pd.DataFrame(final_res, index=prod_factors)
    df.sort_index(inplace=True)
    df['trade_date'] = date
    total_df = total_df.append(df)
    print('{0} is finished'.format(date))

total_df.to_csv('d:/factor_eval_pm500_mirror.csv')
alphamind/examples/train_one_day_model.py
deleted
100644 → 0
View file @
f10450af
# -*- coding: utf-8 -*-
"""
Created on 2017-11-8
@author: cheng.li

Minimal example: fetch one training batch as of a single reference date and
fit a linear model on it, printing the fitted coefficients.
"""

from alphamind.api import *

ref_date = '2017-11-21'
universe_name = ['zz500', 'hs300']
universe = Universe(universe_name, universe_name)
frequency = '5b'                      # sampling frequency: every 5 business days
batch = 8                             # training window, in sampling periods
neutralize_risk = ['SIZE'] + industry_styles

engine = SqlEngine()

linear_model_features = ['eps_q', 'roe_q', 'BDTO', 'CFinc1', 'CHV', 'IVR', 'VAL', 'GREV']

# Fetch the neutralized/standardized training sample ending at ref_date.
training_data = fetch_train_phase(engine,
                                  linear_model_features,
                                  ref_date,
                                  frequency,
                                  universe,
                                  batch,
                                  neutralize_risk,
                                  pre_process=[winsorize_normal, standardize],
                                  post_process=[winsorize_normal, standardize],
                                  warm_start=batch)

# alphamind's LinearRegression wrapper (takes the feature list); no intercept
# since inputs are standardized.
model = LinearRegression(linear_model_features, fit_intercept=False)

x = training_data['train']['x']
y = training_data['train']['y'].flatten()

model.fit(x, y)
# `impl` is presumably the underlying sklearn estimator -- verify.
print(model.impl.coef_)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment