Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
daa642f6
Commit
daa642f6
authored
Nov 20, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
directly fetch return from market table
parent
4dbb172a
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
347 additions
and
95 deletions
+347
-95
sqlengine.py
alphamind/data/engines/sqlengine.py
+18
-17
combined_model_training_daily.py
alphamind/examples/combined_model_training_daily.py
+232
-0
formula_expression.py
alphamind/examples/formula_expression.py
+76
-20
model_training.py
alphamind/examples/model_training.py
+21
-58
No files found.
alphamind/data/engines/sqlengine.py
View file @
daa642f6
...
...
@@ -30,6 +30,7 @@ from alphamind.data.dbmodel.models import RiskCovShort
from
alphamind.data.dbmodel.models
import
RiskCovLong
from
alphamind.data.dbmodel.models
import
FullFactor
from
alphamind.data.dbmodel.models
import
Models
from
alphamind.data.dbmodel.models
import
Market
from
alphamind.data.dbmodel.models
import
Universe
as
UniverseTable
from
alphamind.data.transformer
import
Transformer
from
alphamind.model.loader
import
load_model
...
...
@@ -190,19 +191,19 @@ class SqlEngine(object):
start_date
=
ref_date
if
not
expiry_date
:
end_date
=
advanceDateByCalendar
(
'china.sse'
,
ref_date
,
str
(
horizon
+
offset
+
DAILY_RETURN_OFFSET
)
+
'b'
)
.
strftime
(
'
%
Y
%
m
%
d'
)
end_date
=
advanceDateByCalendar
(
'china.sse'
,
ref_date
,
str
(
1
+
horizon
+
offset
+
DAILY_RETURN_OFFSET
)
+
'b'
)
.
strftime
(
'
%
Y
%
m
%
d'
)
else
:
end_date
=
expiry_date
stats
=
func
.
sum
(
self
.
ln_func
(
1.
+
DailyReturn
.
d1
))
.
over
(
partition_by
=
DailyReturn
.
code
,
order_by
=
DailyReturn
.
trade_date
,
rows
=
(
DAILY_RETURN_OFFSET
+
offset
,
horizon
+
DAILY_RETURN_OFFSET
+
offset
))
.
label
(
'dx'
)
stats
=
func
.
sum
(
self
.
ln_func
(
1.
+
Market
.
chgPct
))
.
over
(
partition_by
=
Market
.
code
,
order_by
=
Market
.
trade_date
,
rows
=
(
1
+
DAILY_RETURN_OFFSET
+
offset
,
1
+
horizon
+
DAILY_RETURN_OFFSET
+
offset
))
.
label
(
'dx'
)
query
=
select
([
DailyReturn
.
trade_date
,
DailyReturn
.
code
,
stats
])
.
where
(
query
=
select
([
Market
.
trade_date
,
Market
.
code
,
stats
])
.
where
(
and_
(
DailyReturn
.
trade_date
.
between
(
start_date
,
end_date
),
DailyReturn
.
code
.
in_
(
codes
)
Market
.
trade_date
.
between
(
start_date
,
end_date
),
Market
.
code
.
in_
(
codes
)
)
)
...
...
@@ -223,20 +224,20 @@ class SqlEngine(object):
start_date
=
dates
[
0
]
end_date
=
dates
[
-
1
]
end_date
=
advanceDateByCalendar
(
'china.sse'
,
end_date
,
str
(
horizon
+
offset
+
DAILY_RETURN_OFFSET
)
+
'b'
)
.
strftime
(
'
%
Y-
%
m-
%
d'
)
end_date
=
advanceDateByCalendar
(
'china.sse'
,
end_date
,
str
(
1
+
horizon
+
offset
+
DAILY_RETURN_OFFSET
)
+
'b'
)
.
strftime
(
'
%
Y-
%
m-
%
d'
)
cond
=
universe
.
query_range
(
start_date
,
end_date
)
big_table
=
join
(
DailyReturn
,
UniverseTable
,
and_
(
DailyReturn
.
trade_date
==
UniverseTable
.
trade_date
,
DailyReturn
.
code
==
UniverseTable
.
code
,
big_table
=
join
(
Market
,
UniverseTable
,
and_
(
Market
.
trade_date
==
UniverseTable
.
trade_date
,
Market
.
code
==
UniverseTable
.
code
,
cond
))
stats
=
func
.
sum
(
self
.
ln_func
(
1.
+
DailyReturn
.
d1
))
.
over
(
partition_by
=
DailyReturn
.
code
,
order_by
=
DailyReturn
.
trade_date
,
rows
=
(
offset
+
DAILY_RETURN_OFFSET
,
horizon
+
offset
+
DAILY_RETURN_OFFSET
))
.
label
(
'dx'
)
stats
=
func
.
sum
(
self
.
ln_func
(
1.
+
Market
.
chgPct
))
.
over
(
partition_by
=
Market
.
code
,
order_by
=
Market
.
trade_date
,
rows
=
(
1
+
offset
+
DAILY_RETURN_OFFSET
,
1
+
horizon
+
offset
+
DAILY_RETURN_OFFSET
))
.
label
(
'dx'
)
query
=
select
([
DailyReturn
.
trade_date
,
DailyReturn
.
code
,
stats
])
\
query
=
select
([
Market
.
trade_date
,
Market
.
code
,
stats
])
\
.
select_from
(
big_table
)
df
=
pd
.
read_sql
(
query
,
self
.
session
.
bind
)
.
dropna
()
...
...
alphamind/examples/combined_model_training_daily.py
0 → 100644
View file @
daa642f6
# -*- coding: utf-8 -*-
"""
Created on 2017-11-8
@author: cheng.li
"""
import
numpy
as
np
import
pandas
as
pd
from
matplotlib
import
pyplot
as
plt
from
alphamind.api
import
*
from
PyFin.api
import
*
plt
.
style
.
use
(
'ggplot'
)
"""
Back test parameter settings
"""
start_date
=
'2017-01-01'
end_date
=
'2017-11-06'
benchmark_code
=
905
universe_name
=
'zz500'
universe
=
Universe
(
universe_name
,
[
universe_name
])
frequency
=
'2w'
batch
=
4
method
=
'risk_neutral'
use_rank
=
100
industry_lower
=
1.
industry_upper
=
1.
neutralize_risk
=
[
'SIZE'
]
+
industry_styles
constraint_risk
=
[
'SIZE'
]
+
industry_styles
horizon
=
map_freq
(
frequency
)
executor
=
NaiveExecutor
()
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
const_features
=
[
"IVR"
,
"eps_q"
,
"DivP"
,
"CFinc1"
,
"BDTO"
]
const_weights
=
np
.
array
([
0.05
,
0.2
,
0.075
,
0.15
,
0.05
])
const_model
=
ConstLinearModel
(
features
=
const_features
,
weights
=
const_weights
)
linear_model_features
=
{
'eps'
:
LAST
(
'eps_q'
),
'roe'
:
LAST
(
'roe_q'
),
'bdto'
:
LAST
(
'BDTO'
),
'cfinc1'
:
LAST
(
'CFinc1'
),
'chv'
:
LAST
(
'CHV'
),
'ivr'
:
LAST
(
'IVR'
),
'val'
:
LAST
(
'VAL'
),
'grev'
:
LAST
(
'GREV'
)
}
"""
Data phase
"""
engine
=
SqlEngine
()
linear_model_factor_data
=
fetch_data_package
(
engine
,
alpha_factors
=
linear_model_features
,
start_date
=
start_date
,
end_date
=
end_date
,
frequency
=
frequency
,
universe
=
universe
,
benchmark
=
benchmark_code
,
batch
=
batch
,
neutralized_risk
=
neutralize_risk
,
pre_process
=
[
winsorize_normal
,
standardize
],
post_process
=
[
winsorize_normal
,
standardize
],
warm_start
=
batch
)
train_x
=
linear_model_factor_data
[
'train'
][
'x'
]
train_y
=
linear_model_factor_data
[
'train'
][
'y'
]
ref_dates
=
sorted
(
train_x
.
keys
())
predict_x
=
linear_model_factor_data
[
'predict'
][
'x'
]
predict_y
=
linear_model_factor_data
[
'predict'
][
'y'
]
settlement
=
linear_model_factor_data
[
'settlement'
]
linear_model_features
=
linear_model_factor_data
[
'x_names'
]
const_model_factor_data
=
engine
.
fetch_data_range
(
universe
,
const_features
,
dates
=
ref_dates
,
benchmark
=
benchmark_code
)[
'factor'
]
const_return_data
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
ref_dates
,
horizon
=
horizon
)
"""
Training phase
"""
models_series
=
pd
.
Series
()
for
ref_date
in
ref_dates
:
x
=
train_x
[
ref_date
]
y
=
train_y
[
ref_date
]
.
flatten
()
model
=
LinearRegression
(
linear_model_features
,
fit_intercept
=
False
)
model
.
fit
(
x
,
y
)
models_series
.
loc
[
ref_date
]
=
model
alpha_logger
.
info
(
'trade_date: {0} training finished'
.
format
(
ref_date
))
"""
Predicting and rebalance phase
"""
frequency
=
'1d'
horizon
=
map_freq
(
frequency
)
dates
=
makeSchedule
(
start_date
,
end_date
,
tenor
=
frequency
,
calendar
=
'china.sse'
,
dateGenerationRule
=
DateGeneration
.
Forward
)
total_factors
=
{
'eps'
:
LAST
(
'eps_q'
),
'roe'
:
LAST
(
'roe_q'
),
'bdto'
:
LAST
(
'BDTO'
),
'cfinc1'
:
LAST
(
'CFinc1'
),
'chv'
:
LAST
(
'CHV'
),
'ivr'
:
LAST
(
'IVR'
),
'val'
:
LAST
(
'VAL'
),
'grev'
:
LAST
(
'GREV'
),
'divp'
:
LAST
(
'DivP'
)
}
all_data
=
engine
.
fetch_data_range
(
universe
,
total_factors
,
dates
=
dates
,
benchmark
=
905
)
factor_all_data
=
all_data
[
'factor'
]
factor_groups
=
factor_all_data
.
groupby
(
'trade_date'
)
rets
=
[]
turn_overs
=
[]
leverags
=
[]
for
i
,
value
in
enumerate
(
factor_groups
):
date
=
value
[
0
]
data
=
value
[
1
]
codes
=
data
.
code
.
tolist
()
ref_date
=
date
.
strftime
(
'
%
Y-
%
m-
%
d'
)
total_data
=
pd
.
merge
(
data
,
returns
,
on
=
[
'code'
])
.
dropna
()
alpha_logger
.
info
(
'{0}: {1}'
.
format
(
date
,
len
(
total_data
)))
risk_exp
=
total_data
[
neutralize_risk
]
.
values
.
astype
(
float
)
industry
=
total_data
.
industry_code
.
values
dx_return
=
total_data
.
dx
.
values
benchmark_w
=
total_data
.
weight
.
values
constraint_exp
=
total_data
[
constraint_risk
]
.
values
risk_exp_expand
=
np
.
concatenate
((
constraint_exp
,
np
.
ones
((
len
(
risk_exp
),
1
))),
axis
=
1
)
.
astype
(
float
)
risk_names
=
constraint_risk
+
[
'total'
]
risk_target
=
risk_exp_expand
.
T
@
benchmark_w
lbound
=
np
.
zeros
(
len
(
total_data
))
ubound
=
0.01
+
benchmark_w
constraint
=
Constraints
(
risk_exp_expand
,
risk_names
)
for
i
,
name
in
enumerate
(
risk_names
):
if
name
==
'total'
or
name
==
'SIZE'
:
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
i
],
upper_bound
=
risk_target
[
i
])
else
:
constraint
.
set_constraints
(
name
,
lower_bound
=
risk_target
[
i
]
*
industry_lower
,
upper_bound
=
risk_target
[
i
]
*
industry_upper
)
factor_values
=
factor_processing
(
total_data
[
const_features
]
.
values
,
pre_process
=
[
winsorize_normal
,
standardize
],
risk_factors
=
risk_exp
,
post_process
=
[
winsorize_normal
,
standardize
])
# const linear model
er1
=
const_model
.
predict
(
factor_values
)
# linear regression model
models
=
models_series
[
models_series
.
index
<=
date
]
model
=
models
[
-
1
]
x
=
predict_x
[
date
]
er2
=
model
.
predict
(
x
)
# combine model
er1_table
=
pd
.
DataFrame
({
'er1'
:
er1
/
er1
.
std
(),
'code'
:
total_data
.
code
.
values
})
er2_table
=
pd
.
DataFrame
({
'er2'
:
er2
/
er2
.
std
(),
'code'
:
settlement
.
loc
[
settlement
.
trade_date
==
date
,
'code'
]
.
values
})
er_table
=
pd
.
merge
(
er1_table
,
er2_table
,
on
=
[
'code'
],
how
=
'left'
)
.
fillna
(
0
)
er
=
(
er_table
.
er1
+
er_table
.
er2
)
.
values
target_pos
,
_
=
er_portfolio_analysis
(
er
,
industry
,
dx_return
,
constraint
,
False
,
benchmark_w
,
method
=
method
,
use_rank
=
use_rank
)
target_pos
[
'code'
]
=
total_data
[
'code'
]
.
values
turn_over
,
executed_pos
=
executor
.
execute
(
target_pos
=
target_pos
)
executed_codes
=
executed_pos
.
code
.
tolist
()
dx_returns
=
engine
.
fetch_dx_return
(
date
,
executed_codes
,
horizon
=
horizon
)
result
=
pd
.
merge
(
executed_pos
,
total_data
[[
'code'
,
'weight'
]],
on
=
[
'code'
],
how
=
'inner'
)
result
=
pd
.
merge
(
result
,
dx_returns
,
on
=
[
'code'
])
leverage
=
result
.
weight_x
.
abs
()
.
sum
()
ret
=
(
result
.
weight_x
-
result
.
weight_y
*
leverage
/
result
.
weight_y
.
sum
())
.
values
@
np
.
exp
(
result
.
dx
.
values
)
rets
.
append
(
ret
)
executor
.
set_current
(
executed_pos
)
turn_overs
.
append
(
turn_over
)
leverags
.
append
(
leverage
)
alpha_logger
.
info
(
'{0} is finished'
.
format
(
date
))
ret_df
=
pd
.
DataFrame
({
'returns'
:
rets
,
'turn_over'
:
turn_overs
,
'leverage'
:
leverage
},
index
=
ref_dates
)
ret_df
.
loc
[
advanceDateByCalendar
(
'china.sse'
,
ref_dates
[
-
1
],
frequency
)]
=
0.
ret_df
=
ret_df
.
shift
(
1
)
ret_df
.
iloc
[
0
]
=
0.
ret_df
[
'tc_cost'
]
=
ret_df
.
turn_over
*
0.002
ret_df
[[
'returns'
,
'tc_cost'
]]
.
cumsum
()
.
plot
(
figsize
=
(
12
,
6
),
title
=
'Fixed frequency rebalanced: {0}'
.
format
(
frequency
),
secondary_y
=
'tc_cost'
)
plt
.
show
()
alphamind/examples/formula_expression.py
View file @
daa642f6
...
...
@@ -17,19 +17,10 @@ import datetime as dt
start
=
dt
.
datetime
.
now
()
formula1
=
CSRank
(
DIFF
(
LOG
(
"turnoverVol"
)))
formula2
=
CSRank
((
LAST
(
'closePrice'
)
-
LAST
(
'openPrice'
))
/
LAST
(
'openPrice'
))
expression
=
-
CORR
(
6
,
formula1
^
formula2
)
# expression1 = -0.6 * LAST('con_pe_rolling') - 0.6 * LAST('con_pb_rolling') + 0.6 * LAST('con_eps') + 1.2 * LAST('con_target_price')
# expression2 = LAST("IVR")
universe_name
=
'zz500'
#expression = expression1 + expression2
# factor1 = LAST('RVOL')
# factor2 = LAST('IVR')
# expression = RES(20, factor2 ^ factor1)
# expression = MA(1, "EPS")
factor_name
=
'ROIC'
expression
=
LAST
(
factor_name
)
alpha_factor_name
=
'alpha_factor'
alpha_factor
=
{
alpha_factor_name
:
expression
}
...
...
@@ -37,22 +28,26 @@ alpha_factor = {alpha_factor_name: expression}
# end of formula definition
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
)
universe
=
Universe
(
'custom'
,
[
'ashare_ex'
])
universe
=
Universe
(
'custom'
,
[
universe_name
])
benchmark_code
=
905
neutralize_risk
=
[
'SIZE'
]
+
industry_styles
freq
=
'
1
w'
freq
=
'
2
w'
n_bins
=
5
horizon
=
map_freq
(
freq
)
dates
=
makeSchedule
(
'2012-01-01'
,
'2017-10-11'
,
start_date
=
'2012-01-01'
end_date
=
'2017-11-03'
dates
=
makeSchedule
(
start_date
,
end_date
,
tenor
=
freq
,
calendar
=
'china.sse'
)
factor_all_data
=
engine
.
fetch_data_range
(
universe
,
alpha_factor
,
dates
=
dates
,
benchmark
=
905
)[
'factor'
]
return_all_data
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
4
)
benchmark
=
benchmark_code
)[
'factor'
]
return_all_data
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
factor_groups
=
factor_all_data
.
groupby
(
'trade_date'
)
return_groups
=
return_all_data
.
groupby
(
'trade_date'
)
...
...
@@ -91,7 +86,68 @@ df = pd.DataFrame(final_res, index=dates)
start_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
0
],
'-1d'
)
df
.
loc
[
start_date
]
=
0.
df
.
sort_index
(
inplace
=
True
)
df
=
df
.
cumsum
()
.
plot
()
plt
.
show
()
fig
,
axes
=
plt
.
subplots
(
1
,
2
,
figsize
=
(
18
,
6
))
df
=
df
.
cumsum
()
.
plot
(
ax
=
axes
[
0
],
title
=
'Quantile Analysis for {0}'
.
format
(
factor_name
))
# =================================================================== #
factor_name
=
'ROE'
expression
=
LAST
(
factor_name
)
alpha_factor_name
=
'alpha_factor'
alpha_factor
=
{
alpha_factor_name
:
expression
}
dates
=
makeSchedule
(
start_date
,
end_date
,
tenor
=
freq
,
calendar
=
'china.sse'
)
factor_all_data
=
engine
.
fetch_data_range
(
universe
,
alpha_factor
,
dates
=
dates
,
benchmark
=
benchmark_code
)[
'factor'
]
return_all_data
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
factor_groups
=
factor_all_data
.
groupby
(
'trade_date'
)
return_groups
=
return_all_data
.
groupby
(
'trade_date'
)
final_res
=
np
.
zeros
((
len
(
dates
),
n_bins
))
for
i
,
value
in
enumerate
(
factor_groups
):
date
=
value
[
0
]
data
=
value
[
1
][[
'code'
,
alpha_factor_name
,
'isOpen'
,
'weight'
]
+
neutralize_risk
]
codes
=
data
.
code
.
tolist
()
ref_date
=
value
[
0
]
.
strftime
(
'
%
Y-
%
m-
%
d'
)
returns
=
return_groups
.
get_group
(
date
)
total_data
=
pd
.
merge
(
data
,
returns
,
on
=
[
'code'
])
.
dropna
()
risk_exp
=
total_data
[
neutralize_risk
]
.
values
.
astype
(
float
)
dx_return
=
total_data
.
dx
.
values
benchmark
=
total_data
.
weight
.
values
f_data
=
total_data
[[
alpha_factor_name
]]
try
:
er
=
factor_processing
(
total_data
[[
alpha_factor_name
]]
.
values
,
pre_process
=
[
winsorize_normal
,
standardize
],
risk_factors
=
risk_exp
,
post_process
=
[
winsorize_normal
,
standardize
])
res
=
er_quantile_analysis
(
er
,
n_bins
=
n_bins
,
dx_return
=
dx_return
,
benchmark
=
benchmark
)
except
Exception
as
e
:
print
(
e
)
res
=
np
.
zeros
(
n_bins
)
final_res
[
i
]
=
res
/
benchmark
.
sum
()
df
=
pd
.
DataFrame
(
final_res
,
index
=
dates
)
start_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
0
],
'-1d'
)
df
.
loc
[
start_date
]
=
0.
df
.
sort_index
(
inplace
=
True
)
df
=
df
.
cumsum
()
.
plot
(
ax
=
axes
[
1
],
title
=
'Quantile Analysis for {0}'
.
format
(
factor_name
))
plt
.
show
()
print
(
dt
.
datetime
.
now
()
-
start
)
\ No newline at end of file
alphamind/examples/model_training.py
View file @
daa642f6
...
...
@@ -28,8 +28,8 @@ base factors - all the risk styles
quantiles - 5
start_date - 2012-01-01
end_date - 2017-08-01
re-balance -
1
week
training - every
4
week
re-balance -
2
week
training - every
8
week
'''
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
)
...
...
@@ -37,13 +37,24 @@ universe = Universe('zz500', ['zz500'])
neutralize_risk
=
industry_styles
portfolio_risk_neutralize
=
[]
portfolio_industry_neutralize
=
True
alpha_factors
=
[
'VAL'
,
'RVOL'
,
'ROEDiluted'
,
'GREV'
,
'EPS'
,
'CHV'
,
'CFinc1'
,
'BDTO'
]
# ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted'] # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
alpha_factors
=
{
'eps'
:
LAST
(
'eps_q'
),
'roe'
:
LAST
(
'roe_q'
),
'bdto'
:
LAST
(
'BDTO'
),
'cfinc1'
:
LAST
(
'CFinc1'
),
'chv'
:
LAST
(
'CHV'
),
'rvol'
:
LAST
(
'RVOL'
),
'val'
:
LAST
(
'VAL'
),
'grev'
:
LAST
(
'GREV'
),
'droeafternonorecurring'
:
LAST
(
'DROEAfterNonRecurring'
)}
benchmark
=
905
n_bins
=
5
frequency
=
'2w'
batch
=
4
start_date
=
'201
7
-01-01'
end_date
=
'2017-
09-26
'
batch
=
8
start_date
=
'201
2
-01-01'
end_date
=
'2017-
11-05
'
method
=
'risk_neutral'
use_rank
=
100
...
...
@@ -74,24 +85,10 @@ train_y = data_package['train']['y']
dates
=
sorted
(
train_x
.
keys
())
model_df
=
pd
.
Series
()
features
=
data_package
[
'x_names'
]
for
train_date
in
dates
:
model
=
LinearRegression
(
alpha_factors
,
fit_intercept
=
False
)
#model = LassoCV(fit_intercept=False)
# model = AdaBoostRegressor(n_estimators=100)
#model = RandomForestRegressor(n_estimators=100, n_jobs=4)
#model = NuSVR(kernel='rbf', C=1e-3, gamma=0.1)
# model = ConstLinearModel(alpha_factors, np.array([0.034129344,
# 0.015881607,
# 0.048765746,
# 0.042747382,
# -0.015900173,
# 0.019044573,
# -0.001792638,
# 0.014277867,
# ]))
# model = ConstLinearModel(alpha_factors, np.array([1.] * len(alpha_factors)))
model
=
LinearRegression
(
features
,
fit_intercept
=
False
)
x
=
train_x
[
train_date
]
y
=
train_y
[
train_date
]
...
...
@@ -99,43 +96,14 @@ for train_date in dates:
model_df
.
loc
[
train_date
]
=
model
alpha_logger
.
info
(
'trade_date: {0} training finished'
.
format
(
train_date
))
'''
predicting phase: using trained model on the re-balance dates
predicting phase: using trained model on the re-balance dates
(optimizing with risk neutral)
'''
predict_x
=
data_package
[
'predict'
][
'x'
]
settlement
=
data_package
[
'settlement'
]
# final_res = np.zeros((len(dates), n_bins))
#
# for i, predict_date in enumerate(dates):
# model = model_df[predict_date]
# x = predict_x[predict_date]
# benchmark_w = settlement[settlement.trade_date == predict_date]['weight'].values
# realized_r = settlement[settlement.trade_date == predict_date]['dx'].values
#
# predict_y = model.predict(x)
#
# res = er_quantile_analysis(predict_y,
# n_bins,
# dx_return=realized_r,
# benchmark=benchmark_w)
#
# final_res[i] = res / benchmark_w.sum()
# print('trade_date: {0} predicting finished'.format(train_date))
#
# last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
#
# df = pd.DataFrame(final_res, index=dates[1:] + [last_date])
# df.sort_index(inplace=True)
# df.cumsum().plot()
# plt.title('Risk style factors model training with Linear Regression from 2012 - 2017')
# plt.show()
'''
predicting phase: using trained model on the re-balance dates (optimizing with risk neutral)
'''
industry_dummies
=
pd
.
get_dummies
(
settlement
[
'industry'
]
.
values
)
risk_styles
=
settlement
[
portfolio_risk_neutralize
]
.
values
...
...
@@ -187,11 +155,6 @@ for i, predict_date in enumerate(dates):
method
=
method
,
use_rank
=
use_rank
)
# model_res = pd.DataFrame({'weight': model.coef_[0],
# 'factor': np.array(data_package['x_names'])})
# model_res.to_csv(r'\\10.63.6.71\sharespace\personal\licheng\portfolio\zz500_model\{0}.csv'.format(predict_date.strftime('%Y-%m-%d')))
final_res
[
i
]
=
analysis
[
'er'
][
'total'
]
/
benchmark_w
.
sum
()
alpha_logger
.
info
(
'trade_date: {0} predicting finished'
.
format
(
predict_date
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment