Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
a5260eba
Commit
a5260eba
authored
Aug 24, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added model training example
parent
db8c05c3
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
140 additions
and
5 deletions
+140
-5
processing.py
alphamind/data/processing.py
+5
-4
model_training.py
alphamind/examples/model_training.py
+130
-0
preparing.py
alphamind/model/preparing.py
+5
-1
No files found.
alphamind/data/processing.py
View file @
a5260eba
...
@@ -14,20 +14,21 @@ from alphamind.data.neutralize import neutralize
...
@@ -14,20 +14,21 @@ from alphamind.data.neutralize import neutralize
def factor_processing(raw_factors: np.ndarray,
                      pre_process: Optional[List] = None,
                      risk_factors: Optional[np.ndarray] = None,
                      post_process: Optional[List] = None,
                      groups=None) -> np.ndarray:
    """Run a factor matrix through pre-processing, neutralization and post-processing.

    Parameters
    ----------
    raw_factors:
        Input factor values; returned unchanged (same object) when no stage
        is requested.
    pre_process:
        Optional callables applied in order before neutralization. Each one
        receives the current factor array and must return the new one.
    risk_factors:
        Optional risk exposure matrix. When given, columns whose sum is
        exactly zero are dropped and the factors are passed to
        ``neutralize`` together with the remaining exposures.
    post_process:
        Optional callables applied in order after neutralization.
    groups:
        Optional grouping labels. When provided they are forwarded as the
        ``groups`` keyword to every processor and to ``neutralize``.

    Returns
    -------
    np.ndarray
        The processed factor values.
    """
    # Forward ``groups`` to the processors only when the caller supplied it,
    # so plain one-argument callables keep working in the ungrouped case.
    group_kwargs = {} if groups is None else {'groups': groups}

    new_factors = raw_factors

    for p in pre_process or ():
        new_factors = p(new_factors, **group_kwargs)

    if risk_factors is not None:
        # NOTE(review): the filter is on the column *sum*, so a column whose
        # entries cancel out exactly is dropped as well -- confirm intended.
        risk_factors = risk_factors[:, risk_factors.sum(axis=0) != 0]
        new_factors = neutralize(risk_factors, new_factors, groups=groups)

    for p in post_process or ():
        new_factors = p(new_factors, **group_kwargs)

    return new_factors
alphamind/examples/model_training.py
0 → 100644
View file @
a5260eba
# -*- coding: utf-8 -*-
"""
Created on 2017-8-24

@author: cheng.li

Example script: rolling linear-regression training on a set of alpha factors,
followed by a quantile analysis of the predictions on each re-balance date.
"""

import numpy as np
import pandas as pd
import copy
import os
from sklearn.linear_model import LinearRegression
from alphamind.api import *
from matplotlib import pyplot as plt

plt.style.use('ggplot')

# Settings:
#
#   universe     - zz500
#   neutralize   - 'SIZE' + all industries
#   benchmark    - zz500
#   base factors - ['CFinc1', 'CHV', 'VAL', 'BDTO', 'RVOL']
#   quantiles    - 5
#   start_date   - 2012-01-01
#   end_date     - 2017-08-01
#   re-balance   - 1 week
#   training     - every 4 weeks

# NOTE(review): the default URI below embeds database credentials in source.
# Set ALPHA_MIND_DB_URI in the environment to override it; the literal is kept
# only so the example behaves as before when the variable is not set.
engine = SqlEngine(os.environ.get('ALPHA_MIND_DB_URI',
                                  'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'))
universe = Universe('zz500', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles
alpha_factors = ['CFinc1', 'CHV', 'VAL', 'BDTO', 'RVOL']
benchmark = 905  # presumably the index code of zz500 (see settings) -- TODO confirm
n_bins = 5
frequency = '1w'
batch = 4  # number of consecutive dates in one training window
start_date = '2012-01-01'
end_date = '2017-08-01'

# fetch data from target data base

train_y, train_x = prepare_data(engine,
                                start_date=start_date,
                                end_date=end_date,
                                factors=alpha_factors + neutralize_risk,
                                frequency=frequency,
                                universe=universe,
                                benchmark=benchmark)

dates = train_x.Date.unique()
groups = train_x.Date.values
raw_x = train_x[alpha_factors].values.astype(float)
raw_y = train_y[['dx']].values.astype(float)
benchmark_w = train_x['weight'].values
risk_exp = train_x[neutralize_risk].values.astype(float)

# pre-processing stage for winsorize, standardize and neutralize

ne_x = raw_x.copy()
ne_y = raw_y.copy()

# Dedicated loop names are used so the ``start_date`` / ``end_date`` settings
# above are not overwritten by the window boundaries.
for i, window_start in enumerate(dates[:-batch]):
    window_end = dates[i + batch]
    index = (groups >= window_start) & (groups < window_end)
    this_raw_x = raw_x[index]
    this_raw_y = raw_y[index]
    this_risk_exp = risk_exp[index]

    # Consecutive windows overlap, so rows are rewritten by later windows and
    # the last window covering a date determines its final processed values.
    ne_x[index] = factor_processing(this_raw_x,
                                    pre_process=[winsorize_normal, standardize],
                                    risk_factors=this_risk_exp,
                                    post_process=[standardize])

    ne_y[index] = factor_processing(this_raw_y,
                                    pre_process=[winsorize_normal, standardize],
                                    risk_factors=this_risk_exp,
                                    post_process=[standardize])

# training phase: using Linear - regression from scikit-learn

model = LinearRegression(fit_intercept=False)
# Fitted models keyed by the first date after their training window. A plain
# dict replaces the empty, dtype-less ``pd.Series()`` that was grown with .loc.
model_df = {}

for i, window_start in enumerate(dates[:-batch]):
    window_end = dates[i + batch]
    index = (groups >= window_start) & (groups < window_end)
    this_ne_x = ne_x[index]
    this_ne_y = ne_y[index]

    model.fit(this_ne_x, this_ne_y)
    # ``model`` is re-fitted on every pass, so a snapshot is stored.
    model_df[window_end] = copy.deepcopy(model)
    print('Date: {0} training finished'.format(window_end))

# predicting phase: using trained model on the re-balance dates

final_res = np.zeros((len(dates) - batch, n_bins))

for i, predict_date in enumerate(dates[batch:]):
    fitted_model = model_df[predict_date]
    index = groups == predict_date

    this_ne_x = ne_x[index]
    realized_r = raw_y[index]
    this_benchmark_w = benchmark_w[index]

    predict_y = fitted_model.predict(this_ne_x)

    res = er_quantile_analysis(predict_y,
                               n_bins,
                               dx_return=realized_r,
                               benchmark=this_benchmark_w)

    final_res[i] = res / this_benchmark_w.sum()

df = pd.DataFrame(final_res, index=dates[batch:])

# Seed the first date with zeros so the cumulative curves start from 0.
df.loc[dates[0]] = 0.
df.sort_index(inplace=True)
df.cumsum().plot()
plt.title('Prod factors model training with Linear Regression from 2012 - 2017')
plt.show()
alphamind/model/preparing.py
View file @
a5260eba
...
@@ -34,6 +34,7 @@ def prepare_data(engine: SqlEngine,
...
@@ -34,6 +34,7 @@ def prepare_data(engine: SqlEngine,
end_date
:
str
,
end_date
:
str
,
frequency
:
str
,
frequency
:
str
,
universe
:
Universe
,
universe
:
Universe
,
benchmark
:
int
,
default_window
:
int
=
0
):
default_window
:
int
=
0
):
dates
=
makeSchedule
(
start_date
,
end_date
,
frequency
,
calendar
=
'china.sse'
,
dateRule
=
BizDayConventions
.
Following
)
dates
=
makeSchedule
(
start_date
,
end_date
,
frequency
,
calendar
=
'china.sse'
,
dateRule
=
BizDayConventions
.
Following
)
...
@@ -46,10 +47,13 @@ def prepare_data(engine: SqlEngine,
...
@@ -46,10 +47,13 @@ def prepare_data(engine: SqlEngine,
dates
=
dates
,
dates
=
dates
,
default_window
=
default_window
)
.
sort_values
([
'Date'
,
'Code'
])
default_window
=
default_window
)
.
sort_values
([
'Date'
,
'Code'
])
return_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
return_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
benchmark_df
=
engine
.
fetch_benchmark_range
(
benchmark
,
dates
=
dates
)
df
=
pd
.
merge
(
factor_df
,
return_df
,
on
=
[
'Date'
,
'Code'
])
.
dropna
()
df
=
pd
.
merge
(
factor_df
,
return_df
,
on
=
[
'Date'
,
'Code'
])
.
dropna
()
df
=
pd
.
merge
(
df
,
benchmark_df
,
on
=
[
'Date'
,
'Code'
],
how
=
'left'
)
df
[
'weight'
]
=
df
[
'weight'
]
.
fillna
(
0.
)
return
df
[[
'Date'
,
'Code'
,
'dx'
]],
df
[[
'Date'
,
'Code'
]
+
transformer
.
names
]
return
df
[[
'Date'
,
'Code'
,
'dx'
]],
df
[[
'Date'
,
'Code'
,
'weight'
]
+
transformer
.
names
]
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment