Dr.李 / alpha-mind / Commits / bd3c56b0

Commit bd3c56b0, authored Aug 25, 2017 by Dr.李 (parent 9e66adbd)

replace column names

Showing 5 changed files with 71 additions and 69 deletions (+71, -69)
alphamind/analysis/calculators.py      +3  -3
alphamind/data/engines/sqlengine.py    +44 -44
alphamind/data/engines/universe.py     +6  -6
alphamind/examples/model_training.py   +10 -8
alphamind/model/data_preparing.py      +8  -8
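Every hunk below applies the same rename: the Date column becomes trade_date and Code becomes code, both as SQLAlchemy attributes and as pandas column labels. A minimal sketch (not part of this commit) of how a caller holding frames built against the old schema could align them with the new naming; the 'weight' column is illustrative only:

import pandas as pd

# Hypothetical frame built against the pre-commit schema.
old_style = pd.DataFrame({'Date': ['2017-08-25'], 'Code': [1], 'weight': [1.0]})

# Align it with the naming used after this commit.
new_style = old_style.rename(columns={'Date': 'trade_date', 'Code': 'code'})
print(new_style.columns.tolist())  # ['trade_date', 'code', 'weight']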
alphamind/analysis/calculators.py

@@ -10,13 +10,13 @@ import pandas as pd
 def calculate_turn_over(pos_table: pd.DataFrame) -> pd.DataFrame:
     turn_over_table = {}
-    total_factors = pos_table.columns.difference(['Code'])
+    total_factors = pos_table.columns.difference(['code'])
     pos_table.reset_index()
     for name in total_factors:
-        pivot_position = pos_table.pivot(values=name, columns='Code').fillna(0.)
+        pivot_position = pos_table.pivot(values=name, columns='code').fillna(0.)
         turn_over_series = pivot_position.diff().abs().sum(axis=1)
         turn_over_table[name] = turn_over_series.values
-    turn_over_table = pd.DataFrame(turn_over_table, index=pos_table.Date.unique())
+    turn_over_table = pd.DataFrame(turn_over_table, index=pos_table.trade_date.unique())
     return turn_over_table[total_factors]
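For reference, a small self-contained sketch of the turnover computation that calculate_turn_over performs per factor column, using the renamed 'trade_date' and 'code' labels; the positions and the factor name 'factor_a' below are invented for illustration:

import pandas as pd

pos_table = pd.DataFrame({
    'trade_date': ['2017-08-01', '2017-08-01', '2017-08-02', '2017-08-02'],
    'code': [1, 2, 1, 2],
    'factor_a': [0.6, 0.4, 0.3, 0.7],
})

# Pivot one factor column by 'code' and sum absolute day-over-day changes,
# which is the per-factor series calculate_turn_over collects.
pivot_position = pos_table.pivot(index='trade_date', columns='code', values='factor_a').fillna(0.)
turn_over_series = pivot_position.diff().abs().sum(axis=1)
print(turn_over_series)  # 2017-08-01: 0.0, 2017-08-02: |0.3-0.6| + |0.7-0.4| = 0.6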
alphamind/data/engines/sqlengine.py

@@ -108,7 +108,7 @@ def _map_risk_model_table(risk_model: str) -> tuple:
 def _map_factors(factors: Iterable[str]) -> dict:
     factor_cols = {}
-    excluded = {'Date', 'Code', 'isOpen',}
+    excluded = {'trade_date', 'code', 'isOpen',}
     for f in factors:
         for t in factor_tables:
             if f not in excluded and f in t.__table__.columns:

@@ -140,7 +140,7 @@ class SqlEngine(object):
     def fetch_strategy(self, ref_date: str, strategy: str) -> pd.DataFrame():
         query = select([Strategy.strategyName, Strategy.factor, Strategy.weight]).where(
             and_(
-                Strategy.Date == ref_date,
+                Strategy.trade_date == ref_date,
                 Strategy.strategyName == strategy
             )
         )

@@ -179,12 +179,12 @@ class SqlEngine(object):
         else:
             end_date = expiry_date

-        query = select([DailyReturn.Code, func.sum(self.ln_func(1. + DailyReturn.d1)).label('dx')]).where(
+        query = select([DailyReturn.code, func.sum(self.ln_func(1. + DailyReturn.d1)).label('dx')]).where(
             and_(
-                DailyReturn.Date.between(start_date, end_date),
-                DailyReturn.Code.in_(codes)
+                DailyReturn.trade_date.between(start_date, end_date),
+                DailyReturn.code.in_(codes)
             )
-        ).group_by(DailyReturn.Code)
+        ).group_by(DailyReturn.code)

         return pd.read_sql(query, self.session.bind)

@@ -202,21 +202,21 @@ class SqlEngine(object):
         end_date = advanceDateByCalendar('china.sse', end_date, str(horizon) + 'b').strftime('%Y-%m-%d')

         q2 = universe.query_range(start_date, end_date).alias('temp_universe')
-        big_table = join(DailyReturn, q2, and_(DailyReturn.Date == q2.c.Date, DailyReturn.Code == q2.c.Code))
+        big_table = join(DailyReturn, q2, and_(DailyReturn.trade_date == q2.c.trade_date, DailyReturn.code == q2.c.code))

         stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
-            partition_by=DailyReturn.Code,
-            order_by=DailyReturn.Date,
+            partition_by=DailyReturn.code,
+            order_by=DailyReturn.trade_date,
             rows=(0, horizon)).label('dx')

-        query = select([DailyReturn.Date, DailyReturn.Code, stats]) \
+        query = select([DailyReturn.trade_date, DailyReturn.code, stats]) \
             .select_from(big_table) \
-            .where(DailyReturn.Date.between(start_date, end_date))
+            .where(DailyReturn.trade_date.between(start_date, end_date))

         df = pd.read_sql(query, self.session.bind)

         if dates:
-            df = df[df.Date.isin(dates)]
+            df = df[df.trade_date.isin(dates)]

         return df

@@ -240,17 +240,17 @@ class SqlEngine(object):
         big_table = Market
         for t in set(factor_cols.values()):
-            big_table = outerjoin(big_table, t, and_(Market.Date == t.Date, Market.Code == t.Code))
+            big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date, Market.code == t.code))

-        query = select([Market.Date, Market.Code, Market.isOpen] + list(factor_cols.keys())) \
+        query = select([Market.trade_date, Market.code, Market.isOpen] + list(factor_cols.keys())) \
             .select_from(big_table) \
-            .where(and_(Market.Date.between(start_date, end_date), Market.Code.in_(codes)))
+            .where(and_(Market.trade_date.between(start_date, end_date), Market.code.in_(codes)))

-        df = pd.read_sql(query, self.engine).sort_values(['Date', 'Code']).set_index('Date')
-        res = transformer.transform('Code', df)
+        df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).set_index('trade_date')
+        res = transformer.transform('code', df)

         for col in res.columns:
-            if col not in set(['Code', 'isOpen']) and col not in df.columns:
+            if col not in set(['code', 'isOpen']) and col not in df.columns:
                 df[col] = res[col].values

         df = df.loc[ref_date]

@@ -296,18 +296,18 @@ class SqlEngine(object):
         q2 = universe.query_range(real_start_date, real_end_date, real_dates).alias('temp_universe')
-        big_table = join(Market, q2, and_(Market.Date == q2.c.Date, Market.Code == q2.c.Code))
+        big_table = join(Market, q2, and_(Market.trade_date == q2.c.trade_date, Market.code == q2.c.code))

         for t in set(factor_cols.values()):
-            big_table = outerjoin(big_table, t, and_(Market.Date == t.Date, Market.Code == t.Code))
+            big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date, Market.code == t.code))

-        query = select([Market.Date, Market.Code, Market.isOpen] + list(factor_cols.keys())) \
+        query = select([Market.trade_date, Market.code, Market.isOpen] + list(factor_cols.keys())) \
             .select_from(big_table)

-        df = pd.read_sql(query, self.engine).sort_values(['Date', 'Code']).set_index('Date')
-        res = transformer.transform('Code', df)
+        df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).set_index('trade_date')
+        res = transformer.transform('code', df)

         for col in res.columns:
-            if col not in set(['Code', 'isOpen']) and col not in df.columns:
+            if col not in set(['code', 'isOpen']) and col not in df.columns:
                 df[col] = res[col].values

         if dates:
             df = df[df.index.isin(dates)]

@@ -318,9 +318,9 @@ class SqlEngine(object):
     def fetch_benchmark(self,
                         ref_date: str,
                         benchmark: int) -> pd.DataFrame:
-        query = select([IndexComponent.Code, (IndexComponent.weight / 100.).label('weight')]).where(
+        query = select([IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
             and_(
-                IndexComponent.Date == ref_date,
+                IndexComponent.trade_date == ref_date,
                 IndexComponent.indexCode == benchmark
             )
         )

@@ -333,10 +333,10 @@ class SqlEngine(object):
                               end_date: str = None,
                               dates: Iterable[str] = None) -> pd.DataFrame:
-        cond = IndexComponent.Date.in_(dates) if dates else IndexComponent.Date.between(start_date, end_date)
+        cond = IndexComponent.trade_date.in_(dates) if dates else IndexComponent.trade_date.between(start_date, end_date)

         query = select(
-            [IndexComponent.Date, IndexComponent.Code, (IndexComponent.weight / 100.).label('weight')]).where(
+            [IndexComponent.trade_date, IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
             and_(
                 cond,
                 IndexComponent.indexCode == benchmark

@@ -355,18 +355,18 @@ class SqlEngine(object):
         query = select([risk_cov_table.FactorID,
                         risk_cov_table.Factor]
                        + cov_risk_cols).where(
-            risk_cov_table.Date == ref_date
+            risk_cov_table.trade_date == ref_date
         )

         risk_cov = pd.read_sql(query, self.engine).sort_values('FactorID')

         risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]
         big_table = outerjoin(special_risk_table, RiskExposure,
-                              and_(special_risk_table.Date == RiskExposure.Date,
-                                   special_risk_table.Code == RiskExposure.Code))
+                              and_(special_risk_table.trade_date == RiskExposure.trade_date,
+                                   special_risk_table.code == RiskExposure.code))

         query = select(
-            [RiskExposure.Code, special_risk_table.SRISK] + risk_exposure_cols) \
+            [RiskExposure.code, special_risk_table.SRISK] + risk_exposure_cols) \
             .select_from(big_table) \
-            .where(and_(RiskExposure.Date == ref_date, RiskExposure.Code.in_(codes)))
+            .where(and_(RiskExposure.trade_date == ref_date, RiskExposure.code.in_(codes)))

         risk_exp = pd.read_sql(query, self.engine)

@@ -384,30 +384,30 @@ class SqlEngine(object):
         cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]

-        cond = risk_cov_table.Date.in_(dates) if dates else risk_cov_table.Date.between(start_date, end_date)
+        cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(start_date, end_date)

-        query = select([risk_cov_table.Date,
+        query = select([risk_cov_table.trade_date,
                         risk_cov_table.FactorID,
                         risk_cov_table.Factor]
                        + cov_risk_cols).where(
             cond
         )

-        risk_cov = pd.read_sql(query, self.engine).sort_values(['Date', 'FactorID'])
+        risk_cov = pd.read_sql(query, self.engine).sort_values(['trade_date', 'FactorID'])

         if not excluded:
             excluded = []

         risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]
         big_table = outerjoin(special_risk_table, RiskExposure,
-                              and_(special_risk_table.Date == RiskExposure.Date,
-                                   special_risk_table.Code == RiskExposure.Code))
+                              and_(special_risk_table.trade_date == RiskExposure.trade_date,
+                                   special_risk_table.code == RiskExposure.code))

         q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
         big_table = join(big_table, q2,
-                         and_(special_risk_table.Date == q2.c.Date, special_risk_table.Code == q2.c.Code))
+                         and_(special_risk_table.trade_date == q2.c.trade_date, special_risk_table.code == q2.c.code))

         query = select(
-            [RiskExposure.Date, RiskExposure.Code, special_risk_table.SRISK] + risk_exposure_cols) \
+            [RiskExposure.trade_date, RiskExposure.code, special_risk_table.SRISK] + risk_exposure_cols) \
             .select_from(big_table)

         risk_exp = pd.read_sql(query, self.engine)

@@ -428,13 +428,13 @@ class SqlEngine(object):
         if benchmark:
             benchmark_data = self.fetch_benchmark(ref_date, benchmark)
             total_data['benchmark'] = benchmark_data
-            factor_data = pd.merge(factor_data, benchmark_data, how='left', on=['Code'])
+            factor_data = pd.merge(factor_data, benchmark_data, how='left', on=['code'])
             factor_data['weight'] = factor_data['weight'].fillna(0.)

         if risk_model:
             excluded = list(set(total_risk_factors).intersection(transformer.dependency))
             risk_cov, risk_exp = self.fetch_risk_model(ref_date, codes, risk_model, excluded)
-            factor_data = pd.merge(factor_data, risk_exp, how='left', on=['Code'])
+            factor_data = pd.merge(factor_data, risk_exp, how='left', on=['code'])
             total_data['risk_cov'] = risk_cov

         total_data['factor'] = factor_data

@@ -458,13 +458,13 @@ class SqlEngine(object):
         if benchmark:
             benchmark_data = self.fetch_benchmark_range(benchmark, start_date, end_date, dates)
             total_data['benchmark'] = benchmark_data
-            factor_data = pd.merge(factor_data, benchmark_data, how='left', on=['Date', 'Code'])
+            factor_data = pd.merge(factor_data, benchmark_data, how='left', on=['trade_date', 'code'])
             factor_data['weight'] = factor_data['weight'].fillna(0.)

         if risk_model:
             excluded = list(set(total_risk_factors).intersection(transformer.dependency))
             risk_cov, risk_exp = self.fetch_risk_model_range(universe, start_date, end_date, dates, risk_model, excluded)
-            factor_data = pd.merge(factor_data, risk_exp, how='left', on=['Date', 'Code'])
+            factor_data = pd.merge(factor_data, risk_exp, how='left', on=['trade_date', 'code'])
             total_data['risk_cov'] = risk_cov

         total_data['factor'] = factor_data
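The SqlEngine changes touch only column attributes, not query structure. A self-contained sketch of the renamed usage, modelled on fetch_dx_return; the DailyReturn class below is a stand-in with an assumed table layout, not the project's actual declarative model, and it uses the legacy select([...]) call style seen in the code (SQLAlchemy 1.x):

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class DailyReturn(Base):
    __tablename__ = 'daily_return'                        # table name assumed for illustration
    trade_date = sa.Column(sa.String, primary_key=True)   # was 'Date' before this commit
    code = sa.Column(sa.Integer, primary_key=True)        # was 'Code' before this commit
    d1 = sa.Column(sa.Float)


codes = [1, 2]
# Aggregate log returns per code over a date window, mirroring the renamed
# column usage in SqlEngine.fetch_dx_return.
query = sa.select([DailyReturn.code,
                   sa.func.sum(sa.func.ln(1. + DailyReturn.d1)).label('dx')]).where(
    sa.and_(
        DailyReturn.trade_date.between('2017-08-01', '2017-08-31'),
        DailyReturn.code.in_(codes)
    )
).group_by(DailyReturn.code)

print(query)  # renders SQL that references trade_date / code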
alphamind/data/engines/universe.py

@@ -41,7 +41,7 @@ class Universe(object):
             all_and_conditions.append(univ_out)

         if self.exclude_codes:
-            codes_out = UniverseTable.Code.notin_(self.exclude_codes)
+            codes_out = UniverseTable.code.notin_(self.exclude_codes)
             all_and_conditions.append(codes_out)

         all_or_conditions = []

@@ -50,18 +50,18 @@ class Universe(object):
             all_or_conditions.append(univ_in)

         if self.include_codes:
-            codes_in = UniverseTable.Code.in_(self.include_codes)
+            codes_in = UniverseTable.code.in_(self.include_codes)
             all_or_conditions.append(codes_in)

         return all_and_conditions, all_or_conditions

     def query(self, ref_date):
-        query = select([UniverseTable.Date, UniverseTable.Code]).distinct()
+        query = select([UniverseTable.trade_date, UniverseTable.code]).distinct()

         all_and_conditions, all_or_conditions = self._create_condition()

         query = query.where(
             and_(
-                UniverseTable.Date == ref_date,
+                UniverseTable.trade_date == ref_date,
                 or_(
                     and_(*all_and_conditions),
                     *all_or_conditions

@@ -72,10 +72,10 @@ class Universe(object):
         return query

     def query_range(self, start_date=None, end_date=None, dates=None):
-        query = select([UniverseTable.Date, UniverseTable.Code]).distinct()
+        query = select([UniverseTable.trade_date, UniverseTable.code]).distinct()

         all_and_conditions, all_or_conditions = self._create_condition()

-        dates_cond = UniverseTable.Date.in_(dates) if dates else UniverseTable.Date.between(start_date, end_date)
+        dates_cond = UniverseTable.trade_date.in_(dates) if dates else UniverseTable.trade_date.between(start_date, end_date)

         query = query.where(
             and_(
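Universe.query and Universe.query_range keep their logic and only swap the column attributes. A compact sketch of the dates switch in query_range, using a stand-in UniverseTable whose layout is assumed here:

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class UniverseTable(Base):
    __tablename__ = 'universe'                            # table name assumed for illustration
    trade_date = sa.Column(sa.String, primary_key=True)   # was 'Date'
    code = sa.Column(sa.Integer, primary_key=True)        # was 'Code'


start_date, end_date = '2017-08-01', '2017-08-31'
dates = ['2017-08-04', '2017-08-11']

# Same pattern as query_range: an IN filter when explicit dates are supplied,
# otherwise a BETWEEN range, both on the renamed trade_date column.
dates_cond = (UniverseTable.trade_date.in_(dates) if dates
              else UniverseTable.trade_date.between(start_date, end_date))
query = sa.select([UniverseTable.trade_date, UniverseTable.code]).distinct().where(dates_cond)
print(query)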
alphamind/examples/model_training.py

@@ -32,13 +32,13 @@ training - every 4 week
 engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
 universe = Universe('zz500', ['zz500'])
 neutralize_risk = industry_styles
-alpha_factors = risk_styles
+alpha_factors = ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1']  # risk_styles
 benchmark = 905
 n_bins = 5
 frequency = '1w'
 batch = 4
 start_date = '2012-01-01'
-end_date = '2017-08-01'
+end_date = '2017-08-31'

 '''
 fetch data from target data base and do the corresponding data processing

@@ -74,7 +74,7 @@ for train_date in dates:
     model.fit(x, y)
     model_df.loc[train_date] = copy.deepcopy(model)
-    print('Date: {0} training finished'.format(train_date))
+    print('trade_date: {0} training finished'.format(train_date))

 '''
 predicting phase: using trained model on the re-balance dates

@@ -89,8 +89,8 @@ final_res = np.zeros((len(dates), n_bins))
 for i, predict_date in enumerate(dates):
     model = model_df[predict_date]
     x = predict_x[predict_date]
-    benchmark_w = settlement[settlement.Date == predict_date]['weight'].values
-    realized_r = settlement[settlement.Date == predict_date]['dx'].values
+    benchmark_w = settlement[settlement.trade_date == predict_date]['weight'].values
+    realized_r = settlement[settlement.trade_date == predict_date]['dx'].values

     predict_y = model.predict(x)

@@ -100,12 +100,14 @@ for i, predict_date in enumerate(dates):
                              benchmark=benchmark_w)
     final_res[i] = res / benchmark_w.sum()

-    print('Date: {0} predicting finished'.format(train_date))
+    print('trade_date: {0} predicting finished'.format(train_date))

 last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)

 df = pd.DataFrame(final_res, index=dates[1:] + [last_date])
 df.sort_index(inplace=True)
-df = df.cumsum().plot()
-plt.title('Prod factors model training with Linear Regression from 2012 - 2017')
+df.cumsum().plot()
+plt.title('Risk style factors model training with Linear Regression from 2012 - 2017')
 plt.show()
+
+df = df.cumsum()
+df.to_csv('d:/20120101_20170823_bt.csv')
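Beyond the column rename, the example script swaps its alpha factor list, extends end_date, and now writes the cumulative result to CSV instead of keeping the plot handle in df. The per-date lookups simply follow the rename; a toy illustration of that filtering (the settlement rows below are invented):

import pandas as pd

settlement = pd.DataFrame({
    'trade_date': ['2017-08-04', '2017-08-04', '2017-08-11'],
    'code': [1, 2, 1],
    'weight': [0.6, 0.4, 1.0],
    'dx': [0.010, -0.020, 0.005],
})

predict_date = '2017-08-04'
# Same selection as the prediction loop, now keyed on 'trade_date'.
benchmark_w = settlement[settlement.trade_date == predict_date]['weight'].values
realized_r = settlement[settlement.trade_date == predict_date]['dx'].values
print(benchmark_w, realized_r)  # [0.6 0.4] [ 0.01 -0.02]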
alphamind/model/data_preparing.py

@@ -50,15 +50,15 @@ def prepare_data(engine: SqlEngine,
     factor_df = engine.fetch_factor_range(universe,
                                           factors=transformer,
                                           dates=dates,
-                                          warm_start=warm_start).sort_values(['Date', 'Code'])
+                                          warm_start=warm_start).sort_values(['trade_date', 'code'])
     return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
     benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)

-    df = pd.merge(factor_df, return_df, on=['Date', 'Code']).dropna()
-    df = pd.merge(df, benchmark_df, on=['Date', 'Code'], how='left')
+    df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
+    df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
     df['weight'] = df['weight'].fillna(0.)

-    return df[['Date', 'Code', 'dx']], df[['Date', 'Code', 'weight'] + transformer.names]
+    return df[['trade_date', 'code', 'dx']], df[['trade_date', 'code', 'weight'] + transformer.names]


 def batch_processing(x_values,

@@ -137,10 +137,10 @@ def fetch_data_package(engine: SqlEngine,
     if neutralized_risk:
         risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
         used_neutralized_risk = list(set(neutralized_risk).difference(transformer.names))
-        risk_df = risk_df[['Date', 'Code'] + used_neutralized_risk].dropna()
-        train_x = pd.merge(factor_df, risk_df, on=['Date', 'Code'])
-        return_df = pd.merge(return_df, risk_df, on=['Date', 'Code'])[['Date', 'Code', 'dx']]
+        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
+        train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
+        return_df = pd.merge(return_df, risk_df, on=['trade_date', 'code'])[['trade_date', 'code', 'dx']]
         train_y = return_df.copy()

         risk_exp = train_x[neutralized_risk].values.astype(float)

@@ -153,7 +153,7 @@ def fetch_data_package(engine: SqlEngine,
     x_values = train_x[transformer.names].values.astype(float)
     y_values = train_y[['dx']].values

-    date_label = pd.DatetimeIndex(factor_df.Date).to_pydatetime()
+    date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
     dates = np.unique(date_label)

     return_df['weight'] = train_x['weight']
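prepare_data and fetch_data_package keep the same merge pipeline; only the join keys change to ['trade_date', 'code']. A minimal sketch of that join with invented factor and return frames (the factor column 'f1' is illustrative only):

import pandas as pd

factor_df = pd.DataFrame({'trade_date': ['2017-08-01', '2017-08-01'],
                          'code': [1, 2],
                          'f1': [0.1, -0.2]})
return_df = pd.DataFrame({'trade_date': ['2017-08-01', '2017-08-01'],
                          'code': [1, 2],
                          'dx': [0.01, 0.02]})

# Inner join on the renamed keys, as prepare_data does before dropping NaNs.
df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
print(df)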