alpha-mind · commit 2daf5bbd

Commit 2daf5bbd, authored Aug 20, 2017 by Dr.李
parent e623409d

added fetch daily return range

Showing 3 changed files with 124 additions and 84 deletions (+124 / -84)
alphamind/data/engines/sqlengine.py              +78  -51
alphamind/data/engines/universe.py                +9  -18
alphamind/examples/quantile_analysis_example.py  +37  -15
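
The net effect, condensed from the example-script changes further down: rather than issuing one query per rebalance date, factor data and forward daily returns are now fetched once over a whole date range and then grouped by date. A rough usage sketch of that workflow (the connection URL, universe name, factor list and horizon below are placeholders, not the project's real configuration):

    import pandas as pd
    from PyFin.api import makeSchedule
    from alphamind.api import *

    # Placeholder connection string; substitute a real database URL.
    engine = SqlEngine('postgresql+psycopg2://user:password@localhost/alpha')
    universe = Universe('custom', ['zz500'])

    dates = makeSchedule('2016-04-01', '2017-08-16', tenor='1w', calendar='china.sse')

    # Fetch everything for the whole range up front ...
    all_data = engine.fetch_data_range(universe, ['EARNYILD'], dates=dates, benchmark=905)
    factor_all_data = all_data['factor']
    return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=4)

    # ... then iterate in memory, one group per date, instead of one query per date.
    return_groups = return_all_data.groupby('Date')
    for date, data in factor_all_data.groupby('Date'):
        total_data = pd.merge(data, return_groups.get_group(date), on=['Code']).dropna()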
alphamind/data/engines/sqlengine.py

@@ -13,7 +13,7 @@ import numpy as np
 import pandas as pd
 import sqlalchemy as sa
 import sqlalchemy.orm as orm
-from sqlalchemy import select, and_, outerjoin, join
+from sqlalchemy import select, and_, outerjoin, join, over
 from sqlalchemy.sql import func
 from alphamind.data.engines.universe import Universe
 from alphamind.data.dbmodel.models import FactorMaster
@@ -151,13 +151,17 @@ class SqlEngine(object):
     def fetch_codes_range(self,
                           universe: Universe,
                           start_date: str = None,
                           end_date: str = None,
                           dates: Iterable[str] = None) -> pd.DataFrame:
         query = universe.query_range(start_date, end_date, dates)
         return pd.read_sql(query, self.engine)

-    def fetch_dx_return(self, ref_date, codes, expiry_date=None, horizon=0):
+    def fetch_dx_return(self,
+                        ref_date: str,
+                        codes: Iterable[int],
+                        expiry_date: str = None,
+                        horizon: int = 0) -> pd.DataFrame:
         start_date = ref_date

         if not expiry_date:
@@ -165,7 +169,7 @@ class SqlEngine(object):
         else:
             end_date = expiry_date

-        query = select([DailyReturn.Code, func.sum(func.log(1. + DailyReturn.d1)).label('dx')]).where(
+        query = select([DailyReturn.Code, func.sum(func.ln(1. + DailyReturn.d1)).label('dx')]).where(
             and_(
                 DailyReturn.Date.between(start_date, end_date),
                 DailyReturn.Code.in_(codes)
@@ -174,6 +178,38 @@ class SqlEngine(object):

         return pd.read_sql(query, self.session.bind)

+    def fetch_dx_return_range(self,
+                              universe,
+                              start_date: str = None,
+                              end_date: str = None,
+                              dates: Iterable[str] = None,
+                              horizon: int = 0) -> pd.DataFrame:
+        if dates:
+            start_date = dates[0]
+            end_date = dates[-1]
+
+        end_date = advanceDateByCalendar('china.sse', end_date, str(horizon) + 'b').strftime('%Y-%m-%d')
+
+        q2 = universe.query_range(start_date, end_date).alias('temp_universe')
+        big_table = join(DailyReturn, q2,
+                         and_(DailyReturn.Date == q2.c.Date,
+                              DailyReturn.Code == q2.c.Code))
+
+        stats = func.sum(func.ln(1. + DailyReturn.d1)).over(partition_by=DailyReturn.Code,
+                                                            order_by=DailyReturn.Date,
+                                                            rows=(0, horizon)).label('dx')
+
+        query = select([DailyReturn.Date, DailyReturn.Code, stats]) \
+            .select_from(big_table) \
+            .where(DailyReturn.Date.between(start_date, end_date))
+
+        df = pd.read_sql(query, self.session.bind)
+
+        if dates:
+            df = df[df.Date.isin(dates)]
+
+        return df
+
     def fetch_factor(self,
                      ref_date: str,
                      factors: Iterable[str],
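
For reference, the window function in the new fetch_dx_return_range accumulates log daily returns over the current row and the following `horizon` rows for each code. A plain-pandas sketch of that same aggregation (illustration only, assuming a frame with Date, Code and d1 columns like the DailyReturn table):

    import numpy as np
    import pandas as pd

    def forward_log_return(daily: pd.DataFrame, horizon: int = 0) -> pd.DataFrame:
        # daily is assumed to hold one row per (Date, Code) with the simple daily return d1.
        out = []
        for code, grp in daily.groupby('Code'):
            grp = grp.sort_values('Date').copy()
            log_ret = np.log(1. + grp['d1'].values)
            # Sum over the current row and the next `horizon` rows, truncating at the series end,
            # which mirrors rows=(0, horizon) in the SQL window function above.
            grp['dx'] = [log_ret[i:i + horizon + 1].sum() for i in range(len(log_ret))]
            out.append(grp[['Date', 'Code', 'dx']])
        return pd.concat(out, ignore_index=True)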
@@ -193,9 +229,9 @@ class SqlEngine(object):
     def fetch_factor_range(self,
                            universe: Universe,
                            factors: Iterable[str],
                            start_date: str = None,
                            end_date: str = None,
                            dates: Iterable[str] = None) -> pd.DataFrame:
         factor_cols = _map_factors(factors)

         q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
@@ -223,25 +259,19 @@ class SqlEngine(object):
     def fetch_benchmark_range(self,
                               benchmark: int,
                               start_date: str = None,
                               end_date: str = None,
                               dates: Iterable[str] = None) -> pd.DataFrame:

-        if dates:
-            query = select([IndexComponent.Date, IndexComponent.Code,
-                            (IndexComponent.weight / 100.).label('weight')]).where(
-                and_(
-                    IndexComponent.Date.in_(dates),
-                    IndexComponent.indexCode == benchmark
-                )
-            )
-        else:
-            query = select([IndexComponent.Date, IndexComponent.Code,
-                            (IndexComponent.weight / 100.).label('weight')]).where(
-                and_(
-                    IndexComponent.Date.between(start_date, end_date),
-                    IndexComponent.indexCode == benchmark
-                )
-            )
+        cond = IndexComponent.Date.in_(dates) if dates else IndexComponent.Date.between(start_date, end_date)
+
+        query = select([IndexComponent.Date, IndexComponent.Code,
+                        (IndexComponent.weight / 100.).label('weight')]).where(
+            and_(
+                cond,
+                IndexComponent.indexCode == benchmark
+            )
+        )

         return pd.read_sql(query, self.engine)

     def fetch_risk_model(self,
@@ -273,28 +303,23 @@ class SqlEngine(object):
     def fetch_risk_model_range(self,
                                universe: Universe,
                                start_date: str = None,
                                end_date: str = None,
                                dates: Iterable[str] = None,
                                risk_model: str = 'short') -> Tuple[pd.DataFrame, pd.DataFrame]:
         risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
         cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]

-        if dates:
-            query = select([risk_cov_table.Date, risk_cov_table.FactorID, risk_cov_table.Factor] + cov_risk_cols) \
-                .where(risk_cov_table.Date.in_(dates))
-        else:
-            query = select([risk_cov_table.Date, risk_cov_table.FactorID, risk_cov_table.Factor] + cov_risk_cols) \
-                .where(risk_cov_table.Date.between(start_date, end_date))
+        cond = risk_cov_table.Date.in_(dates) if dates else risk_cov_table.Date.between(start_date, end_date)
+
+        query = select([risk_cov_table.Date, risk_cov_table.FactorID, risk_cov_table.Factor] + cov_risk_cols) \
+            .where(cond)

         risk_cov = pd.read_sql(query, self.engine).sort_values(['Date', 'FactorID'])

         risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors]
@@ -303,7 +328,8 @@ class SqlEngine(object):
                                  special_risk_table.Code == RiskExposure.Code))

         q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
-        big_table = join(big_table, q2, and_(special_risk_table.Date == q2.c.Date, special_risk_table.Code == q2.c.Code))
+        big_table = join(big_table, q2, and_(special_risk_table.Date == q2.c.Date,
+                                             special_risk_table.Code == q2.c.Code))

         query = select([RiskExposure.Date, RiskExposure.Code, special_risk_table.SRISK] + risk_exposure_cols) \
@@ -342,9 +368,9 @@ class SqlEngine(object):
     def fetch_data_range(self,
                          universe: Universe,
                          factors: Iterable[str],
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None,
                          benchmark: int = None,
                          risk_model: str = 'short') -> Dict[str, pd.DataFrame]:
@@ -377,7 +403,8 @@ if __name__ == '__main__':
     ref_date = '2017-08-10'
     codes = engine.fetch_codes_range(universe, None, None, ['2017-01-01', '2017-08-10'])
     data = engine.fetch_data_range(universe, ['EPS'], None, None, ['2017-01-01', '2017-08-10'], 905, 'short')
     data1 = engine.fetch_dx_return('2017-08-01',)
+    data2 = engine.fetch_dx_return_range(universe, '2017-08-01', '2017-08-10', ['2017-08-01', '2017-08-10'])

     print(codes)
     print(data)
alphamind/data/engines/universe.py

@@ -76,20 +76,11 @@ class Universe(object):
         query = select([UniverseTable.Date, UniverseTable.Code]).distinct()
         all_and_conditions, all_or_conditions = self._create_condition()

-        if dates:
-            query = query.where(
-                and_(
-                    UniverseTable.Date.in_(dates),
-                    or_(
-                        and_(*all_and_conditions),
-                        *all_or_conditions
-                    )
-                )
-            )
-        else:
-            query = query.where(
-                and_(
-                    UniverseTable.Date.between(start_date, end_date),
-                    or_(
-                        and_(*all_and_conditions),
-                        *all_or_conditions
+        dates_cond = UniverseTable.Date.in_(dates) if dates else UniverseTable.Date.between(start_date, end_date)
+
+        query = query.where(
+            and_(
+                dates_cond,
+                or_(
+                    and_(*all_and_conditions),
+                    *all_or_conditions
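
The same refactor appears in fetch_benchmark_range and fetch_risk_model_range above: the if/else that duplicated an entire select is collapsed into a single date condition chosen up front. A minimal standalone sketch of that pattern (the table and columns here are hypothetical, SQLAlchemy 1.x style as used in the project):

    import sqlalchemy as sa
    from sqlalchemy import select

    metadata = sa.MetaData()
    # Hypothetical table, only to make the snippet self-contained.
    universe_table = sa.Table('universe', metadata,
                              sa.Column('Date', sa.String),
                              sa.Column('Code', sa.Integer))

    def build_query(start_date=None, end_date=None, dates=None):
        # Choose the date filter once ...
        dates_cond = (universe_table.c.Date.in_(dates) if dates
                      else universe_table.c.Date.between(start_date, end_date))
        # ... and reuse it in a single select instead of two near-identical branches.
        return select([universe_table.c.Date, universe_table.c.Code]).where(dates_cond).distinct()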
alphamind/examples/quantile_analysis_example.py

@@ -8,38 +8,56 @@ Created on 2017-8-16
 import numpy as np
 import pandas as pd
 from matplotlib import pyplot as plt
-from PyFin.api import makeSchedule
+from PyFin.api import *
 from alphamind.api import *

-engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
-# engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
-universe = Universe('custom', ['pm500_mirror'])
+# engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
+engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha')
+universe = Universe('custom', ['zz500'])
 neutralize_risk = ['SIZE'] + industry_styles
 n_bins = 5
 factor_weights = np.array([1.])
-dates = makeSchedule('2016-08-14', '2017-08-14', tenor='1w',
+freq = '1w'
+
+if freq == '1m':
+    horizon = 21
+elif freq == '1w':
+    horizon = 4
+elif freq == '1d':
+    horizon = 0
+
+start_date = '2016-04-01'
+end_date = '2017-08-16'
+
+dates = makeSchedule(start_date, end_date, tenor=freq, calendar='china.sse')

-prod_factors = ['EARNYILD', 'ROAEBIT']
+prod_factors = ['EARNYILD', 'ROAEBIT', 'CHV', 'CFinc1']
+
+all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
+factor_all_data = all_data['factor']
+return_all_data = engine.fetch_dx_return_range(universe, start_date, end_date, dates, horizon=horizon)

 for factor in prod_factors:

     factors = [factor]
     final_res = np.zeros((len(dates), n_bins))

-    for i, date in enumerate(dates):
-        ref_date = date.strftime('%Y-%m-%d')
-        codes = engine.fetch_codes(ref_date, universe)
+    factor_groups = factor_all_data.groupby('Date')
+    return_groups = return_all_data.groupby('Date')

-        data = engine.fetch_data(ref_date, factors, codes, 905)
-        returns = engine.fetch_dx_return(ref_date, codes, horizon=4)
+    for i, value in enumerate(factor_groups):
+        date = value[0]
+        data = value[1]
+        codes = data.Code.tolist()
+        ref_date = value[0].strftime('%Y-%m-%d')
+        returns = return_groups.get_group(date)

-        total_data = pd.merge(data['factor'], returns, on=['Code']).dropna()
+        total_data = pd.merge(data, returns, on=['Code']).dropna()
         print(date, ': ', len(total_data))
         risk_exp = total_data[neutralize_risk].values.astype(float)
         dx_return = total_data.dx.values

@@ -60,6 +78,10 @@ for factor in prod_factors:
         final_res[i] = res / benchmark.sum()

     df = pd.DataFrame(final_res, index=dates)
+
+    start_date = advanceDateByCalendar('china.sse', dates[0], '-1m')
+    df.loc[start_date] = 0.
+    df.sort_index(inplace=True)
     df.cumsum().plot(figsize=(12, 6))
     plt.title('{0} weekly re-balance'.format(factors[0]))
     plt.savefig('{0}_big_universe_20170814.png'.format(factors[0]))
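
The freq-to-horizon mapping hard-coded in the example (21 trading days for monthly, 4 for weekly, 0 for daily rebalancing) could be factored into a small helper; this is only an illustrative suggestion, not part of the commit:

    def freq_to_horizon(freq: str) -> int:
        # Mapping taken from the example script above: additional trading days
        # covered by one rebalance period.
        mapping = {'1m': 21, '1w': 4, '1d': 0}
        try:
            return mapping[freq]
        except KeyError:
            raise ValueError('unsupported rebalance frequency: {0}'.format(freq))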