Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
2daf5bbd
Commit
2daf5bbd
authored
Aug 20, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added fetch daily return range
parent
e623409d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
124 additions
and
84 deletions
+124
-84
sqlengine.py
alphamind/data/engines/sqlengine.py
+78
-51
universe.py
alphamind/data/engines/universe.py
+9
-18
quantile_analysis_example.py
alphamind/examples/quantile_analysis_example.py
+37
-15
No files found.
alphamind/data/engines/sqlengine.py
View file @
2daf5bbd
...
@@ -13,7 +13,7 @@ import numpy as np
...
@@ -13,7 +13,7 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
import
sqlalchemy
as
sa
import
sqlalchemy
as
sa
import
sqlalchemy.orm
as
orm
import
sqlalchemy.orm
as
orm
from
sqlalchemy
import
select
,
and_
,
outerjoin
,
join
from
sqlalchemy
import
select
,
and_
,
outerjoin
,
join
,
over
from
sqlalchemy.sql
import
func
from
sqlalchemy.sql
import
func
from
alphamind.data.engines.universe
import
Universe
from
alphamind.data.engines.universe
import
Universe
from
alphamind.data.dbmodel.models
import
FactorMaster
from
alphamind.data.dbmodel.models
import
FactorMaster
...
@@ -151,13 +151,17 @@ class SqlEngine(object):
...
@@ -151,13 +151,17 @@ class SqlEngine(object):
def
fetch_codes_range
(
self
,
def
fetch_codes_range
(
self
,
universe
:
Universe
,
universe
:
Universe
,
start_date
:
str
=
None
,
start_date
:
str
=
None
,
end_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
query
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
query
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
return
pd
.
read_sql
(
query
,
self
.
engine
)
return
pd
.
read_sql
(
query
,
self
.
engine
)
def
fetch_dx_return
(
self
,
ref_date
,
codes
,
expiry_date
=
None
,
horizon
=
0
):
def
fetch_dx_return
(
self
,
ref_date
:
str
,
codes
:
Iterable
[
int
],
expiry_date
:
str
=
None
,
horizon
:
int
=
0
)
->
pd
.
DataFrame
:
start_date
=
ref_date
start_date
=
ref_date
if
not
expiry_date
:
if
not
expiry_date
:
...
@@ -165,7 +169,7 @@ class SqlEngine(object):
...
@@ -165,7 +169,7 @@ class SqlEngine(object):
else
:
else
:
end_date
=
expiry_date
end_date
=
expiry_date
query
=
select
([
DailyReturn
.
Code
,
func
.
sum
(
func
.
l
og
(
1.
+
DailyReturn
.
d1
))
.
label
(
'dx'
)])
.
where
(
query
=
select
([
DailyReturn
.
Code
,
func
.
sum
(
func
.
l
n
(
1.
+
DailyReturn
.
d1
))
.
label
(
'dx'
)])
.
where
(
and_
(
and_
(
DailyReturn
.
Date
.
between
(
start_date
,
end_date
),
DailyReturn
.
Date
.
between
(
start_date
,
end_date
),
DailyReturn
.
Code
.
in_
(
codes
)
DailyReturn
.
Code
.
in_
(
codes
)
...
@@ -174,6 +178,38 @@ class SqlEngine(object):
...
@@ -174,6 +178,38 @@ class SqlEngine(object):
return
pd
.
read_sql
(
query
,
self
.
session
.
bind
)
return
pd
.
read_sql
(
query
,
self
.
session
.
bind
)
def
fetch_dx_return_range
(
self
,
universe
,
start_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
,
horizon
:
int
=
0
)
->
pd
.
DataFrame
:
if
dates
:
start_date
=
dates
[
0
]
end_date
=
dates
[
-
1
]
end_date
=
advanceDateByCalendar
(
'china.sse'
,
end_date
,
str
(
horizon
)
+
'b'
)
.
strftime
(
'
%
Y-
%
m-
%
d'
)
q2
=
universe
.
query_range
(
start_date
,
end_date
)
.
alias
(
'temp_universe'
)
big_table
=
join
(
DailyReturn
,
q2
,
and_
(
DailyReturn
.
Date
==
q2
.
c
.
Date
,
DailyReturn
.
Code
==
q2
.
c
.
Code
))
stats
=
func
.
sum
(
func
.
ln
(
1.
+
DailyReturn
.
d1
))
.
over
(
partition_by
=
DailyReturn
.
Code
,
order_by
=
DailyReturn
.
Date
,
rows
=
(
0
,
horizon
))
.
label
(
'dx'
)
query
=
select
([
DailyReturn
.
Date
,
DailyReturn
.
Code
,
stats
])
\
.
select_from
(
big_table
)
\
.
where
(
DailyReturn
.
Date
.
between
(
start_date
,
end_date
))
df
=
pd
.
read_sql
(
query
,
self
.
session
.
bind
)
if
dates
:
df
=
df
[
df
.
Date
.
isin
(
dates
)]
return
df
def
fetch_factor
(
self
,
def
fetch_factor
(
self
,
ref_date
:
str
,
ref_date
:
str
,
factors
:
Iterable
[
str
],
factors
:
Iterable
[
str
],
...
@@ -193,9 +229,9 @@ class SqlEngine(object):
...
@@ -193,9 +229,9 @@ class SqlEngine(object):
def
fetch_factor_range
(
self
,
def
fetch_factor_range
(
self
,
universe
:
Universe
,
universe
:
Universe
,
factors
:
Iterable
[
str
],
factors
:
Iterable
[
str
],
start_date
:
str
=
None
,
start_date
:
str
=
None
,
end_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
factor_cols
=
_map_factors
(
factors
)
factor_cols
=
_map_factors
(
factors
)
q2
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
.
alias
(
'temp_universe'
)
q2
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
.
alias
(
'temp_universe'
)
...
@@ -223,25 +259,19 @@ class SqlEngine(object):
...
@@ -223,25 +259,19 @@ class SqlEngine(object):
def
fetch_benchmark_range
(
self
,
def
fetch_benchmark_range
(
self
,
benchmark
:
int
,
benchmark
:
int
,
start_date
:
str
=
None
,
start_date
:
str
=
None
,
end_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
if
dates
:
query
=
select
(
cond
=
IndexComponent
.
Date
.
in_
(
dates
)
if
dates
else
IndexComponent
.
Date
.
between
(
start_date
,
end_date
)
[
IndexComponent
.
Date
,
IndexComponent
.
Code
,
(
IndexComponent
.
weight
/
100.
)
.
label
(
'weight'
)])
.
where
(
and_
(
IndexComponent
.
Date
.
in_
(
dates
),
IndexComponent
.
indexCode
==
benchmark
)
)
else
:
query
=
select
([
IndexComponent
.
Date
,
IndexComponent
.
Code
,
(
IndexComponent
.
weight
/
100.
)
.
label
(
'weight'
)])
.
where
(
and_
(
IndexComponent
.
Date
.
between
(
start_date
,
end_date
),
IndexComponent
.
indexCode
==
benchmark
)
)
query
=
select
(
[
IndexComponent
.
Date
,
IndexComponent
.
Code
,
(
IndexComponent
.
weight
/
100.
)
.
label
(
'weight'
)])
.
where
(
and_
(
cond
,
IndexComponent
.
indexCode
==
benchmark
)
)
return
pd
.
read_sql
(
query
,
self
.
engine
)
return
pd
.
read_sql
(
query
,
self
.
engine
)
def
fetch_risk_model
(
self
,
def
fetch_risk_model
(
self
,
...
@@ -273,28 +303,23 @@ class SqlEngine(object):
...
@@ -273,28 +303,23 @@ class SqlEngine(object):
def
fetch_risk_model_range
(
self
,
def
fetch_risk_model_range
(
self
,
universe
:
Universe
,
universe
:
Universe
,
start_date
:
str
=
None
,
start_date
:
str
=
None
,
end_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
,
dates
:
Iterable
[
str
]
=
None
,
risk_model
:
str
=
'short'
)
->
Tuple
[
pd
.
DataFrame
,
pd
.
DataFrame
]:
risk_model
:
str
=
'short'
)
->
Tuple
[
pd
.
DataFrame
,
pd
.
DataFrame
]:
risk_cov_table
,
special_risk_table
=
_map_risk_model_table
(
risk_model
)
risk_cov_table
,
special_risk_table
=
_map_risk_model_table
(
risk_model
)
cov_risk_cols
=
[
risk_cov_table
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
cov_risk_cols
=
[
risk_cov_table
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
if
dates
:
query
=
select
([
risk_cov_table
.
Date
,
cond
=
risk_cov_table
.
Date
.
in_
(
dates
)
if
dates
else
risk_cov_table
.
Date
.
between
(
start_date
,
end_date
)
risk_cov_table
.
FactorID
,
query
=
select
([
risk_cov_table
.
Date
,
risk_cov_table
.
Factor
]
risk_cov_table
.
FactorID
,
+
cov_risk_cols
)
.
where
(
risk_cov_table
.
Factor
]
risk_cov_table
.
Date
.
in_
(
dates
)
+
cov_risk_cols
)
.
where
(
)
cond
else
:
)
query
=
select
([
risk_cov_table
.
Date
,
risk_cov_table
.
FactorID
,
risk_cov_table
.
Factor
]
+
cov_risk_cols
)
.
where
(
risk_cov_table
.
Date
.
between
(
start_date
,
end_date
)
)
risk_cov
=
pd
.
read_sql
(
query
,
self
.
engine
)
.
sort_values
([
'Date'
,
'FactorID'
])
risk_cov
=
pd
.
read_sql
(
query
,
self
.
engine
)
.
sort_values
([
'Date'
,
'FactorID'
])
risk_exposure_cols
=
[
RiskExposure
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
risk_exposure_cols
=
[
RiskExposure
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
...
@@ -303,7 +328,8 @@ class SqlEngine(object):
...
@@ -303,7 +328,8 @@ class SqlEngine(object):
special_risk_table
.
Code
==
RiskExposure
.
Code
))
special_risk_table
.
Code
==
RiskExposure
.
Code
))
q2
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
.
alias
(
'temp_universe'
)
q2
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
.
alias
(
'temp_universe'
)
big_table
=
join
(
big_table
,
q2
,
and_
(
special_risk_table
.
Date
==
q2
.
c
.
Date
,
special_risk_table
.
Code
==
q2
.
c
.
Code
))
big_table
=
join
(
big_table
,
q2
,
and_
(
special_risk_table
.
Date
==
q2
.
c
.
Date
,
special_risk_table
.
Code
==
q2
.
c
.
Code
))
query
=
select
(
query
=
select
(
[
RiskExposure
.
Date
,
RiskExposure
.
Code
,
special_risk_table
.
SRISK
]
+
risk_exposure_cols
)
\
[
RiskExposure
.
Date
,
RiskExposure
.
Code
,
special_risk_table
.
SRISK
]
+
risk_exposure_cols
)
\
...
@@ -342,9 +368,9 @@ class SqlEngine(object):
...
@@ -342,9 +368,9 @@ class SqlEngine(object):
def
fetch_data_range
(
self
,
def
fetch_data_range
(
self
,
universe
:
Universe
,
universe
:
Universe
,
factors
:
Iterable
[
str
],
factors
:
Iterable
[
str
],
start_date
:
str
=
None
,
start_date
:
str
=
None
,
end_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
,
dates
:
Iterable
[
str
]
=
None
,
benchmark
:
int
=
None
,
benchmark
:
int
=
None
,
risk_model
:
str
=
'short'
)
->
Dict
[
str
,
pd
.
DataFrame
]:
risk_model
:
str
=
'short'
)
->
Dict
[
str
,
pd
.
DataFrame
]:
...
@@ -377,7 +403,8 @@ if __name__ == '__main__':
...
@@ -377,7 +403,8 @@ if __name__ == '__main__':
ref_date
=
'2017-08-10'
ref_date
=
'2017-08-10'
codes
=
engine
.
fetch_codes_range
(
universe
,
None
,
None
,
[
'2017-01-01'
,
'2017-08-10'
])
codes
=
engine
.
fetch_codes_range
(
universe
,
None
,
None
,
[
'2017-01-01'
,
'2017-08-10'
])
data
=
engine
.
fetch_data_range
(
universe
,
[
'EPS'
],
None
,
None
,
[
'2017-01-01'
,
'2017-08-10'
],
905
,
'short'
)
data1
=
engine
.
fetch_dx_return
(
'2017-08-01'
,
)
data2
=
engine
.
fetch_dx_return_range
(
universe
,
'2017-08-01'
,
'2017-08-10'
,
[
'2017-08-01'
,
'2017-08-10'
])
print
(
codes
)
print
(
codes
)
print
(
data
)
print
(
data
)
alphamind/data/engines/universe.py
View file @
2daf5bbd
...
@@ -76,25 +76,16 @@ class Universe(object):
...
@@ -76,25 +76,16 @@ class Universe(object):
query
=
select
([
UniverseTable
.
Date
,
UniverseTable
.
Code
])
.
distinct
()
query
=
select
([
UniverseTable
.
Date
,
UniverseTable
.
Code
])
.
distinct
()
all_and_conditions
,
all_or_conditions
=
self
.
_create_condition
()
all_and_conditions
,
all_or_conditions
=
self
.
_create_condition
()
if
dates
:
dates_cond
=
UniverseTable
.
Date
.
in_
(
dates
)
if
dates
else
UniverseTable
.
Date
.
between
(
start_date
,
end_date
)
query
=
query
.
where
(
and_
(
query
=
query
.
where
(
UniverseTable
.
Date
.
in_
(
dates
),
and_
(
or_
(
dates_cond
,
and_
(
*
all_and_conditions
),
or_
(
*
all_or_conditions
and_
(
*
all_and_conditions
),
)
*
all_or_conditions
)
)
else
:
query
=
query
.
where
(
and_
(
UniverseTable
.
Date
.
between
(
start_date
,
end_date
),
or_
(
and_
(
*
all_and_conditions
),
*
all_or_conditions
)
)
)
)
)
)
return
query
return
query
alphamind/examples/quantile_analysis_example.py
View file @
2daf5bbd
...
@@ -8,38 +8,56 @@ Created on 2017-8-16
...
@@ -8,38 +8,56 @@ Created on 2017-8-16
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
matplotlib
import
pyplot
as
plt
from
matplotlib
import
pyplot
as
plt
from
PyFin.api
import
makeSchedule
from
PyFin.api
import
*
from
alphamind.api
import
*
from
alphamind.api
import
*
engine
=
SqlEngine
(
"mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha"
)
#
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
#engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220
/alpha')
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:we083826@localhost
/alpha'
)
universe
=
Universe
(
'custom'
,
[
'
pm500_mirror
'
])
universe
=
Universe
(
'custom'
,
[
'
zz500
'
])
neutralize_risk
=
[
'SIZE'
]
+
industry_styles
neutralize_risk
=
[
'SIZE'
]
+
industry_styles
n_bins
=
5
n_bins
=
5
factor_weights
=
np
.
array
([
1.
])
factor_weights
=
np
.
array
([
1.
])
dates
=
makeSchedule
(
'2016-08-14'
,
freq
=
'1w'
'2017-08-14'
,
tenor
=
'1w'
,
if
freq
==
'1m'
:
horizon
=
21
elif
freq
==
'1w'
:
horizon
=
4
elif
freq
==
'1d'
:
horizon
=
0
start_date
=
'2016-04-01'
end_date
=
'2017-08-16'
dates
=
makeSchedule
(
start_date
,
end_date
,
tenor
=
freq
,
calendar
=
'china.sse'
)
calendar
=
'china.sse'
)
prod_factors
=
[
'EARNYILD'
,
'ROAEBIT'
]
prod_factors
=
[
'EARNYILD'
,
'ROAEBIT'
,
'CHV'
,
'CFinc1'
]
all_data
=
engine
.
fetch_data_range
(
universe
,
prod_factors
,
dates
=
dates
,
benchmark
=
905
)
factor_all_data
=
all_data
[
'factor'
]
return_all_data
=
engine
.
fetch_dx_return_range
(
universe
,
start_date
,
end_date
,
dates
,
horizon
=
horizon
)
for
factor
in
prod_factors
:
for
factor
in
prod_factors
:
factors
=
[
factor
]
factors
=
[
factor
]
final_res
=
np
.
zeros
((
len
(
dates
),
n_bins
))
final_res
=
np
.
zeros
((
len
(
dates
),
n_bins
))
for
i
,
date
in
enumerate
(
dates
):
factor_groups
=
factor_all_data
.
groupby
(
'Date'
)
ref_date
=
date
.
strftime
(
'
%
Y-
%
m-
%
d'
)
return_groups
=
return_all_data
.
groupby
(
'Date'
)
codes
=
engine
.
fetch_codes
(
ref_date
,
universe
)
data
=
engine
.
fetch_data
(
ref_date
,
factors
,
codes
,
905
)
for
i
,
value
in
enumerate
(
factor_groups
):
returns
=
engine
.
fetch_dx_return
(
ref_date
,
codes
,
horizon
=
4
)
date
=
value
[
0
]
data
=
value
[
1
]
codes
=
data
.
Code
.
tolist
()
ref_date
=
value
[
0
]
.
strftime
(
'
%
Y-
%
m-
%
d'
)
returns
=
return_groups
.
get_group
(
date
)
total_data
=
pd
.
merge
(
data
[
'factor'
]
,
returns
,
on
=
[
'Code'
])
.
dropna
()
total_data
=
pd
.
merge
(
data
,
returns
,
on
=
[
'Code'
])
.
dropna
()
print
(
date
,
': '
,
len
(
total_data
))
print
(
date
,
': '
,
len
(
total_data
))
risk_exp
=
total_data
[
neutralize_risk
]
.
values
.
astype
(
float
)
risk_exp
=
total_data
[
neutralize_risk
]
.
values
.
astype
(
float
)
dx_return
=
total_data
.
dx
.
values
dx_return
=
total_data
.
dx
.
values
...
@@ -60,6 +78,10 @@ for factor in prod_factors:
...
@@ -60,6 +78,10 @@ for factor in prod_factors:
final_res
[
i
]
=
res
/
benchmark
.
sum
()
final_res
[
i
]
=
res
/
benchmark
.
sum
()
df
=
pd
.
DataFrame
(
final_res
,
index
=
dates
)
df
=
pd
.
DataFrame
(
final_res
,
index
=
dates
)
start_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
0
],
'-1m'
)
df
.
loc
[
start_date
]
=
0.
df
.
sort_index
(
inplace
=
True
)
df
.
cumsum
()
.
plot
(
figsize
=
(
12
,
6
))
df
.
cumsum
()
.
plot
(
figsize
=
(
12
,
6
))
plt
.
title
(
'{0} weekly re-balance'
.
format
(
factors
[
0
]))
plt
.
title
(
'{0} weekly re-balance'
.
format
(
factors
[
0
]))
plt
.
savefig
(
'{0}_big_universe_20170814.png'
.
format
(
factors
[
0
]))
plt
.
savefig
(
'{0}_big_universe_20170814.png'
.
format
(
factors
[
0
]))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment