Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
1acf5b7a
Commit
1acf5b7a
authored
Aug 19, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
restructure fetch data
parent
c0cdfcde
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
65 additions
and
34 deletions
+65
-34
sqlengine.py
alphamind/data/engines/sqlengine.py
+65
-34
No files found.
alphamind/data/engines/sqlengine.py
View file @
1acf5b7a
...
...
@@ -8,6 +8,7 @@ Created on 2017-7-7
from
typing
import
Iterable
from
typing
import
List
from
typing
import
Dict
from
typing
import
Tuple
import
numpy
as
np
import
pandas
as
pd
import
sqlalchemy
as
sa
...
...
@@ -90,7 +91,7 @@ def append_industry_info(df):
[
industry_codes
[
row
][
0
]
for
row
in
industry_dummies
]
def
_map_risk_model_table
(
risk_model
)
:
def
_map_risk_model_table
(
risk_model
:
str
)
->
tuple
:
if
risk_model
==
'day'
:
return
RiskCovDay
,
SpecificRiskDay
elif
risk_model
==
'short'
:
...
...
@@ -101,12 +102,12 @@ def _map_risk_model_table(risk_model):
raise
ValueError
(
"risk model name {0} is not recognized"
.
format
(
risk_model
))
def
_map_factors
(
factors
)
:
factor_cols
=
[]
def
_map_factors
(
factors
:
Iterable
[
str
])
->
dict
:
factor_cols
=
{}
for
f
in
factors
:
for
t
in
factor_tables
:
if
f
in
t
.
__table__
.
columns
:
factor_cols
.
append
(
t
.
__table__
.
columns
[
f
])
factor_cols
[
t
.
__table__
.
columns
[
f
]]
=
t
break
return
factor_cols
...
...
@@ -166,52 +167,82 @@ class SqlEngine(object):
return
pd
.
read_sql
(
query
,
self
.
session
.
bind
)
def
fetch_data
(
self
,
ref_date
,
factors
:
Iterable
[
str
],
codes
:
Iterable
[
int
],
benchmark
:
int
=
None
,
risk_model
:
str
=
'short'
)
->
Dict
[
str
,
pd
.
DataFrame
]:
risk_cov_table
,
special_risk_table
=
_map_risk_model_table
(
risk_model
)
def
fetch_factor
(
self
,
ref_date
:
str
,
factors
:
Iterable
[
str
],
codes
:
Iterable
[
int
])
->
pd
.
DataFrame
:
factor_cols
=
_map_factors
(
factors
)
cov_risk_cols
=
[
risk_cov_table
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
risk_exposure_cols
=
[
RiskExposure
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
big_table
=
outerjoin
(
Uqer
,
RiskExposure
,
and_
(
RiskExposure
.
Date
==
Uqer
.
Date
,
RiskExposure
.
Code
==
Uqer
.
Code
))
big_table
=
outerjoin
(
big_table
,
Market
,
and_
(
Market
.
Date
==
Uqer
.
Date
,
Market
.
Code
==
Uqer
.
Code
))
big_table
=
outerjoin
(
big_table
,
Tiny
,
and_
(
Tiny
.
Date
==
Uqer
.
Date
,
Tiny
.
Code
==
Uqer
.
Code
))
big_table
=
outerjoin
(
big_table
,
LegacyFactor
,
and_
(
LegacyFactor
.
Date
==
Uqer
.
Date
,
LegacyFactor
.
Code
==
Uqer
.
Code
))
big_table
=
outerjoin
(
big_table
,
special_risk_table
,
and_
(
special_risk_table
.
Date
==
Uqer
.
Date
,
special_risk_table
.
Code
==
Uqer
.
Code
))
big_table
=
Market
for
t
in
set
(
factor_cols
.
values
()):
big_table
=
outerjoin
(
big_table
,
t
,
and_
(
Market
.
Date
==
t
.
Date
,
Market
.
Code
==
t
.
Code
))
query
=
select
([
Uqer
.
Code
,
Market
.
isOpen
,
special_risk_table
.
SRISK
]
+
factor_cols
+
risk_exposure_cols
)
\
query
=
select
([
Market
.
Code
,
Market
.
isOpen
]
+
list
(
factor_cols
.
keys
())
)
\
.
select_from
(
big_table
)
\
.
where
(
and_
(
Uqer
.
Date
==
ref_date
,
Uqer
.
Code
.
in_
(
codes
)))
.
where
(
and_
(
Market
.
Date
==
ref_date
,
Market
.
Code
.
in_
(
codes
)))
return
pd
.
read_sql
(
query
,
self
.
engine
)
def
fetch_benchmark
(
self
,
ref_date
:
str
,
benchmark
:
int
)
->
pd
.
DataFrame
:
query
=
select
([
IndexComponent
.
Code
,
(
IndexComponent
.
weight
/
100.
)
.
label
(
'weight'
)])
.
where
(
and_
(
IndexComponent
.
Date
==
ref_date
,
IndexComponent
.
indexCode
==
benchmark
)
)
factor_data
=
pd
.
read_sql
(
query
,
self
.
engine
)
return
pd
.
read_sql
(
query
,
self
.
engine
)
def
fetch_risk_model
(
self
,
ref_date
:
str
,
codes
:
Iterable
[
int
],
risk_model
:
str
=
'short'
)
->
Tuple
[
pd
.
DataFrame
,
pd
.
DataFrame
]:
risk_cov_table
,
special_risk_table
=
_map_risk_model_table
(
risk_model
)
cov_risk_cols
=
[
risk_cov_table
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
query
=
select
([
risk_cov_table
.
FactorID
,
risk_cov_table
.
Factor
]
+
cov_risk_cols
)
.
where
(
risk_cov_table
.
Date
==
ref_date
)
risk_cov
_data
=
pd
.
read_sql
(
query
,
self
.
engine
)
.
sort_values
(
'FactorID'
)
risk_cov
=
pd
.
read_sql
(
query
,
self
.
engine
)
.
sort_values
(
'FactorID'
)
total_data
=
{
'risk_cov'
:
risk_cov_data
}
risk_exposure_cols
=
[
RiskExposure
.
__table__
.
columns
[
f
]
for
f
in
total_risk_factors
]
big_table
=
outerjoin
(
special_risk_table
,
RiskExposure
,
and_
(
special_risk_table
.
Date
==
RiskExposure
.
Date
,
special_risk_table
.
Code
==
RiskExposure
.
Code
))
query
=
select
(
[
RiskExposure
.
Code
,
special_risk_table
.
SRISK
]
+
risk_exposure_cols
)
\
.
select_from
(
big_table
)
\
.
where
(
and_
(
RiskExposure
.
Date
==
ref_date
,
RiskExposure
.
Code
.
in_
(
codes
)))
if
benchmark
:
query
=
select
([
IndexComponent
.
Code
,
(
IndexComponent
.
weight
/
100.
)
.
label
(
'weight'
)])
.
where
(
and_
(
IndexComponent
.
Date
==
ref_date
,
IndexComponent
.
indexCode
==
benchmark
)
)
risk_exp
=
pd
.
read_sql
(
query
,
self
.
engine
)
return
risk_cov
,
risk_exp
def
fetch_data
(
self
,
ref_date
,
factors
:
Iterable
[
str
],
codes
:
Iterable
[
int
],
benchmark
:
int
=
None
,
risk_model
:
str
=
'short'
)
->
Dict
[
str
,
pd
.
DataFrame
]:
benchmark_data
=
pd
.
read_sql
(
query
,
self
.
engine
)
total_data
=
{}
factor_data
=
self
.
fetch_factor
(
ref_date
,
factors
,
codes
)
if
benchmark
:
benchmark_data
=
self
.
fetch_benchmark
(
ref_date
,
benchmark
)
total_data
[
'benchmark'
]
=
benchmark_data
factor_data
=
pd
.
merge
(
factor_data
,
benchmark_data
,
how
=
'left'
,
on
=
[
'Code'
])
factor_data
[
'weight'
]
=
factor_data
[
'weight'
]
.
fillna
(
0.
)
if
risk_model
:
risk_cov
,
risk_exp
=
self
.
fetch_risk_model
(
ref_date
,
codes
,
risk_model
)
factor_data
=
pd
.
merge
(
factor_data
,
risk_exp
,
how
=
'left'
,
on
=
[
'Code'
])
total_data
[
'risk_cov'
]
=
risk_cov
total_data
[
'factor'
]
=
factor_data
append_industry_info
(
factor_data
)
...
...
@@ -219,14 +250,14 @@ class SqlEngine(object):
if
__name__
==
'__main__'
:
db_url
=
'postgresql+psycopg2://postgres:
A12345678!@10.63.6.220
/alpha'
db_url
=
'postgresql+psycopg2://postgres:
we083826@localhost
/alpha'
universe
=
Universe
(
'custom'
,
[
'zz500'
])
engine
=
SqlEngine
(
db_url
)
ref_date
=
'2017-08-10'
codes
=
engine
.
fetch_codes
(
ref_date
,
universe
)
data
=
engine
.
fetch_data
(
ref_date
,
[
'EPS'
],
codes
,
905
)
data
=
engine
.
fetch_data
(
ref_date
,
[
'EPS'
],
codes
,
905
,
'short'
)
d1ret
=
engine
.
fetch_dx_return
(
ref_date
,
codes
,
horizon
=
0
)
missing_codes
=
[
c
for
c
in
data
[
'factor'
]
.
Code
if
c
not
in
set
(
d1ret
.
Code
)]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment