Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
4d38f7b8
Commit
4d38f7b8
authored
Jul 08, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactor db engine
parent
de2d6d88
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
194 additions
and
0 deletions
+194
-0
__init__.py
alphamind/data/engines/__init__.py
+0
-0
sqlengine.py
alphamind/data/engines/sqlengine.py
+171
-0
universe.py
alphamind/data/engines/universe.py
+23
-0
No files found.
alphamind/data/engines/__init__.py
0 → 100644
View file @
4d38f7b8
alphamind/data/
stor
e.py
→
alphamind/data/
engines/sqlengin
e.py
View file @
4d38f7b8
# -*- coding: utf-8 -*-
"""
Created on 2017-
6-26
Created on 2017-
7-7
@author: cheng.li
"""
from
typing
import
Iterable
from
typing
import
Union
import
sqlalchemy
as
sa
from
typing
import
List
from
typing
import
Dict
import
numpy
as
np
import
pandas
as
pd
db_settings
=
{
'alpha'
:
{
'user'
:
'licheng'
,
'password'
:
'A12345678!'
,
'host'
:
'10.63.6.220'
,
'db'
:
'alpha'
,
'charset'
:
'utf8'
}
}
import
sqlalchemy
as
sa
from
alphamind.data.engines.universe
import
Universe
risk_styles
=
[
'BETA'
,
'MOMENTUM'
,
...
...
@@ -66,113 +57,115 @@ industry_styles = [
]
def
fetch_codes
(
codes
:
Union
[
str
,
Iterable
[
int
]],
start_date
,
end_date
,
engine
):
code_table
=
None
code_str
=
None
def
append_industry_info
(
df
):
industry_arr
=
np
.
array
(
industry_styles
)
industry_codes
=
np
.
arange
(
len
(
industry_styles
),
dtype
=
int
)
industry_dummies
=
df
[
industry_styles
]
.
values
.
astype
(
bool
)
if
isinstance
(
codes
,
str
):
# universe
sql
=
"select Date, Code from universe where Date >= '{0}' and Date <= '{1}' and universe = '{2}'"
\
.
format
(
start_date
,
end_date
,
codes
)
df
[
'industry'
],
df
[
'industry_code'
]
=
[
industry_arr
[
row
][
0
]
for
row
in
industry_dummies
],
\
[
industry_codes
[
row
][
0
]
for
row
in
industry_dummies
]
code_table
=
pd
.
read_sql
(
sql
,
engine
)
elif
hasattr
(
codes
,
'__iter__'
):
code_str
=
','
.
join
(
str
(
c
)
for
c
in
codes
)
class
SqlEngine
(
object
):
def __init__(self, db_url: str, universe: Universe):
    """Bind the engine to a database and to a stock universe.

    :param db_url: connection URL handed unchanged to ``sa.create_engine``.
    :param universe: universe definition, kept on ``self.unv``.
    """
    # The two attributes are independent of each other.
    self.unv = universe
    self.engine = sa.create_engine(db_url)
return
code_table
,
code_str
def
fetch_codes
(
self
,
ref_date
:
str
)
->
List
[
int
]:
def get_universe(univ, ref_date):
    """Return the set of distinct codes listed under ``univ`` on ``ref_date``.

    :param univ: iterable of universe names (strings); each is wrapped in
        single quotes and placed in the SQL ``in (...)`` list.
    :param ref_date: date string compared for equality with the ``Date`` column.
    :return: ``set`` holding the first column of every returned row.

    NOTE(review): both arguments are interpolated directly into the SQL
    text, so they must come from trusted input.
    ``self`` is taken from the enclosing method's scope.
    """
    quoted_names = ["'" + name + "'" for name in univ]
    query = "select distinct Code from universe where Date = '{ref_date}' and universe in ({univ_str})".format(
        ref_date=ref_date,
        univ_str=','.join(quoted_names))
    rows = self.engine.execute(query).fetchall()
    return {row[0] for row in rows}
def industry_mapping(industry_arr, industry_codes, industry_dummies):
    """Map each dummy row to its industry name and industry code.

    :param industry_arr: array of industry names, indexed by each row mask.
    :param industry_codes: array of industry codes, indexed the same way.
    :param industry_dummies: iterable of per-row index masks
        (assumed boolean, one flag per industry -- confirm against caller).
    :return: tuple ``(names, codes)`` of two lists; each element is the first
        entry selected by the corresponding row mask.
    :raises IndexError: if a row mask selects nothing.
    """
    def _first_selected(values):
        # First element picked out of ``values`` by every row mask, in order.
        return [values[mask][0] for mask in industry_dummies]

    names = _first_selected(industry_arr)
    codes = _first_selected(industry_codes)
    return names, codes
codes_set
=
None
if
self
.
unv
.
include_universe
:
include_codes_set
=
get_universe
(
self
.
unv
.
include_universe
,
ref_date
)
codes_set
=
include_codes_set
def
append_industry_info
(
df
):
industry_arr
=
np
.
array
(
industry_styles
)
industry_codes
=
np
.
arange
(
len
(
industry_styles
),
dtype
=
int
)
industry_dummies
=
df
[
industry_styles
]
.
values
.
astype
(
bool
)
if
self
.
unv
.
exclude_universe
:
exclude_codes_set
=
get_universe
(
self
.
unv
.
exclude_universe
,
ref_date
)
codes_set
-=
exclude_codes_set
df
[
'industry'
],
df
[
'industry_code'
]
=
industry_mapping
(
industry_arr
,
industry_codes
,
industry_dummies
)
if
self
.
unv
.
include_codes
:
codes_set
=
codes_set
.
union
(
self
.
unv
.
include_codes
)
if
self
.
unv
.
exclude_codes
:
codes_set
-=
set
(
self
.
unv
.
exclude_codes
)
def
fetch_data
(
factors
:
Iterable
[
str
],
start_date
:
str
,
end_date
:
str
,
codes
:
Union
[
str
,
Iterable
[
int
]]
=
None
,
benchmark
:
int
=
None
,
risk_model
:
str
=
'day'
)
->
dict
:
engine
=
sa
.
create_engine
(
'mssql+pymssql://{user}:{password}@{host}/{db}?charset={charset}'
.
format
(
**
db_settings
[
'alpha'
]))
return
sorted
(
codes_set
)
factor_str
=
','
.
join
(
'uqer.'
+
f
for
f
in
factors
)
code_table
,
code_str
=
fetch_codes
(
codes
,
start_date
,
end_date
,
engine
)
def
fetch_data
(
self
,
ref_date
,
factors
:
Iterable
[
str
],
codes
:
Iterable
[
int
],
benchmark
:
int
=
None
,
risk_model
:
str
=
'short'
)
->
Dict
[
str
,
pd
.
DataFrame
]:
total_risk_factors
=
risk_styles
+
industry_styles
risk_str
=
','
.
join
(
'risk_exposure.'
+
f
for
f
in
total_risk_factors
)
factor_str
=
','
.
join
(
'uqer.'
+
f
for
f
in
factors
)
special_risk_table
=
'specific_risk_'
+
risk_model
total_risk_factors
=
risk_styles
+
industry_styles
risk_str
=
','
.
join
(
'risk_exposure.'
+
f
for
f
in
total_risk_factors
)
if
code_str
:
sql
=
"select uqer.Date, uqer.Code, {0}, {3}, market.isOpen, daily_return.d1, {5}.SRISK"
\
" from (uqer INNER JOIN"
\
" risk_exposure on uqer.Date = risk_exposure.Date and uqer.Code = risk_exposure.Code)"
\
" INNER JOIN market on uqer.Date = market.Date and uqer.Code = market.Code"
\
" INNER JOIN daily_return on uqer.Date = daily_return.Date and uqer.Code = daily_return.Code"
\
" INNER JOIN {5} on uqer.Date = {5}.Date and uqer.Code = {5}.Code"
\
" where uqer.Date >= '{1}' and uqer.Date <= '{2}' and uqer.Code in ({4})"
.
format
(
factor_str
,
start_date
,
end_date
,
risk_str
,
code_str
,
special_risk_table
)
else
:
sql
=
"select uqer.Date, uqer.Code, {0}, {3}, market.isOpen, daily_return.d1, {4}.SRISK"
\
special_risk_table
=
'specific_risk_'
+
risk_model
codes_str
=
','
.
join
(
str
(
c
)
for
c
in
codes
)
sql
=
"select uqer.Code, {factors}, {risks}, market.isOpen, daily_return.d1, {risk_table}.SRISK"
\
" from (uqer INNER JOIN"
\
" risk_exposure on uqer.Date = risk_exposure.Date and uqer.Code = risk_exposure.Code)"
\
" INNER JOIN market on uqer.Date = market.Date and uqer.Code = market.Code"
\
" INNER JOIN daily_return on uqer.Date = daily_return.Date and uqer.Code = daily_return.Code"
\
" INNER JOIN {
4} on uqer.Date = {4}.Date and uqer.Code = {4
}.Code"
\
" where uqer.Date
>= '{1}' and uqer.Date <= '{2}'"
.
format
(
factor_str
,
start
_date
,
end_date
,
risk_str
,
special_risk_table
)
" INNER JOIN {
risk_table} on uqer.Date = {risk_table}.Date and uqer.Code = {risk_table
}.Code"
\
" where uqer.Date
= '{ref_date}' and uqer.Code in ({codes})"
.
format
(
factors
=
factor_str
,
ref_date
=
ref
_date
,
codes
=
codes_str
,
risks
=
risk_str
,
risk_table
=
special_risk_table
)
factor_data
=
pd
.
read_sql
(
sql
,
engine
)
factor_data
=
pd
.
read_sql
(
sql
,
self
.
engine
)
if
code_table
is
not
None
:
factor_data
=
pd
.
merge
(
factor_data
,
code_table
,
on
=
[
'Date'
,
'Code'
]
)
risk_cov_table
=
'risk_cov_'
+
risk_model
risk_str
=
','
.
join
(
risk_cov_table
+
'.'
+
f
for
f
in
total_risk_factors
)
risk_cov_table
=
'risk_cov_'
+
risk_model
risk_str
=
','
.
join
(
risk_cov_table
+
'.'
+
f
for
f
in
total_risk_factors
)
sql
=
"select FactorID, Factor, {risks} from {risk_table} where Date = '{ref_date}'"
.
format
(
ref_date
=
ref_date
,
risks
=
risk_str
,
risk_table
=
risk_cov_table
)
sql
=
"select Date, FactorID, Factor, {0} from {1} where Date >= '{2}' and Date <= '{3}'"
.
format
(
risk_str
,
risk_cov_table
,
start_date
,
end_date
)
risk_cov_data
=
pd
.
read_sql
(
sql
,
self
.
engine
)
.
sort_values
(
'FactorID'
)
risk_cov_data
=
pd
.
read_sql
(
sql
,
engine
)
total_data
=
{
'factor'
:
factor_data
,
'risk_cov'
:
risk_cov_data
}
total_data
=
{
'factor'
:
factor_data
,
'risk_cov'
:
risk_cov_data
}
if
benchmark
:
sql
=
"select Code, weight / 100. as weight from index_components "
\
"where Date = '{ref_date}' and indexCode = {benchmakr}"
.
format
(
ref_date
=
ref_date
,
benchmakr
=
benchmark
)
if
benchmark
:
sql
=
"select Date, Code, weight / 100. as weight from index_components "
\
"where Date >= '{0}' and Date <= '{1}' and indexCode = {2}"
.
format
(
start_date
,
end_date
,
benchmark
)
benchmark_data
=
pd
.
read_sql
(
sql
,
self
.
engine
)
total_data
[
'benchmark'
]
=
benchmark_data
benchmark_data
=
pd
.
read_sql
(
sql
,
engine
)
total_data
[
'benchmark'
]
=
benchmark_data
append_industry_info
(
factor_data
)
return
total_data
append_industry_info
(
factor_data
)
return
total_data
if
__name__
==
'__main__'
:
db_url
=
'mysql+mysqldb://root:we083826@localhost/alpha?charset=utf8'
universe
=
Universe
([
'zz500'
])
engine
=
SqlEngine
(
db_url
,
universe
)
ref_date
=
'2017-07-04'
import
datetime
as
dt
start
=
dt
.
datetime
.
now
()
res
=
fetch_data
([
'EPS'
],
'2017-01-03'
,
'2017-06-05'
,
benchmark
=
905
,
codes
=
'zz500'
)
print
(
res
)
for
i
in
range
(
500
):
codes
=
engine
.
fetch_codes
(
'2017-07-04'
)
total_data
=
engine
.
fetch_data
(
ref_date
,
[
'EPS'
],
[
1
,
5
],
905
)
print
(
dt
.
datetime
.
now
()
-
start
)
print
(
total_data
)
alphamind/data/engines/universe.py
0 → 100644
View file @
4d38f7b8
# -*- coding: utf-8 -*-
"""
Created on 2017-7-7
@author: cheng.li
"""
from
typing
import
Iterable
class Universe(object):
    """Definition of a stock universe.

    The universe is described by four optional selections, each stored
    as an attribute of the same name:

    * ``include_universe`` -- names of universes to include
    * ``exclude_universe`` -- names of universes to exclude
    * ``include_codes`` -- individual codes to include
    * ``exclude_codes`` -- individual codes to exclude

    An omitted selection is stored as ``None``; any other value is stored
    as a ``list``.
    """

    def __init__(self,
                 include_universe: Iterable[str] = None,
                 exclude_universe: Iterable[str] = None,
                 include_codes: Iterable[str] = None,
                 exclude_codes: Iterable[str] = None):
        """Store the four selections after normalising them to lists.

        :param include_universe: universe names to include, or ``None``.
        :param exclude_universe: universe names to exclude, or ``None``.
        :param include_codes: individual codes to include, or ``None``.
        :param exclude_codes: individual codes to exclude, or ``None``.

        A bare string is treated as a single item, not as a sequence of
        characters.
        """
        self.include_universe = self._as_list(include_universe)
        self.exclude_universe = self._as_list(exclude_universe)
        self.include_codes = self._as_list(include_codes)
        self.exclude_codes = self._as_list(exclude_codes)

    @staticmethod
    def _as_list(values):
        """Return ``values`` as a reusable list, keeping ``None`` as ``None``."""
        if values is None:
            return None
        if isinstance(values, str):
            # Iterating a bare string would yield single characters.
            return [values]
        # Copy so that one-shot iterators (generators) can be read many times.
        return list(values)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment