Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
459abdd1
Commit
459abdd1
authored
Aug 23, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added interface for alpha formula definition
parent
171f1d75
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
212 additions
and
19 deletions
+212
-19
sqlengine.py
alphamind/data/engines/sqlengine.py
+33
-9
transformer.py
alphamind/data/transformer.py
+75
-0
factor_analysis_example.py
alphamind/examples/factor_analysis_example.py
+14
-7
formula_expression.py
alphamind/examples/formula_expression.py
+87
-0
plot_quantile_res.py
alphamind/examples/plot_quantile_res.py
+3
-3
No files found.
alphamind/data/engines/sqlengine.py
View file @
459abdd1
...
...
@@ -13,7 +13,7 @@ import numpy as np
import
pandas
as
pd
import
sqlalchemy
as
sa
import
sqlalchemy.orm
as
orm
from
sqlalchemy
import
select
,
and_
,
outerjoin
,
join
,
over
from
sqlalchemy
import
select
,
and_
,
outerjoin
,
join
from
sqlalchemy.sql
import
func
from
alphamind.data.engines.universe
import
Universe
from
alphamind.data.dbmodel.models
import
FactorMaster
...
...
@@ -32,6 +32,7 @@ from alphamind.data.dbmodel.models import RiskCovShort
from
alphamind.data.dbmodel.models
import
RiskCovLong
from
alphamind.data.dbmodel.models
import
RiskExposure
from
alphamind.data.dbmodel.models
import
Market
from
alphamind.data.transformer
import
Transformer
from
PyFin.api
import
advanceDateByCalendar
risk_styles
=
[
'BETA'
,
...
...
@@ -218,9 +219,13 @@ class SqlEngine(object):
def
fetch_factor
(
self
,
ref_date
:
str
,
factors
:
Iterable
[
str
],
factors
:
Iterable
[
object
],
codes
:
Iterable
[
int
])
->
pd
.
DataFrame
:
factor_cols
=
_map_factors
(
factors
)
transformer
=
Transformer
(
factors
)
dependency
=
transformer
.
dependency
factor_cols
=
_map_factors
(
dependency
)
big_table
=
Market
for
t
in
set
(
factor_cols
.
values
()):
...
...
@@ -230,15 +235,24 @@ class SqlEngine(object):
.
select_from
(
big_table
)
\
.
where
(
and_
(
Market
.
Date
==
ref_date
,
Market
.
Code
.
in_
(
codes
)))
return
pd
.
read_sql
(
query
,
self
.
engine
)
df
=
pd
.
read_sql
(
query
,
self
.
engine
)
res
=
transformer
.
transform
(
'Code'
,
df
)
for
col
in
res
.
columns
:
if
col
not
in
set
([
'Code'
,
'isOpen'
])
and
col
not
in
df
.
columns
:
df
[
col
]
=
res
[
col
]
.
values
return
df
def
fetch_factor_range
(
self
,
universe
:
Universe
,
factors
:
Iterable
[
str
],
factors
:
Iterable
[
object
],
start_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
factor_cols
=
_map_factors
(
factors
)
transformer
=
Transformer
(
factors
)
dependency
=
transformer
.
dependency
factor_cols
=
_map_factors
(
dependency
)
q2
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
.
alias
(
'temp_universe'
)
...
...
@@ -249,7 +263,14 @@ class SqlEngine(object):
query
=
select
([
Market
.
Date
,
Market
.
Code
,
Market
.
isOpen
]
+
list
(
factor_cols
.
keys
()))
\
.
select_from
(
big_table
)
return
pd
.
read_sql
(
query
,
self
.
engine
)
df
=
pd
.
read_sql
(
query
,
self
.
engine
)
.
sort_values
([
'Date'
,
'Code'
])
.
set_index
(
'Date'
)
res
=
transformer
.
transform
(
'Code'
,
df
)
for
col
in
res
.
columns
:
if
col
not
in
set
([
'Code'
,
'isOpen'
])
and
col
not
in
df
.
columns
:
df
[
col
]
=
res
[
col
]
.
values
return
df
.
reset_index
()
def
fetch_benchmark
(
self
,
ref_date
:
str
,
...
...
@@ -402,13 +423,16 @@ class SqlEngine(object):
if
__name__
==
'__main__'
:
from
PyFin.api
import
*
db_url
=
'postgresql+psycopg2://postgres:we083826@localhost/alpha'
db_url
=
'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha'
universe
=
Universe
(
'custom'
,
[
'zz500'
])
engine
=
SqlEngine
(
db_url
)
ref_date
=
'2017-08-10'
codes
=
engine
.
fetch_codes_range
(
universe
,
None
,
None
,
[
'2017-01-01'
,
'2017-08-10'
])
data2
=
engine
.
fetch_dx_return_range
(
universe
,
'2017-08-01'
,
'2017-08-10'
,
[
'2017-08-01'
,
'2017-08-10'
])
codes
=
engine
.
fetch_codes
(
universe
=
universe
,
ref_date
=
'2017-08-10'
)
MAXIMUM
((
'EPS'
,
'ROEDiluted'
))
data2
=
engine
.
fetch_factor_range
(
universe
=
universe
,
dates
=
[
'2017-08-01'
,
'2017-08-10'
],
factors
=
{
'factor'
:
MAXIMUM
((
'EPS'
,
'ROEDiluted'
))})
print
(
codes
)
print
(
data2
)
alphamind/data/transformer.py
0 → 100644
View file @
459abdd1
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import
pandas
as
pd
from
PyFin.api
import
pyFinAssert
from
PyFin.Analysis.SecurityValueHolders
import
SecurityValueHolder
from
PyFin.api
import
transform
as
transform_impl
# Name given to a single anonymous expression; list entries get this name
# plus a ``_001``-style counter suffix.
DEFAULT_FACTOR_NAME = 'user_factor'


def factor_translator(factor_pool):
    """Normalise a factor specification into ``(expressions, dependency)``.

    Accepted forms of ``factor_pool``:

    * ``str`` -- a single raw field; it is mapped to itself.
    * ``SecurityValueHolder`` -- a single expression, registered under
      ``DEFAULT_FACTOR_NAME``.
    * ``dict`` -- ``{factor_name: expression_or_field}``; keys must be
      strings and values must be strings or value holders.
    * ``list`` / ``tuple`` -- a sequence of fields and/or expressions.
      Strings are mapped to themselves; value holders are named
      ``DEFAULT_FACTOR_NAME + '_001'``, ``'_002'``, ... in order of
      appearance.

    Returns:
        A 2-tuple ``(factor_dict, dependency)`` where ``factor_dict`` maps
        factor names to their expression (or raw field name) and
        ``dependency`` is the sorted list of raw fields needed to evaluate
        every expression. For ``dict`` input the very same dict object is
        returned as ``factor_dict``.

    Raises:
        ValueError: if ``factor_pool`` (or one of its entries) is not in
            one of the supported forms.
    """
    if isinstance(factor_pool, str):
        return {factor_pool: factor_pool}, [factor_pool]

    if isinstance(factor_pool, SecurityValueHolder):
        return {DEFAULT_FACTOR_NAME: factor_pool}, sorted(factor_pool.fields)

    if isinstance(factor_pool, dict):
        dependency = set()
        for name, expression in factor_pool.items():
            pyFinAssert(isinstance(name, str),
                        ValueError,
                        'factor_name {0} should be string.'.format(name))
            pyFinAssert(isinstance(expression, (SecurityValueHolder, str)),
                        ValueError,
                        'expression {0} should be a value holder or a string.'.format(expression))
            if isinstance(expression, str):
                dependency.add(expression)
            else:
                dependency.update(expression.fields)
        return factor_pool, sorted(dependency)

    if isinstance(factor_pool, (list, tuple)):
        factor_dict = {}
        dependency = set()
        anonymous_count = 0
        for entry in factor_pool:
            if isinstance(entry, str):
                factor_dict[entry] = entry
                dependency.add(entry)
            elif isinstance(entry, SecurityValueHolder):
                anonymous_count += 1
                generated_name = DEFAULT_FACTOR_NAME + '_' + str(anonymous_count).zfill(3)
                factor_dict[generated_name] = entry
                dependency.update(entry.fields)
            else:
                # Mirror the validation of the dict branch instead of
                # silently dropping an entry the caller asked for.
                raise ValueError(
                    'expression {0} should be a value holder or a string.'.format(entry))
        return factor_dict, sorted(dependency)

    raise ValueError('{0} is not in valid format as factors'.format(factor_pool))
class Transformer(object):
    """Evaluate a set of named factor expressions against a data frame.

    Attributes are set in ``__init__`` from ``factor_translator``:
    ``names`` (factor names), ``expressions`` (the matching expressions or
    raw field names, in the same order) and ``dependency`` (sorted raw
    fields required by the expressions).
    """

    def __init__(self, expressions):
        """Translate ``expressions`` into parallel name/expression lists.

        ``expressions`` may be anything ``factor_translator`` accepts.
        """
        expression_dict, expression_dependency = \
            factor_translator(expressions)

        # Read both lists straight from the mapping. The former
        # ``list(zip(*items))`` unpacking produced an empty list for an
        # empty mapping and then failed with IndexError on ``res[0]``.
        self.names = list(expression_dict.keys())
        self.expressions = list(expression_dict.values())
        self.dependency = expression_dependency

    def transform(self, group_name, data):
        """Apply the stored expressions to ``data``.

        ``group_name`` is forwarded to the PyFin ``transform`` call as its
        category field. An empty ``data`` short-circuits to an empty
        ``pd.DataFrame`` without calling PyFin at all.
        """
        if len(data) > 0:
            return transform_impl(data,
                                  self.expressions,
                                  self.names,
                                  group_name,
                                  dropna=False)
        return pd.DataFrame()
alphamind/examples/factor_analysis_example.py
View file @
459abdd1
...
...
@@ -8,8 +8,8 @@ Created on 2017-8-16
import
numpy
as
np
import
pandas
as
pd
from
matplotlib
import
pyplot
as
plt
from
PyFin.api
import
makeSchedule
from
alphamind.api
import
*
from
PyFin.api
import
*
strategies
=
{
...
...
@@ -27,8 +27,8 @@ strategies = {
engine
=
SqlEngine
(
"mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha"
)
universe
=
Universe
(
'custom'
,
[
'zz500'
])
benchmark_code
=
905
neutralize_risk
=
[
'SIZE'
]
+
industry_styles
constraint_risk
=
[
'SIZE'
]
+
industry_styles
neutralize_risk
=
industry_styles
constraint_risk
=
industry_styles
freq
=
'1w'
if
freq
==
'1m'
:
...
...
@@ -38,8 +38,8 @@ elif freq == '1w':
elif
freq
==
'1d'
:
horizon
=
0
dates
=
makeSchedule
(
'201
2-01-14
'
,
'2017-08-1
4
'
,
dates
=
makeSchedule
(
'201
7-01-01
'
,
'2017-08-1
8
'
,
tenor
=
freq
,
calendar
=
'china.sse'
)
...
...
@@ -75,7 +75,7 @@ for strategy in strategies:
risk_target
=
risk_exp_expand
.
T
@
benchmark
lbound
=
np
.
zeros
(
len
(
total_data
))
ubound
=
0.0
1
+
benchmark
ubound
=
0.0
2
+
benchmark
constraint
=
Constraints
(
risk_exp_expand
,
risk_names
)
for
i
,
name
in
enumerate
(
risk_names
):
...
...
@@ -92,7 +92,9 @@ for strategy in strategies:
is_tradable
=
total_data
.
isOpen
.
values
.
astype
(
bool
),
method
=
'risk_neutral'
,
constraints
=
constraint
,
use_rank
=
100
)
use_rank
=
50
,
lbound
=
lbound
,
ubound
=
ubound
)
except
Exception
as
e
:
print
(
e
)
rets
.
append
(
0.
)
...
...
@@ -103,6 +105,11 @@ for strategy in strategies:
ret_df
=
pd
.
DataFrame
(
total_data_dict
,
index
=
dates
)
start_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
0
],
'-1w'
)
ret_df
.
loc
[
start_date
]
=
0.
ret_df
.
sort_index
(
inplace
=
True
)
ret_df
.
cumsum
()
.
plot
(
figsize
=
(
12
,
6
))
plt
.
savefig
(
"backtest_big_universe_20170814.png"
)
plt
.
show
()
alphamind/examples/formula_expression.py
0 → 100644
View file @
459abdd1
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import
numpy
as
np
import
pandas
as
pd
from
PyFin.api
import
*
from
alphamind.api
import
*
from
matplotlib
import
pyplot
as
plt
# define your alpha formula here
base_factors = ['EPS', 'ROEDiluted', 'VAL', 'CFinc1']

# The alpha is the plain sum of LAST(<field>) over every base factor.
expression = 0.
for name in base_factors:
    expression = expression + LAST(name)

alpha_factor_name = 'alpha_factor'
alpha_factor = {alpha_factor_name: expression}
# end of formula definition

# NOTE(review): the connection string embeds a user name and password;
# consider loading it from configuration instead of committing it.
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
universe = Universe('custom', ['zz500'])
benchmark_code = 905
neutralize_risk = ['SIZE'] + industry_styles
freq = '1w'
n_bins = 5

if freq == '1m':
    horizon = 21
elif freq == '1w':
    horizon = 4
elif freq == '1d':
    horizon = 0
else:
    # Fail here rather than with a NameError on ``horizon`` further down.
    raise ValueError('unsupported freq: {0}'.format(freq))

dates = makeSchedule('2017-01-01',
                     '2017-08-18',
                     tenor=freq,
                     calendar='china.sse')

factor_all_data = engine.fetch_data_range(universe,
                                          alpha_factor,
                                          dates=dates,
                                          benchmark=benchmark_code)['factor']

factor_groups = factor_all_data.groupby('Date')
# One row per schedule date, one column per quantile bin.
# NOTE(review): rows are filled by group position, which presumably assumes
# one 'Date' group per entry of ``dates`` -- confirm.
final_res = np.zeros((len(dates), n_bins))

for i, (date, group) in enumerate(factor_groups):
    data = group[['Code', alpha_factor_name, 'isOpen', 'weight'] + neutralize_risk]
    codes = data.Code.tolist()
    returns = engine.fetch_dx_return(date, codes, horizon=horizon)

    # Keep only codes that have both factor data and a forward return.
    total_data = pd.merge(data, returns, on=['Code']).dropna()
    risk_exp = total_data[neutralize_risk].values.astype(float)
    dx_return = total_data.dx.values
    benchmark = total_data.weight.values

    f_data = total_data[[alpha_factor_name]]
    try:
        res = quantile_analysis(f_data,
                                [1.],
                                dx_return,
                                risk_exp=risk_exp,
                                n_bins=n_bins,
                                benchmark=benchmark)
    except Exception as e:
        # Best effort: report the failure and record zeros for this date
        # so the remaining dates are still processed.
        print(e)
        res = np.zeros(n_bins)

    # Scale the per-bin result by the total benchmark weight of the sample.
    final_res[i] = res / benchmark.sum()

df = pd.DataFrame(final_res, index=dates)

# Prepend an all-zero row one week before the first date so the cumulative
# curve starts from zero.
start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
df.loc[start_date] = 0.
df.sort_index(inplace=True)

df.cumsum().plot()
plt.show()
alphamind/examples/plot_quantile_res.py
View file @
459abdd1
...
...
@@ -30,15 +30,15 @@ elif freq == '1w':
elif
freq
==
'1d'
:
horizon
=
0
start_date
=
'201
6-04
-01'
end_date
=
'2017-08-1
7
'
start_date
=
'201
7-01
-01'
end_date
=
'2017-08-1
8
'
dates
=
makeSchedule
(
start_date
,
end_date
,
tenor
=
freq
,
calendar
=
'china.sse'
)
prod_factors
=
[
'
IVR'
,
'RVOL
'
]
prod_factors
=
[
'
EPS
'
]
all_data
=
engine
.
fetch_data_range
(
universe
,
prod_factors
,
dates
=
dates
,
benchmark
=
905
)
factor_all_data
=
all_data
[
'factor'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment