Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
459abdd1
Commit
459abdd1
authored
Aug 23, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added interface for alpha formula definition
parent
171f1d75
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
212 additions
and
19 deletions
+212
-19
sqlengine.py
alphamind/data/engines/sqlengine.py
+33
-9
transformer.py
alphamind/data/transformer.py
+75
-0
factor_analysis_example.py
alphamind/examples/factor_analysis_example.py
+14
-7
formula_expression.py
alphamind/examples/formula_expression.py
+87
-0
plot_quantile_res.py
alphamind/examples/plot_quantile_res.py
+3
-3
No files found.
alphamind/data/engines/sqlengine.py
View file @
459abdd1
...
@@ -13,7 +13,7 @@ import numpy as np
...
@@ -13,7 +13,7 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
import
sqlalchemy
as
sa
import
sqlalchemy
as
sa
import
sqlalchemy.orm
as
orm
import
sqlalchemy.orm
as
orm
from
sqlalchemy
import
select
,
and_
,
outerjoin
,
join
,
over
from
sqlalchemy
import
select
,
and_
,
outerjoin
,
join
from
sqlalchemy.sql
import
func
from
sqlalchemy.sql
import
func
from
alphamind.data.engines.universe
import
Universe
from
alphamind.data.engines.universe
import
Universe
from
alphamind.data.dbmodel.models
import
FactorMaster
from
alphamind.data.dbmodel.models
import
FactorMaster
...
@@ -32,6 +32,7 @@ from alphamind.data.dbmodel.models import RiskCovShort
...
@@ -32,6 +32,7 @@ from alphamind.data.dbmodel.models import RiskCovShort
from
alphamind.data.dbmodel.models
import
RiskCovLong
from
alphamind.data.dbmodel.models
import
RiskCovLong
from
alphamind.data.dbmodel.models
import
RiskExposure
from
alphamind.data.dbmodel.models
import
RiskExposure
from
alphamind.data.dbmodel.models
import
Market
from
alphamind.data.dbmodel.models
import
Market
from
alphamind.data.transformer
import
Transformer
from
PyFin.api
import
advanceDateByCalendar
from
PyFin.api
import
advanceDateByCalendar
risk_styles
=
[
'BETA'
,
risk_styles
=
[
'BETA'
,
...
@@ -218,9 +219,13 @@ class SqlEngine(object):
...
@@ -218,9 +219,13 @@ class SqlEngine(object):
def
fetch_factor
(
self
,
def
fetch_factor
(
self
,
ref_date
:
str
,
ref_date
:
str
,
factors
:
Iterable
[
str
],
factors
:
Iterable
[
object
],
codes
:
Iterable
[
int
])
->
pd
.
DataFrame
:
codes
:
Iterable
[
int
])
->
pd
.
DataFrame
:
factor_cols
=
_map_factors
(
factors
)
transformer
=
Transformer
(
factors
)
dependency
=
transformer
.
dependency
factor_cols
=
_map_factors
(
dependency
)
big_table
=
Market
big_table
=
Market
for
t
in
set
(
factor_cols
.
values
()):
for
t
in
set
(
factor_cols
.
values
()):
...
@@ -230,15 +235,24 @@ class SqlEngine(object):
...
@@ -230,15 +235,24 @@ class SqlEngine(object):
.
select_from
(
big_table
)
\
.
select_from
(
big_table
)
\
.
where
(
and_
(
Market
.
Date
==
ref_date
,
Market
.
Code
.
in_
(
codes
)))
.
where
(
and_
(
Market
.
Date
==
ref_date
,
Market
.
Code
.
in_
(
codes
)))
return
pd
.
read_sql
(
query
,
self
.
engine
)
df
=
pd
.
read_sql
(
query
,
self
.
engine
)
res
=
transformer
.
transform
(
'Code'
,
df
)
for
col
in
res
.
columns
:
if
col
not
in
set
([
'Code'
,
'isOpen'
])
and
col
not
in
df
.
columns
:
df
[
col
]
=
res
[
col
]
.
values
return
df
def
fetch_factor_range
(
self
,
def
fetch_factor_range
(
self
,
universe
:
Universe
,
universe
:
Universe
,
factors
:
Iterable
[
str
],
factors
:
Iterable
[
object
],
start_date
:
str
=
None
,
start_date
:
str
=
None
,
end_date
:
str
=
None
,
end_date
:
str
=
None
,
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
dates
:
Iterable
[
str
]
=
None
)
->
pd
.
DataFrame
:
factor_cols
=
_map_factors
(
factors
)
transformer
=
Transformer
(
factors
)
dependency
=
transformer
.
dependency
factor_cols
=
_map_factors
(
dependency
)
q2
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
.
alias
(
'temp_universe'
)
q2
=
universe
.
query_range
(
start_date
,
end_date
,
dates
)
.
alias
(
'temp_universe'
)
...
@@ -249,7 +263,14 @@ class SqlEngine(object):
...
@@ -249,7 +263,14 @@ class SqlEngine(object):
query
=
select
([
Market
.
Date
,
Market
.
Code
,
Market
.
isOpen
]
+
list
(
factor_cols
.
keys
()))
\
query
=
select
([
Market
.
Date
,
Market
.
Code
,
Market
.
isOpen
]
+
list
(
factor_cols
.
keys
()))
\
.
select_from
(
big_table
)
.
select_from
(
big_table
)
return
pd
.
read_sql
(
query
,
self
.
engine
)
df
=
pd
.
read_sql
(
query
,
self
.
engine
)
.
sort_values
([
'Date'
,
'Code'
])
.
set_index
(
'Date'
)
res
=
transformer
.
transform
(
'Code'
,
df
)
for
col
in
res
.
columns
:
if
col
not
in
set
([
'Code'
,
'isOpen'
])
and
col
not
in
df
.
columns
:
df
[
col
]
=
res
[
col
]
.
values
return
df
.
reset_index
()
def
fetch_benchmark
(
self
,
def
fetch_benchmark
(
self
,
ref_date
:
str
,
ref_date
:
str
,
...
@@ -402,13 +423,16 @@ class SqlEngine(object):
...
@@ -402,13 +423,16 @@ class SqlEngine(object):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
from
PyFin.api
import
*
db_url
=
'postgresql+psycopg2://postgres:we083826@localhost/alpha'
db_url
=
'postgresql+psycopg2://postgres:we083826@localhost/alpha'
db_url
=
'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha'
universe
=
Universe
(
'custom'
,
[
'zz500'
])
universe
=
Universe
(
'custom'
,
[
'zz500'
])
engine
=
SqlEngine
(
db_url
)
engine
=
SqlEngine
(
db_url
)
ref_date
=
'2017-08-10'
ref_date
=
'2017-08-10'
codes
=
engine
.
fetch_codes_range
(
universe
,
None
,
None
,
[
'2017-01-01'
,
'2017-08-10'
])
codes
=
engine
.
fetch_codes
(
universe
=
universe
,
ref_date
=
'2017-08-10'
)
data2
=
engine
.
fetch_dx_return_range
(
universe
,
'2017-08-01'
,
'2017-08-10'
,
[
'2017-08-01'
,
'2017-08-10'
])
MAXIMUM
((
'EPS'
,
'ROEDiluted'
))
data2
=
engine
.
fetch_factor_range
(
universe
=
universe
,
dates
=
[
'2017-08-01'
,
'2017-08-10'
],
factors
=
{
'factor'
:
MAXIMUM
((
'EPS'
,
'ROEDiluted'
))})
print
(
codes
)
print
(
codes
)
print
(
data2
)
print
(
data2
)
alphamind/data/transformer.py
0 → 100644
View file @
459abdd1
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import
pandas
as
pd
from
PyFin.api
import
pyFinAssert
from
PyFin.Analysis.SecurityValueHolders
import
SecurityValueHolder
from
PyFin.api
import
transform
as
transform_impl
DEFAULT_FACTOR_NAME
=
'user_factor'
def factor_translator(factor_pool):
    """Normalise a factor specification into named expressions and dependencies.

    Accepted forms of ``factor_pool``:

    * ``str`` -- a single raw factor, named after itself.
    * ``SecurityValueHolder`` -- a single expression, named
      ``DEFAULT_FACTOR_NAME``.
    * ``dict`` -- maps factor names (``str``) to either a raw factor name
      (``str``) or a ``SecurityValueHolder``. The very same dict object is
      returned (no copy is made).
    * ``list`` -- a mix of raw factor names and ``SecurityValueHolder``
      expressions. Strings are named after themselves; expressions are named
      ``DEFAULT_FACTOR_NAME + '_001'``, ``'_002'``, ... in order of appearance.

    Returns:
        A 2-tuple ``(factor_dict, dependency)`` where ``factor_dict`` maps
        each output name to its string/expression and ``dependency`` is the
        sorted list of raw field names those entries rely on.

    Raises:
        ValueError: if ``factor_pool`` (or one of its elements / items) is
            not in one of the accepted forms.
    """
    if isinstance(factor_pool, str):
        return {factor_pool: factor_pool}, [factor_pool]
    elif isinstance(factor_pool, SecurityValueHolder):
        return {DEFAULT_FACTOR_NAME: factor_pool}, sorted(factor_pool.fields)
    elif isinstance(factor_pool, dict):
        dependency = set()
        for k, v in factor_pool.items():
            pyFinAssert(isinstance(k, str),
                        ValueError,
                        'factor_name {0} should be string.'.format(k))
            pyFinAssert(isinstance(v, SecurityValueHolder) or isinstance(v, str),
                        ValueError,
                        'expression {0} should be a value hodler or a string.'.format(v))

            if isinstance(v, str):
                dependency.add(v)
            else:
                dependency.update(v.fields)
        return factor_pool, sorted(dependency)
    elif isinstance(factor_pool, list):
        factor_dict = {}
        dependency = set()
        # Running counter used only to auto-name anonymous expressions.
        k = 1
        for f in factor_pool:
            if isinstance(f, str):
                factor_dict[f] = f
                dependency.add(f)
            elif isinstance(f, SecurityValueHolder):
                factor_dict[DEFAULT_FACTOR_NAME + '_' + str(k).zfill(3)] = f
                dependency.update(f.fields)
                k += 1
            else:
                # Previously such elements were dropped silently, which only
                # surfaced later as a missing factor; reject them up front,
                # in line with the dict branch and the fall-through below.
                raise ValueError('{0} is not in valid format as factors'.format(f))
        return factor_dict, sorted(dependency)
    else:
        raise ValueError('{0} is not in valid format as factors'.format(factor_pool))
class Transformer(object):
    """Evaluate a set of named factor expressions against a data frame.

    The factor specification is normalised once at construction time through
    ``factor_translator``; the resulting names, expressions and raw-field
    dependencies are kept as attributes.
    """

    def __init__(self, expressions):
        """Translate ``expressions`` into names, expressions and dependencies.

        Args:
            expressions: any factor specification accepted by
                ``factor_translator``.

        Raises:
            ValueError: propagated from ``factor_translator`` when the
                specification is not in a supported form.
        """
        expression_dict, expression_dependency = \
            factor_translator(expressions)

        # Read names and expressions straight from the dict. The former
        # ``list(zip(*items))`` approach yields an empty list for an empty
        # dict, so indexing it raised IndexError; keys() and values() iterate
        # in the same order, so the pairing is unchanged.
        self.names = list(expression_dict.keys())
        self.expressions = list(expression_dict.values())
        # Sorted raw field names the expressions rely on.
        self.dependency = expression_dependency

    def transform(self, group_name, data):
        """Apply the stored expressions to ``data``.

        Args:
            group_name: forwarded to PyFin's ``transform``; presumably the
                column identifying each security -- confirm against PyFin.
            data: the input frame; only its length is inspected here.

        Returns:
            The result of PyFin's ``transform`` (called with
            ``dropna=False``), or an empty ``pd.DataFrame`` when ``data``
            has no rows.
        """
        if len(data) > 0:
            transformed_data = transform_impl(data,
                                              self.expressions,
                                              self.names,
                                              group_name,
                                              dropna=False)
            return transformed_data
        else:
            return pd.DataFrame()
alphamind/examples/factor_analysis_example.py
View file @
459abdd1
...
@@ -8,8 +8,8 @@ Created on 2017-8-16
...
@@ -8,8 +8,8 @@ Created on 2017-8-16
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
matplotlib
import
pyplot
as
plt
from
matplotlib
import
pyplot
as
plt
from
PyFin.api
import
makeSchedule
from
alphamind.api
import
*
from
alphamind.api
import
*
from
PyFin.api
import
*
strategies
=
{
strategies
=
{
...
@@ -27,8 +27,8 @@ strategies = {
...
@@ -27,8 +27,8 @@ strategies = {
engine
=
SqlEngine
(
"mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha"
)
engine
=
SqlEngine
(
"mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha"
)
universe
=
Universe
(
'custom'
,
[
'zz500'
])
universe
=
Universe
(
'custom'
,
[
'zz500'
])
benchmark_code
=
905
benchmark_code
=
905
neutralize_risk
=
[
'SIZE'
]
+
industry_styles
neutralize_risk
=
industry_styles
constraint_risk
=
[
'SIZE'
]
+
industry_styles
constraint_risk
=
industry_styles
freq
=
'1w'
freq
=
'1w'
if
freq
==
'1m'
:
if
freq
==
'1m'
:
...
@@ -38,8 +38,8 @@ elif freq == '1w':
...
@@ -38,8 +38,8 @@ elif freq == '1w':
elif
freq
==
'1d'
:
elif
freq
==
'1d'
:
horizon
=
0
horizon
=
0
dates
=
makeSchedule
(
'201
2-01-14
'
,
dates
=
makeSchedule
(
'201
7-01-01
'
,
'2017-08-1
4
'
,
'2017-08-1
8
'
,
tenor
=
freq
,
tenor
=
freq
,
calendar
=
'china.sse'
)
calendar
=
'china.sse'
)
...
@@ -75,7 +75,7 @@ for strategy in strategies:
...
@@ -75,7 +75,7 @@ for strategy in strategies:
risk_target
=
risk_exp_expand
.
T
@
benchmark
risk_target
=
risk_exp_expand
.
T
@
benchmark
lbound
=
np
.
zeros
(
len
(
total_data
))
lbound
=
np
.
zeros
(
len
(
total_data
))
ubound
=
0.0
1
+
benchmark
ubound
=
0.0
2
+
benchmark
constraint
=
Constraints
(
risk_exp_expand
,
risk_names
)
constraint
=
Constraints
(
risk_exp_expand
,
risk_names
)
for
i
,
name
in
enumerate
(
risk_names
):
for
i
,
name
in
enumerate
(
risk_names
):
...
@@ -92,7 +92,9 @@ for strategy in strategies:
...
@@ -92,7 +92,9 @@ for strategy in strategies:
is_tradable
=
total_data
.
isOpen
.
values
.
astype
(
bool
),
is_tradable
=
total_data
.
isOpen
.
values
.
astype
(
bool
),
method
=
'risk_neutral'
,
method
=
'risk_neutral'
,
constraints
=
constraint
,
constraints
=
constraint
,
use_rank
=
100
)
use_rank
=
50
,
lbound
=
lbound
,
ubound
=
ubound
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
rets
.
append
(
0.
)
rets
.
append
(
0.
)
...
@@ -103,6 +105,11 @@ for strategy in strategies:
...
@@ -103,6 +105,11 @@ for strategy in strategies:
ret_df
=
pd
.
DataFrame
(
total_data_dict
,
index
=
dates
)
ret_df
=
pd
.
DataFrame
(
total_data_dict
,
index
=
dates
)
start_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
0
],
'-1w'
)
ret_df
.
loc
[
start_date
]
=
0.
ret_df
.
sort_index
(
inplace
=
True
)
ret_df
.
cumsum
()
.
plot
(
figsize
=
(
12
,
6
))
ret_df
.
cumsum
()
.
plot
(
figsize
=
(
12
,
6
))
plt
.
savefig
(
"backtest_big_universe_20170814.png"
)
plt
.
savefig
(
"backtest_big_universe_20170814.png"
)
plt
.
show
()
plt
.
show
()
alphamind/examples/formula_expression.py
0 → 100644
View file @
459abdd1
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import
numpy
as
np
import
pandas
as
pd
from
PyFin.api
import
*
from
alphamind.api
import
*
from
matplotlib
import
pyplot
as
plt
# define your alpha formula here
base_factors = ['EPS', 'ROEDiluted', 'VAL', 'CFinc1']

# Build the expression as the sum of LAST(name) over every base factor,
# starting from 0.
expression = 0.
for name in base_factors:
    expression = expression + LAST(name)

alpha_factor_name = 'alpha_factor'
alpha_factor = {alpha_factor_name: expression}

# end of formula definition

# NOTE(review): the connection string embeds credentials in plain text --
# consider loading it from configuration instead.
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
universe = Universe('custom', ['zz500'])
benchmark_code = 905
neutralize_risk = ['SIZE'] + industry_styles
freq = '1w'
n_bins = 5

# NOTE(review): `horizon` is only bound for the three frequencies below; any
# other `freq` value leaves it undefined when it is used later.
if freq == '1m':
    horizon = 21
elif freq == '1w':
    horizon = 4
elif freq == '1d':
    horizon = 0

dates = makeSchedule('2017-01-01',
                     '2017-08-18',
                     tenor=freq,
                     calendar='china.sse')

# Only the 'factor' entry of the fetched data is used by this script.
factor_all_data = engine.fetch_data_range(universe,
                                          alpha_factor,
                                          dates=dates,
                                          benchmark=905)['factor']

factor_groups = factor_all_data.groupby('Date')
# One row per schedule date, one column per quantile bin.
# NOTE(review): rows are filled by group position, so this assumes one
# 'Date' group per entry of `dates` -- confirm.
final_res = np.zeros((len(dates), n_bins))

for i, value in enumerate(factor_groups):
    # Each `value` is a (group key, group frame) pair produced by groupby.
    date = value[0]
    data = value[1][['Code', alpha_factor_name, 'isOpen', 'weight'] + neutralize_risk]
    codes = data.Code.tolist()
    # NOTE(review): `ref_date` is not used anywhere below.
    ref_date = value[0].strftime('%Y-%m-%d')
    returns = engine.fetch_dx_return(date, codes, horizon=horizon)

    # Join factor data with returns on 'Code' and drop rows with missing values.
    total_data = pd.merge(data, returns, on=['Code']).dropna()
    risk_exp = total_data[neutralize_risk].values.astype(float)
    dx_return = total_data.dx.values
    benchmark = total_data.weight.values

    f_data = total_data[[alpha_factor_name]]
    try:
        res = quantile_analysis(f_data,
                                [1.],
                                dx_return,
                                risk_exp=risk_exp,
                                n_bins=n_bins,
                                benchmark=benchmark)
    except Exception as e:
        # Best effort: report the failure and record zeros for this date.
        print(e)
        res = np.zeros(n_bins)

    # Scale the per-bin result by the total benchmark weight of this date.
    final_res[i] = res / benchmark.sum()

df = pd.DataFrame(final_res, index=dates)

# Add an all-zero row one week before the first date, then re-sort the index
# so that the cumulative sum below starts from that row.
start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
df.loc[start_date] = 0.
df.sort_index(inplace=True)
# NOTE(review): `df` is rebound to the return value of plot() here, not to a
# DataFrame.
df = df.cumsum().plot()
plt.show()
alphamind/examples/plot_quantile_res.py
View file @
459abdd1
...
@@ -30,15 +30,15 @@ elif freq == '1w':
...
@@ -30,15 +30,15 @@ elif freq == '1w':
elif
freq
==
'1d'
:
elif
freq
==
'1d'
:
horizon
=
0
horizon
=
0
start_date
=
'201
6-04
-01'
start_date
=
'201
7-01
-01'
end_date
=
'2017-08-1
7
'
end_date
=
'2017-08-1
8
'
dates
=
makeSchedule
(
start_date
,
dates
=
makeSchedule
(
start_date
,
end_date
,
end_date
,
tenor
=
freq
,
tenor
=
freq
,
calendar
=
'china.sse'
)
calendar
=
'china.sse'
)
prod_factors
=
[
'
IVR'
,
'RVOL
'
]
prod_factors
=
[
'
EPS
'
]
all_data
=
engine
.
fetch_data_range
(
universe
,
prod_factors
,
dates
=
dates
,
benchmark
=
905
)
all_data
=
engine
.
fetch_data_range
(
universe
,
prod_factors
,
dates
=
dates
,
benchmark
=
905
)
factor_all_data
=
all_data
[
'factor'
]
factor_all_data
=
all_data
[
'factor'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment