Commit 459abdd1 authored by Dr.李's avatar Dr.李

added interface for alpha formula definition

parent 171f1d75
......@@ -13,7 +13,7 @@ import numpy as np
import pandas as pd
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy import select, and_, outerjoin, join, over
from sqlalchemy import select, and_, outerjoin, join
from sqlalchemy.sql import func
from alphamind.data.engines.universe import Universe
from alphamind.data.dbmodel.models import FactorMaster
......@@ -32,6 +32,7 @@ from alphamind.data.dbmodel.models import RiskCovShort
from alphamind.data.dbmodel.models import RiskCovLong
from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import Market
from alphamind.data.transformer import Transformer
from PyFin.api import advanceDateByCalendar
risk_styles = ['BETA',
......@@ -218,9 +219,13 @@ class SqlEngine(object):
def fetch_factor(self,
ref_date: str,
factors: Iterable[str],
factors: Iterable[object],
codes: Iterable[int]) -> pd.DataFrame:
factor_cols = _map_factors(factors)
transformer = Transformer(factors)
dependency = transformer.dependency
factor_cols = _map_factors(dependency)
big_table = Market
for t in set(factor_cols.values()):
......@@ -230,15 +235,24 @@ class SqlEngine(object):
.select_from(big_table) \
.where(and_(Market.Date == ref_date, Market.Code.in_(codes)))
return pd.read_sql(query, self.engine)
df = pd.read_sql(query, self.engine)
res = transformer.transform('Code', df)
for col in res.columns:
if col not in set(['Code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values
return df
def fetch_factor_range(self,
universe: Universe,
factors: Iterable[str],
factors: Iterable[object],
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None) -> pd.DataFrame:
factor_cols = _map_factors(factors)
transformer = Transformer(factors)
dependency = transformer.dependency
factor_cols = _map_factors(dependency)
q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
......@@ -249,7 +263,14 @@ class SqlEngine(object):
query = select([Market.Date, Market.Code, Market.isOpen] + list(factor_cols.keys())) \
.select_from(big_table)
return pd.read_sql(query, self.engine)
df = pd.read_sql(query, self.engine).sort_values(['Date', 'Code']).set_index('Date')
res = transformer.transform('Code', df)
for col in res.columns:
if col not in set(['Code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values
return df.reset_index()
def fetch_benchmark(self,
ref_date: str,
......@@ -402,13 +423,16 @@ class SqlEngine(object):
if __name__ == '__main__':
from PyFin.api import *
db_url = 'postgresql+psycopg2://postgres:we083826@localhost/alpha'
db_url = 'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha'
universe = Universe('custom', ['zz500'])
engine = SqlEngine(db_url)
ref_date = '2017-08-10'
codes = engine.fetch_codes_range(universe, None, None, ['2017-01-01', '2017-08-10'])
data2 = engine.fetch_dx_return_range(universe, '2017-08-01', '2017-08-10', ['2017-08-01', '2017-08-10'])
codes = engine.fetch_codes(universe=universe, ref_date='2017-08-10')
MAXIMUM(('EPS', 'ROEDiluted'))
data2 = engine.fetch_factor_range(universe=universe, dates=['2017-08-01', '2017-08-10'], factors={'factor': MAXIMUM(('EPS', 'ROEDiluted'))})
print(codes)
print(data2)
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import pandas as pd
from PyFin.api import pyFinAssert
from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
from PyFin.api import transform as transform_impl
DEFAULT_FACTOR_NAME = 'user_factor'
def factor_translator(factor_pool):
    """Translate a factor specification into ``(factor_dict, dependency)``.

    Accepted forms of ``factor_pool``:

    * ``str`` -- a single raw field, named after itself.
    * ``dict`` -- maps factor names (``str``) to either a raw field name or a
      ``SecurityValueHolder``; the very same dict object is returned.
    * ``list`` / ``tuple`` -- raw field names and/or ``SecurityValueHolder``
      expressions; expressions are named ``DEFAULT_FACTOR_NAME + '_001'``,
      ``'_002'``, ... in order of appearance.
    * ``SecurityValueHolder`` -- a single expression, named
      ``DEFAULT_FACTOR_NAME``.

    Returns
    -------
    tuple
        ``(factor_dict, dependency)`` where ``factor_dict`` maps every output
        name to its field name or expression, and ``dependency`` is the sorted
        list of raw field names those entries refer to.

    Raises
    ------
    ValueError
        If ``factor_pool``, a dict key, or any contained expression has an
        unsupported type.
    """
    if isinstance(factor_pool, str):
        return {factor_pool: factor_pool}, [factor_pool]

    # NOTE(review): the container checks run before the SecurityValueHolder
    # check; this assumes a SecurityValueHolder is never itself a
    # dict/list/tuple, so the order of the type tests does not matter.
    if isinstance(factor_pool, dict):
        dependency = set()
        for name, expression in factor_pool.items():
            if not isinstance(name, str):
                raise ValueError('factor_name {0} should be string.'.format(name))
            if isinstance(expression, str):
                dependency.add(expression)
            elif isinstance(expression, SecurityValueHolder):
                dependency.update(expression.fields)
            else:
                raise ValueError(
                    'expression {0} should be a value holder or a string.'.format(expression))
        return factor_pool, sorted(dependency)

    if isinstance(factor_pool, (list, tuple)):
        factor_dict = {}
        dependency = set()
        counter = 1  # numbers only the unnamed expressions, not the raw fields
        for item in factor_pool:
            if isinstance(item, str):
                factor_dict[item] = item
                dependency.add(item)
            elif isinstance(item, SecurityValueHolder):
                factor_dict[DEFAULT_FACTOR_NAME + '_' + str(counter).zfill(3)] = item
                dependency.update(item.fields)
                counter += 1
            else:
                # Validate like the dict form does instead of silently
                # dropping the entry.
                raise ValueError(
                    'expression {0} should be a value holder or a string.'.format(item))
        return factor_dict, sorted(dependency)

    if isinstance(factor_pool, SecurityValueHolder):
        return {DEFAULT_FACTOR_NAME: factor_pool}, sorted(factor_pool.fields)

    raise ValueError('{0} is not in valid format as factors'.format(factor_pool))
class Transformer(object):
    """Hold a set of named factor expressions and apply them to data.

    Attributes set by ``__init__``:

    * ``names`` -- output column names, one per expression.
    * ``expressions`` -- the field names / expressions, aligned with ``names``.
    * ``dependency`` -- sorted raw field names needed by the expressions, as
      returned by ``factor_translator``.
    """

    def __init__(self,
                 expressions):
        """Translate ``expressions`` with ``factor_translator`` and store the result.

        ``expressions`` may be anything ``factor_translator`` accepts. An
        empty specification is allowed and yields empty ``names`` and
        ``expressions`` lists.
        """
        expression_dict, expression_dependency = \
            factor_translator(expressions)

        # keys() and values() iterate in matching order, and unlike
        # ``zip(*items)`` followed by ``res[0]`` they also work for an empty
        # specification.
        self.names = list(expression_dict.keys())
        self.expressions = list(expression_dict.values())
        self.dependency = expression_dependency

    def transform(self, group_name, data):
        """Apply the stored expressions to ``data``.

        ``group_name`` and ``data`` are forwarded unchanged to PyFin's
        ``transform`` together with ``dropna=False``. Presumably
        ``group_name`` is the column identifying each security -- TODO
        confirm against the PyFin documentation.

        Returns an empty ``DataFrame`` when ``data`` has no rows or when no
        expressions were defined; otherwise returns whatever PyFin's
        ``transform`` returns.
        """
        if len(data) == 0 or not self.expressions:
            return pd.DataFrame()

        return transform_impl(data,
                              self.expressions,
                              self.names,
                              group_name,
                              dropna=False)
......@@ -8,8 +8,8 @@ Created on 2017-8-16
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PyFin.api import makeSchedule
from alphamind.api import *
from PyFin.api import *
strategies = {
......@@ -27,8 +27,8 @@ strategies = {
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
universe = Universe('custom', ['zz500'])
benchmark_code = 905
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles
neutralize_risk = industry_styles
constraint_risk = industry_styles
freq = '1w'
if freq == '1m':
......@@ -38,8 +38,8 @@ elif freq == '1w':
elif freq == '1d':
horizon = 0
dates = makeSchedule('2012-01-14',
'2017-08-14',
dates = makeSchedule('2017-01-01',
'2017-08-18',
tenor=freq,
calendar='china.sse')
......@@ -75,7 +75,7 @@ for strategy in strategies:
risk_target = risk_exp_expand.T @ benchmark
lbound = np.zeros(len(total_data))
ubound = 0.01 + benchmark
ubound = 0.02 + benchmark
constraint = Constraints(risk_exp_expand, risk_names)
for i, name in enumerate(risk_names):
......@@ -92,7 +92,9 @@ for strategy in strategies:
is_tradable=total_data.isOpen.values.astype(bool),
method='risk_neutral',
constraints=constraint,
use_rank=100)
use_rank=50,
lbound=lbound,
ubound=ubound)
except Exception as e:
print(e)
rets.append(0.)
......@@ -103,6 +105,11 @@ for strategy in strategies:
ret_df = pd.DataFrame(total_data_dict, index=dates)
start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
ret_df.loc[start_date] = 0.
ret_df.sort_index(inplace=True)
ret_df.cumsum().plot(figsize=(12, 6))
plt.savefig("backtest_big_universe_20170814.png")
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
# define your alpha formula here
base_factors = ['EPS', 'ROEDiluted', 'VAL', 'CFinc1']

# Build the alpha expression as the sum of LAST(<field>) over the base factors.
expression = 0.
for name in base_factors:
    expression = expression + LAST(name)

alpha_factor_name = 'alpha_factor'
alpha_factor = {alpha_factor_name: expression}
# end of formula definition

# NOTE(review): the connection string embeds database credentials; consider
# loading it from configuration or the environment instead of source control.
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
universe = Universe('custom', ['zz500'])
benchmark_code = 905
neutralize_risk = ['SIZE'] + industry_styles
freq = '1w'
n_bins = 5

if freq == '1m':
    horizon = 21
elif freq == '1w':
    horizon = 4
elif freq == '1d':
    horizon = 0
else:
    # Fail here rather than with a NameError on ``horizon`` further down.
    raise ValueError('unsupported freq: {0}'.format(freq))

dates = makeSchedule('2017-01-01',
                     '2017-08-18',
                     tenor=freq,
                     calendar='china.sse')

factor_all_data = engine.fetch_data_range(universe,
                                          alpha_factor,
                                          dates=dates,
                                          benchmark=benchmark_code)['factor']

factor_groups = factor_all_data.groupby('Date')

# One row per scheduled date, one column per quantile bin.
final_res = np.zeros((len(dates), n_bins))

for i, (date, group) in enumerate(factor_groups):
    data = group[['Code', alpha_factor_name, 'isOpen', 'weight'] + neutralize_risk]
    codes = data.Code.tolist()

    returns = engine.fetch_dx_return(date, codes, horizon=horizon)

    # Keep only codes present in both frames and with no missing values.
    total_data = pd.merge(data, returns, on=['Code']).dropna()
    risk_exp = total_data[neutralize_risk].values.astype(float)
    dx_return = total_data.dx.values
    benchmark = total_data.weight.values

    f_data = total_data[[alpha_factor_name]]

    try:
        res = quantile_analysis(f_data,
                                [1.],
                                dx_return,
                                risk_exp=risk_exp,
                                n_bins=n_bins,
                                benchmark=benchmark)
    except Exception as e:
        # Best effort: report the failure and record zeros for this date.
        print(e)
        res = np.zeros(n_bins)

    final_res[i] = res / benchmark.sum()

df = pd.DataFrame(final_res, index=dates)

# Prepend a zero row one week before the first date so the cumulative curve
# starts from zero.
start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
df.loc[start_date] = 0.
df.sort_index(inplace=True)
# Plot without rebinding ``df`` to the value returned by ``plot()``.
df.cumsum().plot()
plt.show()
......@@ -30,15 +30,15 @@ elif freq == '1w':
elif freq == '1d':
horizon = 0
start_date = '2016-04-01'
end_date = '2017-08-17'
start_date = '2017-01-01'
end_date = '2017-08-18'
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
prod_factors = ['IVR', 'RVOL']
prod_factors = ['EPS']
all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
factor_all_data = all_data['factor']
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment