Commit 459abdd1 authored by Dr.李's avatar Dr.李

added interface for alpha formula definition

parent 171f1d75
...@@ -13,7 +13,7 @@ import numpy as np ...@@ -13,7 +13,7 @@ import numpy as np
import pandas as pd import pandas as pd
import sqlalchemy as sa import sqlalchemy as sa
import sqlalchemy.orm as orm import sqlalchemy.orm as orm
from sqlalchemy import select, and_, outerjoin, join, over from sqlalchemy import select, and_, outerjoin, join
from sqlalchemy.sql import func from sqlalchemy.sql import func
from alphamind.data.engines.universe import Universe from alphamind.data.engines.universe import Universe
from alphamind.data.dbmodel.models import FactorMaster from alphamind.data.dbmodel.models import FactorMaster
...@@ -32,6 +32,7 @@ from alphamind.data.dbmodel.models import RiskCovShort ...@@ -32,6 +32,7 @@ from alphamind.data.dbmodel.models import RiskCovShort
from alphamind.data.dbmodel.models import RiskCovLong from alphamind.data.dbmodel.models import RiskCovLong
from alphamind.data.dbmodel.models import RiskExposure from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import Market from alphamind.data.dbmodel.models import Market
from alphamind.data.transformer import Transformer
from PyFin.api import advanceDateByCalendar from PyFin.api import advanceDateByCalendar
risk_styles = ['BETA', risk_styles = ['BETA',
...@@ -218,9 +219,13 @@ class SqlEngine(object): ...@@ -218,9 +219,13 @@ class SqlEngine(object):
def fetch_factor(self, def fetch_factor(self,
ref_date: str, ref_date: str,
factors: Iterable[str], factors: Iterable[object],
codes: Iterable[int]) -> pd.DataFrame: codes: Iterable[int]) -> pd.DataFrame:
factor_cols = _map_factors(factors)
transformer = Transformer(factors)
dependency = transformer.dependency
factor_cols = _map_factors(dependency)
big_table = Market big_table = Market
for t in set(factor_cols.values()): for t in set(factor_cols.values()):
...@@ -230,15 +235,24 @@ class SqlEngine(object): ...@@ -230,15 +235,24 @@ class SqlEngine(object):
.select_from(big_table) \ .select_from(big_table) \
.where(and_(Market.Date == ref_date, Market.Code.in_(codes))) .where(and_(Market.Date == ref_date, Market.Code.in_(codes)))
return pd.read_sql(query, self.engine) df = pd.read_sql(query, self.engine)
res = transformer.transform('Code', df)
for col in res.columns:
if col not in set(['Code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values
return df
def fetch_factor_range(self, def fetch_factor_range(self,
universe: Universe, universe: Universe,
factors: Iterable[str], factors: Iterable[object],
start_date: str = None, start_date: str = None,
end_date: str = None, end_date: str = None,
dates: Iterable[str] = None) -> pd.DataFrame: dates: Iterable[str] = None) -> pd.DataFrame:
factor_cols = _map_factors(factors)
transformer = Transformer(factors)
dependency = transformer.dependency
factor_cols = _map_factors(dependency)
q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe') q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
...@@ -249,7 +263,14 @@ class SqlEngine(object): ...@@ -249,7 +263,14 @@ class SqlEngine(object):
query = select([Market.Date, Market.Code, Market.isOpen] + list(factor_cols.keys())) \ query = select([Market.Date, Market.Code, Market.isOpen] + list(factor_cols.keys())) \
.select_from(big_table) .select_from(big_table)
return pd.read_sql(query, self.engine) df = pd.read_sql(query, self.engine).sort_values(['Date', 'Code']).set_index('Date')
res = transformer.transform('Code', df)
for col in res.columns:
if col not in set(['Code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values
return df.reset_index()
def fetch_benchmark(self, def fetch_benchmark(self,
ref_date: str, ref_date: str,
...@@ -402,13 +423,16 @@ class SqlEngine(object): ...@@ -402,13 +423,16 @@ class SqlEngine(object):
if __name__ == '__main__': if __name__ == '__main__':
from PyFin.api import *
db_url = 'postgresql+psycopg2://postgres:we083826@localhost/alpha' db_url = 'postgresql+psycopg2://postgres:we083826@localhost/alpha'
db_url = 'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha'
universe = Universe('custom', ['zz500']) universe = Universe('custom', ['zz500'])
engine = SqlEngine(db_url) engine = SqlEngine(db_url)
ref_date = '2017-08-10' ref_date = '2017-08-10'
codes = engine.fetch_codes_range(universe, None, None, ['2017-01-01', '2017-08-10']) codes = engine.fetch_codes(universe=universe, ref_date='2017-08-10')
data2 = engine.fetch_dx_return_range(universe, '2017-08-01', '2017-08-10', ['2017-08-01', '2017-08-10']) MAXIMUM(('EPS', 'ROEDiluted'))
data2 = engine.fetch_factor_range(universe=universe, dates=['2017-08-01', '2017-08-10'], factors={'factor': MAXIMUM(('EPS', 'ROEDiluted'))})
print(codes) print(codes)
print(data2) print(data2)
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import pandas as pd
from PyFin.api import pyFinAssert
from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
from PyFin.api import transform as transform_impl
DEFAULT_FACTOR_NAME = 'user_factor'
def factor_translator(factor_pool):
    """Normalize a user supplied factor specification.

    Accepts one of four shapes and returns a ``(factor_dict, dependency)``
    pair, where ``factor_dict`` maps output factor names to either raw field
    names (str) or PyFin ``SecurityValueHolder`` expressions, and
    ``dependency`` is the sorted list of raw fields the expressions read.

    Parameters
    ----------
    factor_pool : str | SecurityValueHolder | dict | list
        * str - a single raw factor name; mapped to itself.
        * SecurityValueHolder - a formula; named ``DEFAULT_FACTOR_NAME``.
        * dict - explicit name -> (str | SecurityValueHolder) mapping.
        * list - mix of raw names and formulas; formulas are auto-named
          ``DEFAULT_FACTOR_NAME_001``, ``_002``, ...

    Returns
    -------
    tuple[dict, list]
        The normalized mapping and its sorted field dependencies.

    Raises
    ------
    ValueError
        If ``factor_pool`` (or a dict entry) is not one of the supported shapes.
    """
    if isinstance(factor_pool, str):
        return {factor_pool: factor_pool}, [factor_pool]
    elif isinstance(factor_pool, SecurityValueHolder):
        return {DEFAULT_FACTOR_NAME: factor_pool}, sorted(factor_pool.fields)
    elif isinstance(factor_pool, dict):
        dependency = set()
        for k, v in factor_pool.items():
            pyFinAssert(isinstance(k, str), ValueError, 'factor_name {0} should be string.'.format(k))
            pyFinAssert(isinstance(v, (SecurityValueHolder, str)),
                        ValueError,
                        'expression {0} should be a value holder or a string.'.format(v))
            if isinstance(v, str):
                dependency.add(v)
            else:
                dependency.update(v.fields)
        return factor_pool, sorted(dependency)
    elif isinstance(factor_pool, list):
        factor_dict = {}
        dependency = set()
        k = 1  # running suffix for auto-generated expression names
        for f in factor_pool:
            if isinstance(f, str):
                factor_dict[f] = f
                dependency.add(f)
            elif isinstance(f, SecurityValueHolder):
                factor_dict[DEFAULT_FACTOR_NAME + '_' + str(k).zfill(3)] = f
                dependency.update(f.fields)
                k += 1
            # NOTE(review): list items of any other type are silently skipped,
            # matching the original behavior - confirm this is intended.
        return factor_dict, sorted(dependency)
    else:
        raise ValueError('{0} is not in valid format as factors'.format(factor_pool))
class Transformer(object):
    """Compile a user factor specification into named PyFin expressions.

    Wraps :func:`factor_translator` and exposes the pieces the SQL engine
    needs: output ``names``, the matching ``expressions``, and the raw field
    ``dependency`` list to select from the database.
    """

    def __init__(self,
                 expressions):
        """Translate *expressions* (str / value holder / dict / list).

        Uses ``keys()``/``values()`` directly so an empty specification yields
        empty ``names``/``expressions`` instead of the ``IndexError`` the
        previous ``zip(*items)`` unpacking raised on an empty dict.
        """
        expression_dict, expression_dependency = \
            factor_translator(expressions)
        # keys() and values() iterate the dict in one consistent order,
        # so names[i] always labels expressions[i].
        self.names = list(expression_dict.keys())
        self.expressions = list(expression_dict.values())
        self.dependency = expression_dependency

    def transform(self, group_name, data):
        """Evaluate all expressions over *data* grouped by *group_name*.

        Returns the PyFin ``transform`` result, or an empty DataFrame when
        *data* has no rows (avoids calling the engine on empty input).
        """
        if len(data) > 0:
            # dropna=False: keep rows even when an expression evaluates to NaN
            return transform_impl(data,
                                  self.expressions,
                                  self.names,
                                  group_name,
                                  dropna=False)
        else:
            return pd.DataFrame()
...@@ -8,8 +8,8 @@ Created on 2017-8-16 ...@@ -8,8 +8,8 @@ Created on 2017-8-16
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from PyFin.api import makeSchedule
from alphamind.api import * from alphamind.api import *
from PyFin.api import *
strategies = { strategies = {
...@@ -27,8 +27,8 @@ strategies = { ...@@ -27,8 +27,8 @@ strategies = {
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha") engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
universe = Universe('custom', ['zz500']) universe = Universe('custom', ['zz500'])
benchmark_code = 905 benchmark_code = 905
neutralize_risk = ['SIZE'] + industry_styles neutralize_risk = industry_styles
constraint_risk = ['SIZE'] + industry_styles constraint_risk = industry_styles
freq = '1w' freq = '1w'
if freq == '1m': if freq == '1m':
...@@ -38,8 +38,8 @@ elif freq == '1w': ...@@ -38,8 +38,8 @@ elif freq == '1w':
elif freq == '1d': elif freq == '1d':
horizon = 0 horizon = 0
dates = makeSchedule('2012-01-14', dates = makeSchedule('2017-01-01',
'2017-08-14', '2017-08-18',
tenor=freq, tenor=freq,
calendar='china.sse') calendar='china.sse')
...@@ -75,7 +75,7 @@ for strategy in strategies: ...@@ -75,7 +75,7 @@ for strategy in strategies:
risk_target = risk_exp_expand.T @ benchmark risk_target = risk_exp_expand.T @ benchmark
lbound = np.zeros(len(total_data)) lbound = np.zeros(len(total_data))
ubound = 0.01 + benchmark ubound = 0.02 + benchmark
constraint = Constraints(risk_exp_expand, risk_names) constraint = Constraints(risk_exp_expand, risk_names)
for i, name in enumerate(risk_names): for i, name in enumerate(risk_names):
...@@ -92,7 +92,9 @@ for strategy in strategies: ...@@ -92,7 +92,9 @@ for strategy in strategies:
is_tradable=total_data.isOpen.values.astype(bool), is_tradable=total_data.isOpen.values.astype(bool),
method='risk_neutral', method='risk_neutral',
constraints=constraint, constraints=constraint,
use_rank=100) use_rank=50,
lbound=lbound,
ubound=ubound)
except Exception as e: except Exception as e:
print(e) print(e)
rets.append(0.) rets.append(0.)
...@@ -103,6 +105,11 @@ for strategy in strategies: ...@@ -103,6 +105,11 @@ for strategy in strategies:
ret_df = pd.DataFrame(total_data_dict, index=dates) ret_df = pd.DataFrame(total_data_dict, index=dates)
start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
ret_df.loc[start_date] = 0.
ret_df.sort_index(inplace=True)
ret_df.cumsum().plot(figsize=(12, 6)) ret_df.cumsum().plot(figsize=(12, 6))
plt.savefig("backtest_big_universe_20170814.png") plt.savefig("backtest_big_universe_20170814.png")
plt.show() plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2017-8-23
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
# define your alpha formula here
# alpha = LAST('EPS') + LAST('ROEDiluted') + LAST('VAL') + LAST('CFinc1'),
# i.e. an equal-weight sum of the latest value of each base factor.
base_factors = ['EPS', 'ROEDiluted', 'VAL', 'CFinc1']
expression = 0.
for name in base_factors:
    expression = expression + LAST(name)
alpha_factor_name = 'alpha_factor'
# dict form accepted by the engine: output name -> PyFin expression
alpha_factor = {alpha_factor_name: expression}
# end of formula definition

engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
universe = Universe('custom', ['zz500'])
benchmark_code = 905
# risk factors to neutralize before the quantile analysis
neutralize_risk = ['SIZE'] + industry_styles
freq = '1w'
n_bins = 5

# forward-return horizon (in trading days) matching the rebalance frequency
if freq == '1m':
    horizon = 21
elif freq == '1w':
    horizon = 4
elif freq == '1d':
    horizon = 0

dates = makeSchedule('2017-01-01',
                     '2017-08-18',
                     tenor=freq,
                     calendar='china.sse')

# fetch factor values (alpha expression evaluated server-side helpers) plus
# benchmark weights for every rebalance date
factor_all_data = engine.fetch_data_range(universe,
                                          alpha_factor,
                                          dates=dates,
                                          benchmark=905)['factor']

factor_groups = factor_all_data.groupby('Date')

# one row of per-quantile excess returns for each rebalance date
final_res = np.zeros((len(dates), n_bins))

for i, value in enumerate(factor_groups):
    date = value[0]
    data = value[1][['Code', alpha_factor_name, 'isOpen', 'weight'] + neutralize_risk]
    codes = data.Code.tolist()
    ref_date = value[0].strftime('%Y-%m-%d')
    # forward returns over `horizon` days from this date
    returns = engine.fetch_dx_return(date, codes, horizon=horizon)

    # keep only codes with both factor values and returns
    total_data = pd.merge(data, returns, on=['Code']).dropna()
    risk_exp = total_data[neutralize_risk].values.astype(float)
    dx_return = total_data.dx.values
    benchmark = total_data.weight.values

    f_data = total_data[[alpha_factor_name]]
    try:
        res = quantile_analysis(f_data,
                                [1.],
                                dx_return,
                                risk_exp=risk_exp,
                                n_bins=n_bins,
                                benchmark=benchmark)
    except Exception as e:
        # best effort: log the failure and score this date as flat
        print(e)
        res = np.zeros(n_bins)

    # normalize by total benchmark weight so dates are comparable
    final_res[i] = res / benchmark.sum()

df = pd.DataFrame(final_res, index=dates)

# prepend a zero row one week before the first date so the cumulative
# plot starts from zero
start_date = advanceDateByCalendar('china.sse', dates[0], '-1w')
df.loc[start_date] = 0.

df.sort_index(inplace=True)
df = df.cumsum().plot()
plt.show()
...@@ -30,15 +30,15 @@ elif freq == '1w': ...@@ -30,15 +30,15 @@ elif freq == '1w':
elif freq == '1d': elif freq == '1d':
horizon = 0 horizon = 0
start_date = '2016-04-01' start_date = '2017-01-01'
end_date = '2017-08-17' end_date = '2017-08-18'
dates = makeSchedule(start_date, dates = makeSchedule(start_date,
end_date, end_date,
tenor=freq, tenor=freq,
calendar='china.sse') calendar='china.sse')
prod_factors = ['IVR', 'RVOL'] prod_factors = ['EPS']
all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905) all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
factor_all_data = all_data['factor'] factor_all_data = all_data['factor']
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment