Unverified Commit 72e4179f authored by lion-sing's avatar lion-sing Committed by GitHub

Merge pull request #3 from alpha-miner/master

update
parents ebd7a27f eac7bef8
......@@ -37,6 +37,7 @@ install:
- pip install simpleutils
- pip install coveralls
- pip install finance-python
- pip install deprecated
- export CWD=$PWD
- cd /usr/src/gtest
- sudo cmake CMakeLists.txt
......
......@@ -48,11 +48,11 @@ alpha - mind 提供了多因子研究中常用的工具链,包括:
* Linux
在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran)
   在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran):
```bash
build_linux_dependencies.sh
```
```
## 安装
......
......@@ -6,4 +6,4 @@ Created on 2017-4-25
"""
__version__ = "0.1.0"
__version__ = "0.1.1"
......@@ -7,11 +7,13 @@ Created on 2017-5-25
from typing import Optional
from typing import Tuple
from typing import Union
import numpy as np
import pandas as pd
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.longshortbulder import long_short_build
from alphamind.portfolio.rankbuilder import rank_build
from alphamind.portfolio.linearbuilder import linear_build
......@@ -60,7 +62,7 @@ def factor_analysis(factors: pd.DataFrame,
def er_portfolio_analysis(er: np.ndarray,
industry: np.ndarray,
dx_return: np.ndarray,
constraints: Optional[Constraints]=None,
constraints: Optional[Union[LinearConstraints, Constraints]]=None,
detail_analysis=True,
benchmark: Optional[np.ndarray] = None,
is_tradable: Optional[np.ndarray] = None,
......
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
from alphamind.data.standardize import standardize
def factor_turn_over(factor_values: np.ndarray,
                     trade_dates: np.ndarray,
                     codes: np.ndarray,
                     use_standize: bool = True):
    """Optionally standardize ``factor_values`` using ``trade_dates`` as groups.

    The function is a stub: the standardized values are only bound to a
    local name and nothing is returned (the result is always ``None``).
    ``codes`` is accepted but not used.
    """
    if not use_standize:
        return

    # ``trade_dates`` is passed as the ``groups`` argument of ``standardize``.
    factor_values = standardize(factor_values, trade_dates)
if __name__ == '__main__':
    # Ad-hoc driver: only prepares the inputs of a turn-over study.
    from alphamind.api import *

    factor, freq = 'ep_q', '5b'
    start_date, end_date = '2017-06-01', '2017-08-01'

    engine = SqlEngine()
    universe = Universe('custom', ['zz500'])
......@@ -14,6 +14,10 @@ from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.evolver import evolve_positions
from alphamind.data.engines.sqlengine import risk_styles
......@@ -24,6 +28,7 @@ from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.neutralize import neutralize
from alphamind.data.engines.sqlengine import factor_tables
from alphamind.data.engines.utilities import industry_list
from alphamind.model import LinearRegression
from alphamind.model import LassoRegression
......@@ -37,6 +42,7 @@ from alphamind.model import XGBTrainer
from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor
......@@ -56,6 +62,10 @@ __all__ = [
'Universe',
'factor_processing',
'Constraints',
'LinearConstraints',
'BoundaryType',
'BoundaryDirection',
'create_box_bounds',
'evolve_positions',
'risk_styles',
'industry_styles',
......@@ -65,8 +75,10 @@ __all__ = [
'projection',
'neutralize',
'factor_tables',
'industry_list',
'fetch_data_package',
'fetch_train_phase',
'fetch_predict_phase',
'LinearRegression',
'LassoRegression',
'ConstLinearModel',
......
......@@ -670,6 +670,7 @@ class Experimental(Base):
val_q = Column(Float(53))
ep_q = Column(Float(53))
ep_q_d_1w = Column(Float(53))
ev = Column(Float(53))
class FactorMaster(Base):
......
This diff is collapsed.
......@@ -42,6 +42,7 @@ from alphamind.data.engines.utilities import _map_factors
from alphamind.data.engines.utilities import _map_industry_category
from alphamind.data.engines.utilities import _map_risk_model_table
from alphamind.data.engines.utilities import factor_tables
from alphamind.data.engines.utilities import industry_list
from PyFin.api import advanceDateByCalendar
risk_styles = ['BETA',
......@@ -207,12 +208,12 @@ class SqlEngine(object):
cond = universe._query_statements(start_date, end_date, None)
big_table = join(Market, UniverseTable,
and_(
Market.trade_date == UniverseTable.trade_date,
Market.code == UniverseTable.code,
cond
)
)
and_(
Market.trade_date == UniverseTable.trade_date,
Market.code == UniverseTable.code,
cond
)
)
query = select([Market.trade_date, Market.code, stats]) \
.select_from(big_table)
......@@ -379,7 +380,7 @@ class SqlEngine(object):
FullFactor.code == UniverseTable.code,
cond
)
)
)
query = select(
[FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
......@@ -498,7 +499,7 @@ class SqlEngine(object):
FullFactor.code == UniverseTable.code,
cond
)
)
)
query = select(
[FullFactor.trade_date, FullFactor.code, special_risk_col] + risk_exposure_cols).select_from(big_table) \
......@@ -508,20 +509,24 @@ class SqlEngine(object):
if universe.is_filtered:
codes = universe.query(self, start_date, end_date, dates)
risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(
['trade_date', 'code'])
return risk_cov, risk_exp
def fetch_industry(self,
ref_date: str,
codes: Iterable[int],
category: str = 'sw'):
category: str = 'sw',
level: int = 1):
industry_category_name = _map_industry_category(category)
code_name = 'industryID' + str(level)
category_name = 'industryName' + str(level)
query = select([Industry.code,
Industry.industryID1.label('industry_code'),
Industry.industryName1.label('industry')]).where(
getattr(Industry, code_name).label('industry_code'),
getattr(Industry, category_name).label('industry')]).where(
and_(
Industry.trade_date == ref_date,
Industry.code.in_(codes),
......@@ -531,14 +536,36 @@ class SqlEngine(object):
return pd.read_sql(query, self.engine)
def fetch_industry_matrix(self,
                          ref_date: str,
                          codes: Iterable[int],
                          category: str = 'sw',
                          level: int = 1):
    """Return the industry membership of ``codes`` on ``ref_date`` as dummy columns.

    The frame returned by ``fetch_industry`` is one-hot encoded on its
    ``industry`` column. Every name from ``industry_list(category, level)``
    ends up as a column: names present in the data keep their dummy values,
    names absent from the data are added as constant ``0`` columns. The
    ``code``, ``industry_code`` and ``industry_name`` columns come first.
    """
    industry_df = self.fetch_industry(ref_date, codes, category, level)
    # Keep a readable copy of the name; get_dummies consumes 'industry'.
    industry_df['industry_name'] = industry_df['industry']
    dummies = pd.get_dummies(industry_df, columns=['industry'], prefix="", prefix_sep="")

    all_industries = industry_list(category, level)
    present = [name for name in all_industries if name in dummies]
    absent = [name for name in all_industries if name not in dummies]

    selected = dummies[['code', 'industry_code', 'industry_name'] + present]
    return selected.assign(**{name: 0 for name in absent})
def fetch_industry_range(self,
universe: Universe,
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None,
category: str = 'sw'):
category: str = 'sw',
level: int = 1):
industry_category_name = _map_industry_category(category)
cond = universe._query_statements(start_date, end_date, dates)
big_table = join(Industry, UniverseTable,
......@@ -547,13 +574,15 @@ class SqlEngine(object):
Industry.code == UniverseTable.code,
Industry.industry == industry_category_name,
cond
)
)
))
code_name = 'industryID' + str(level)
category_name = 'industryName' + str(level)
query = select([Industry.trade_date,
Industry.code,
Industry.industryID1.label('industry_code'),
Industry.industryName1.label('industry')]).select_from(big_table).distinct()
getattr(Industry, code_name).label('industry_code'),
getattr(Industry, category_name).label('industry')]).select_from(big_table).distinct()
df = pd.read_sql(query, self.engine)
if universe.is_filtered:
......@@ -561,7 +590,46 @@ class SqlEngine(object):
df = pd.merge(df, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
return df
def fetch_data(self, ref_date: str,
def fetch_industry_matrix_range(self,
                                universe: Universe,
                                start_date: str = None,
                                end_date: str = None,
                                dates: Iterable[str] = None,
                                category: str = 'sw',
                                level: int = 1):
    """Return industry membership over a date range as dummy columns.

    The frame returned by ``fetch_industry_range`` is one-hot encoded on its
    ``industry`` column. Every name from ``industry_list(category, level)``
    ends up as a column: names present in the data keep their dummy values,
    names absent from the data are added as constant ``0`` columns. The
    ``trade_date``, ``code``, ``industry_code`` and ``industry_name`` columns
    come first.
    """
    industry_df = self.fetch_industry_range(universe, start_date, end_date, dates, category, level)
    # Keep a readable copy of the name; get_dummies consumes 'industry'.
    industry_df['industry_name'] = industry_df['industry']
    dummies = pd.get_dummies(industry_df, columns=['industry'], prefix="", prefix_sep="")

    all_industries = industry_list(category, level)
    present = [name for name in all_industries if name in dummies]
    absent = [name for name in all_industries if name not in dummies]

    selected = dummies[['trade_date', 'code', 'industry_code', 'industry_name'] + present]
    return selected.assign(**{name: 0 for name in absent})
def fetch_trade_status(self,
                       ref_date: str,
                       codes: Iterable[int]):
    """Return ``code`` / ``isOpen`` rows of ``Market`` for ``codes`` on ``ref_date``.

    The result is read through ``self.engine`` and sorted by ``code``.
    """
    on_ref_date = Market.trade_date == ref_date
    in_requested_codes = Market.code.in_(codes)

    query = select([Market.code, Market.isOpen]).where(and_(on_ref_date, in_requested_codes))
    status = pd.read_sql(query, self.engine)
    return status.sort_values(['code'])
def fetch_data(self,
ref_date: str,
factors: Iterable[str],
codes: Iterable[int],
benchmark: int = None,
......@@ -802,10 +870,10 @@ class SqlEngine(object):
else:
id_filter = 'in_'
t = select([table.trade_id]).\
t = select([table.trade_id]). \
where(and_(table.trade_date <= ref_date,
table.operation == 'withdraw')).alias('t')
query = select([table]).\
query = select([table]). \
where(and_(getattr(table.trade_id, id_filter)(t),
table.trade_date <= ref_date,
table.operation == 'lend'))
......@@ -823,7 +891,7 @@ class SqlEngine(object):
rule = x['price_rule'].split('@')
if rule[0] in ['closePrice', 'openPrice']:
query = select([getattr(Market, rule[0])]).\
query = select([getattr(Market, rule[0])]). \
where(and_(Market.code == code, Market.trade_date == rule[1]))
data = pd.read_sql(query, self.engine)
if not data.empty:
......@@ -835,6 +903,7 @@ class SqlEngine(object):
else:
raise KeyError('do not have rule for %s' % x['price_rule'])
return price
df['price'] = df.apply(lambda x: parse_price_rule(x), axis=1)
df.drop(['remark', 'price_rule', 'operation'], axis=1, inplace=True)
......@@ -848,12 +917,10 @@ class SqlEngine(object):
if __name__ == '__main__':
universe = Universe('ss', ['hs300'])
engine = SqlEngine()
df = engine.fetch_outright_status('2017-12-28')
print(df)
ref_date = '2017-12-28'
codes = universe.query(engine, dates=[ref_date])
df = engine.fetch_trade_status(ref_date, codes.code.tolist())
print(df)
\ No newline at end of file
......@@ -13,6 +13,7 @@ from alphamind.data.dbmodel.models import RiskCovLong
from alphamind.data.dbmodel.models import FullFactor
from alphamind.data.dbmodel.models import Gogoal
from alphamind.data.dbmodel.models import Experimental
from alphamind.data.engines.industries import INDUSTRY_MAPPING
factor_tables = [FullFactor, Gogoal, Experimental]
......@@ -43,5 +44,17 @@ def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict:
def _map_industry_category(category: str) -> str:
if category == 'sw':
return '申万行业分类'
if category == 'sw_adj':
return '申万行业分类修订'
elif category == 'zz':
return '中证行业分类'
elif category == 'dx':
return '东兴行业分类'
elif category == 'zjh':
return '证监会行业V2012'
else:
raise ValueError("No other industry is supported at the current time")
\ No newline at end of file
raise ValueError("No other industry is supported at the current time")
def industry_list(category: str, level: int = 1) -> list:
    """Look up the ``INDUSTRY_MAPPING`` entry for ``category`` at ``level``.

    A missing ``category`` or ``level`` propagates the lookup error raised
    by ``INDUSTRY_MAPPING``.
    """
    levels_of_category = INDUSTRY_MAPPING[category]
    return levels_of_category[level]
\ No newline at end of file
......@@ -22,9 +22,9 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
mean_values = transform(groups, x, 'mean')
std_values = transform(groups, x, 'std', ddof)
return (x - mean_values) / std_values
return (x - mean_values) / np.maximum(std_values, 1e-8)
else:
return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)
def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
......@@ -48,7 +48,7 @@ class Standardizer(object):
self.std_ = simple_std(x, axis=0, ddof=self.ddof_)
def transform(self, x: np.ndarray) -> np.ndarray:
return (x - self.mean_) / self.std_
return (x - self.mean_) / np.maximum(self.std_, 1e-8)
class GroupedStandardizer(object):
......@@ -69,4 +69,4 @@ class GroupedStandardizer(object):
def transform(self, x: np.ndarray) -> np.ndarray:
groups = x[:, 0].astype(int)
index = array_index(self.labels_, groups)
return (x[:, 1:] - self.mean_[index]) / self.std_[index]
return (x[:, 1:] - self.mean_[index]) / np.maximum(self.std_[index], 1e-8)
......@@ -5,6 +5,7 @@ Created on 2017-8-23
@author: cheng.li
"""
import copy
import pandas as pd
from PyFin.api import pyFinAssert
from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
......@@ -58,7 +59,7 @@ class Transformer(object):
def __init__(self,
expressions):
expression_dict, expression_dependency = \
factor_translator(expressions)
factor_translator(copy.deepcopy(expressions))
if expression_dict:
self.names = sorted(expression_dict.keys())
......
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
def factor_residue_analysis(start_date,
                            end_date,
                            factor_name,
                            factor,
                            freq,
                            universe,
                            engine):
    """Run a 5-bin quantile analysis of a risk-neutralized factor per rebalance date.

    For every date on the 'china.sse' schedule between ``start_date`` and
    ``end_date`` (step ``freq``) the factor is winsorized, standardized and
    neutralized against SIZE, LEVERAGE and the industry styles, then passed
    to ``er_quantile_analysis`` together with the forward returns.

    Returns a DataFrame indexed by date with one column per bin (0..4), a
    zero row for the business day before the first schedule date, and a
    '$top1 - bottom1$' column holding bin 4 minus bin 0. Dates on which the
    processing raises are printed and recorded as all-zero rows.
    """
    n_bins = 5
    neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
    horizon = map_freq(freq)

    dates = makeSchedule(start_date,
                         end_date,
                         tenor=freq,
                         calendar='china.sse')

    alpha_factor_name = factor_name + '_res'
    alpha_factor = {alpha_factor_name: factor}

    factor_all_data = engine.fetch_data_range(universe, alpha_factor, dates=dates)['factor']
    return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)

    factor_groups = factor_all_data.groupby('trade_date')
    return_groups = return_all_data.groupby('trade_date')

    wanted_columns = ['code', alpha_factor_name, 'isOpen'] + neutralize_risk
    final_res = np.zeros((len(factor_groups.groups), n_bins))
    index_dates = []

    for row, (date, group) in enumerate(factor_groups):
        data = group[wanted_columns]
        returns = return_groups.get_group(date)
        total_data = pd.merge(data, returns, on=['code']).dropna()
        risk_exp = total_data[neutralize_risk].values.astype(float)
        dx_return = total_data.dx.values
        index_dates.append(date)

        try:
            er = factor_processing(total_data[[alpha_factor_name]].values,
                                   pre_process=[winsorize_normal, standardize],
                                   risk_factors=risk_exp,
                                   post_process=[winsorize_normal, standardize])
            res = er_quantile_analysis(er,
                                       n_bins=n_bins,
                                       dx_return=dx_return)
        except Exception as e:
            # Best effort: report the failure and keep a zero row for the date.
            print(e)
            res = np.zeros(n_bins)

        final_res[row] = res

    df = pd.DataFrame(final_res, index=index_dates)

    # Anchor row one business day before the first schedule date.
    start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
    df.loc[start_date] = 0.
    df.sort_index(inplace=True)
    df['$top1 - bottom1$'] = df[n_bins - 1] - df[0]
    return df
def factor_analysis(f_name):
    """Run ``factor_residue_analysis`` for one factor on the zz800 universe.

    The factor ``f_name`` is successively residualized (``CSRes``) against
    'Alpha60', 'roe_q' and 'ep_q' before being analysed over
    2010-01-01 .. 2018-01-26 with a '10b' frequency.

    Returns:
        A ``(f_name, result)`` tuple.
    """
    # Imported locally; the module-level driver ships this function to dask workers.
    from alphamind.api import SqlEngine, Universe, alpha_logger

    engine = SqlEngine()
    universe = Universe('custom', ['zz800'])

    base1 = LAST('Alpha60')
    base2 = CSRes('roe_q', base1)
    ep_on_base1 = CSRes('ep_q', base1)
    base3 = CSRes(ep_on_base1, base2)

    residual = CSRes(LAST(f_name), base1)
    residual = CSRes(residual, base2)
    factor = CSRes(residual, base3)

    res = factor_residue_analysis('2010-01-01',
                                  '2018-01-26',
                                  f_name,
                                  factor,
                                  '10b',
                                  universe,
                                  engine)
    alpha_logger.info('{0} is done'.format(f_name))
    return f_name, res
if __name__ == '__main__':
    # Driver: fan the per-factor analysis out to a dask cluster and collect
    # the top-minus-bottom series of every well-covered factor.
    from dask.distributed import Client

    client = Client('10.63.6.176:8786')

    engine = SqlEngine()
    df = engine.fetch_factor_coverage()
    df = df[df.universe == 'zz800'].groupby('factor').mean()
    # Only factors with at least 98% average coverage are analysed.
    df = df[df.coverage >= 0.98]
    universe = Universe('custom', ['zz800'])

    factor_df = pd.DataFrame()

    factor_names = df.index.tolist()
    tasks = client.map(factor_analysis, factor_names)
    res = client.gather(tasks)

    for f_name, df in res:
        factor_df[f_name] = df['$top1 - bottom1$']

    # Serial (non-dask) variant, kept for reference:
    # for i, f_name in enumerate(df.index):
    #     base1 = LAST('Alpha60')
    #     base2 = CSRes('roe_q', base1)
    #     base3 = CSRes(CSRes('ep_q', base1), base2)
    #     factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
    #     res = factor_residue_analysis('2010-01-01',
    #                                   '2018-01-22',
    #                                   f_name,
    #                                   factor,
    #                                   '10b',
    #                                   universe,
    #                                   engine)
    #     factor_df[f_name] = res['$top1 - bottom1$']
    #     alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
......@@ -19,9 +19,12 @@ start = dt.datetime.now()
universe = Universe('custom', ['zz800'])
simple_expression = CSRes(LAST('OperCashInToAsset'), 'roe_q')
factor_name = 'Beta20'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
simple_expression = CSRes(CSRes(LAST(factor_name), base1), base2)
alpha_factor_name = 'alpha_factor'
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: simple_expression}
# end of formula definition
......@@ -29,7 +32,7 @@ alpha_factor = {alpha_factor_name: simple_expression}
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
freq = '10b'
freq = '5b'
n_bins = 5
horizon = map_freq(freq)
......@@ -91,8 +94,6 @@ df = df.cumsum().plot(ax=axes[0], title='Quantile Analysis for {0}'.format(alpha
# =================================================================== #
factor_name = 'PE'
alpha_factor_name = alpha_factor_name + '_1w_diff'
alpha_factor = {alpha_factor_name: DIFF(simple_expression)}
......
......@@ -402,9 +402,9 @@ if __name__ == '__main__':
universe = Universe('zz500', ['hs300', 'zz500'])
neutralized_risk = ['SIZE']
res = fetch_predict_phase(engine, ['ep_q'],
'2018-01-08',
'5b',
universe,
16,
neutralized_risk=neutralized_risk)
'2012-01-05',
'5b',
universe,
16,
neutralized_risk=neutralized_risk)
print(res)
......@@ -32,8 +32,9 @@ class ConstLinearModel(ModelBase):
def __init__(self,
features: list = None,
formulas: dict = None,
weights: np.ndarray = None):
super().__init__(features)
super().__init__(features, formulas=formulas)
if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights),
ValueError,
......@@ -56,8 +57,8 @@ class ConstLinearModel(ModelBase):
class LinearRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
......@@ -84,8 +85,8 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase):
def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, alpha=0.01, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
......@@ -112,8 +113,8 @@ class LassoRegression(ModelBase):
class LogisticRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
......
......@@ -6,6 +6,7 @@ Created on 2017-9-4
"""
import abc
import copy
import arrow
import numpy as np
from alphamind.utilities import alpha_logger
......@@ -15,10 +16,13 @@ from alphamind.utilities import decode
class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None):
def __init__(self, features: list=None, formulas: dict=None):
if features is not None:
self.features = list(features)
else:
self.features = None
self.impl = None
self.formulas = copy.deepcopy(formulas)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
......@@ -43,6 +47,7 @@ class ModelBase(metaclass=abc.ABCMeta):
features=list(self.features),
trained_time=self.trained_time,
desc=encode(self.impl),
formulas=encode(self.formulas),
internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
return model_desc
......@@ -50,6 +55,7 @@ class ModelBase(metaclass=abc.ABCMeta):
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.formulas = decode(model_desc['formulas'])
obj_layout.trained_time = model_desc['trained_time']
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
......
......@@ -28,7 +28,7 @@ class RandomForestRegressor(ModelBase):
max_features: str='auto',
features: List=None,
**kwargs):
super().__init__(features)
super().__init__(features, **kwargs)
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -61,8 +61,9 @@ class RandomForestClassifier(ModelBase):
n_estimators: int=100,
max_features: str='auto',
features: List = None,
formulas: dict = None,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -96,11 +97,14 @@ class XGBRegressor(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features: List=None,
formulas: dict = None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = XGBRegressorImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
n_jobs=n_jobs,
**kwargs)
def save(self) -> dict:
......@@ -131,12 +135,15 @@ class XGBClassifier(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features: List = None,
formulas: dict = None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = XGBClassifierImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
**kwargs)
learning_rate=learning_rate,
max_depth=max_depth,
n_jobs=n_jobs,
**kwargs)
def save(self) -> dict:
model_desc = super().save()
......@@ -173,9 +180,11 @@ class XGBTrainer(ModelBase):
subsample=1.,
colsample_bytree=1.,
features: List = None,
random_state=0,
formulas: dict = None,
random_state: int=0,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.params = {
'silent': 1,
'objective': objective,
......@@ -185,6 +194,7 @@ class XGBTrainer(ModelBase):
'tree_method': tree_method,
'subsample': subsample,
'colsample_bytree': colsample_bytree,
'nthread': n_jobs,
'seed': random_state
}
......
# -*- coding: utf-8 -*-
"""
Created on 2018-2-6
@author: cheng.li
"""
from typing import List
from typing import Tuple
from math import inf
import copy
from PyFin.api import pyFinAssert
class Allocation(object):
    """Holding of one asset together with the quantity range it may occupy.

    Args:
        code: asset code.
        minimum: smallest quantity allowed (default 0).
        maximum: largest quantity allowed (default unbounded).
        current: quantity currently held (default 0).

    Raises:
        ValueError: if ``current`` lies outside ``[minimum, maximum]``.
    """

    def __init__(self,
                 code: int,
                 minimum: int = 0,
                 maximum: int = inf,
                 current: int = 0):
        self.code = code
        self.minimum = minimum
        self.maximum = maximum
        self.current = current

        # Written as ``not (a <= b)`` so the checks fail on exactly the same
        # inputs as the asserting form they replace.
        if not (self.minimum <= self.current):
            raise ValueError("minimum qty should be lower than current")
        if not (self.maximum >= self.current):
            # The message used to mention "minimum" although the maximum is checked.
            raise ValueError("maximum qty should be greater than current")

    def __repr__(self):
        return "Allocation(code={0}, minimum={1}, maximum={2}, current={3})".format(self.code,
                                                                                   self.minimum,
                                                                                   self.maximum,
                                                                                   self.current)
class Portfolio(object):
    """Named collection of allocations, addressable by asset code."""

    def __init__(self,
                 name: str,
                 allocations: List[Allocation]):
        self.name = name
        # Later entries win when two allocations share a code.
        self.allocations = dict((allocation.code, allocation) for allocation in allocations)

    def __getitem__(self, code):
        """Return the allocation for ``code``.

        An unknown code is registered on the fly with an all-zero allocation
        (minimum, maximum and current all 0), so lookups mutate the portfolio.
        """
        if code not in self.allocations:
            self.allocations[code] = Allocation(code, 0, 0, 0)
        return self.allocations[code]

    def __repr__(self):
        held = list(self.allocations.values())
        return "Portfolio(name={0}, allocations={1})".format(self.name, held)

    @property
    def codes(self) -> List[int]:
        """Asset codes of the portfolio in ascending order."""
        return sorted(self.allocations)
class Execution(object):
    """A quantity to trade for one asset code, with an optional comment."""

    def __init__(self,
                 code: int,
                 qty: int,
                 comment: str = None):
        self.code, self.qty, self.comment = code, qty, comment

    def __repr__(self):
        template = "Execution(code={0}, qty={1}, comment={2})"
        return template.format(self.code, self.qty, self.comment)
class Executions(object):
    """The executions generated for one named portfolio."""

    def __init__(self,
                 name,
                 executions: List[Execution] = None):
        self.name = name
        # Stored as given; ``None`` is kept as ``None``.
        self.executions = executions

    def __repr__(self):
        template = "Executions(name={0}, executions={1})"
        return template.format(self.name, self.executions)
class Asset(object):
    """An asset with the pools it prefers and the pools it must not enter.

    ``priority`` and ``forbidden`` are stored as sets; a missing or empty
    argument becomes an empty set.

    Raises:
        ValueError: if an entry of ``priority`` also appears in ``forbidden``.
    """

    def __init__(self,
                 code: int,
                 name: str = None,
                 priority: List[str] = None,
                 forbidden: List[str] = None):
        self.code = code
        self.name = name
        self.priority = set(priority) if priority else set()
        self.forbidden = set(forbidden) if forbidden else set()
        self._validation()

    def _validation(self):
        # A pool cannot be both preferred and forbidden.
        for pool in self.priority:
            pyFinAssert(pool not in self.forbidden, ValueError, "{0} in priority is in forbidden".format(pool))

    def __repr__(self):
        template = "Asset(code={0}, name={1}, priority={2}, forbidden={3})"
        return template.format(self.code, self.name, self.priority, self.forbidden)
class TargetPositions(object):
    """Target quantities keyed by asset code.

    ``targets`` maps each asset code to an ``(asset, qty)`` tuple.

    Args:
        assets: assets to register; when falsy the container starts empty.
        qtys: target quantities, paired positionally with ``assets``.
    """

    def __init__(self,
                 assets: List["Asset"] = None,
                 qtys: List[int] = None):
        if assets:
            self.targets = {asset.code: (asset, qty) for asset, qty in zip(assets, qtys)}
        else:
            self.targets = {}

    def add_asset(self,
                  asset: "Asset",
                  qty: int):
        """Register ``asset`` with target ``qty``.

        Raises:
            ValueError: if an asset with the same code is already registered.
        """
        if asset.code in self.targets:
            raise ValueError("asset {0} is already in the target positions".format(asset.code))
        self.targets[asset.code] = (asset, qty)

    def __getitem__(self, code: int) -> Tuple["Asset", int]:
        return self.targets[code]

    @property
    def codes(self) -> List[int]:
        """Registered asset codes in ascending order."""
        return sorted(self.targets.keys())

    def __repr__(self):
        # zip(*...) yields nothing for an empty container, which used to make
        # str.format fail with IndexError; fall back to empty tuples instead.
        if self.targets:
            assets, qtys = zip(*self.targets.values())
        else:
            assets, qtys = (), ()
        return "TargetPositions(assets={0}, qtys={1})".format(assets, qtys)
def handle_one_asset(pre_allocation: Allocation,
                     asset: Asset,
                     qty: int) -> Tuple[Execution, Allocation, int]:
    """Move one allocation towards target ``qty`` within its quantity range.

    Returns:
        ``(execution, allocation, remaining)``: the trade needed, the
        allocation after the trade, and the part of ``qty`` that did not fit
        under the allocation's maximum (0 when the whole target fits).

    Raises:
        ValueError: if ``qty`` is below the allocation's minimum.
    """
    code = pre_allocation.code
    minimum = pre_allocation.minimum
    maximum = pre_allocation.maximum
    current = pre_allocation.current

    if qty < minimum:
        raise ValueError("{0}'s target {1} is smaller than minimum amount {2}".format(asset.code, qty, pre_allocation))

    # Either the whole target fits below the maximum, or the position is
    # filled up to the maximum and the rest is handed back to the caller.
    fits = qty < maximum
    new_current = qty if fits else maximum
    remaining = 0 if fits else qty - maximum

    ex = Execution(code, new_current - current)
    allocation = Allocation(code,
                            minimum=minimum,
                            maximum=maximum,
                            current=new_current)
    return ex, allocation, remaining
def pass_through(target_pos: TargetPositions,
                 portfolio: Portfolio) -> Tuple[Executions, Portfolio, TargetPositions]:
    """Push target positions through one portfolio.

    For every target code (ascending): an asset for which the portfolio's
    name is forbidden gets a zero-quantity execution and keeps a copy of its
    allocation and its full target; any other asset is handled by
    ``handle_one_asset`` and carries forward only the quantity that did not
    fit.

    Returns:
        ``(executions, new_portfolio, remaining_targets)``.

    Raises:
        ValueError: if an asset has a non-empty ``priority`` pool.
    """
    p_name = portfolio.name
    remaining_targets = TargetPositions()
    new_allocations = []
    generated = []

    for code in target_pos.codes:
        asset, qty = target_pos[code]

        if asset.priority:
            raise ValueError("asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority))

        if p_name not in asset.forbidden:
            ex, allocation, qty = handle_one_asset(portfolio[code], asset, qty)
        else:
            # Forbidden here: nothing is traded and the target is passed on untouched.
            ex = Execution(code, 0, "{0} is forbidden for {1}".format(code, p_name))
            allocation = copy.deepcopy(portfolio[code])

        remaining_targets.add_asset(asset, qty)
        new_allocations.append(allocation)
        generated.append(ex)

    return Executions(p_name, generated), Portfolio(p_name, new_allocations), remaining_targets
if __name__ == '__main__':
    # Small demo: three targets pushed through a portfolio holding all three codes.
    asset1 = Asset(1, 'a')
    asset2 = Asset(2, 'b')
    asset3 = Asset(3, 'b')
    target_pos = TargetPositions([asset1, asset2, asset3], [200, 300, 100])

    allc1 = Allocation(1, 0, 100, 0)
    allc2 = Allocation(2, 0, 400, 100)
    # This allocation used to be bound to ``allc2`` as well, which silently
    # dropped the allocation for code 2 from the portfolio.
    allc3 = Allocation(3, 0, 400, 200)

    portfolio = Portfolio('test1', [allc1, allc2, allc3])
    executions, portfolio, target_pos = pass_through(target_pos, portfolio)
......@@ -5,17 +5,137 @@ Created on 2017-7-21
@author: cheng.li
"""
from deprecated import deprecated
from math import inf
import numpy as np
import pandas as pd
from enum import IntEnum
from typing import Tuple
from typing import Optional
from typing import Dict
from typing import List
from typing import Union
from typing import Iterable
from PyFin.api import pyFinAssert
class BoundaryDirection(IntEnum):
    """Which side of a box constraint a boundary belongs to."""

    LOWER = -1
    UPPER = 1
class BoundaryType(IntEnum):
    """How a boundary value is combined with the center it is applied to."""

    ABSOLUTE = 0  # the value is added to the center
    RELATIVE = 1  # the value multiplies the center
class BoundaryImpl(object):
    """One side of a box constraint, evaluated against a center value.

    Calling the instance with a center returns ``val + center`` for an
    ABSOLUTE boundary and ``val * center`` for a RELATIVE one.

    Raises:
        ValueError: at construction, if ``b_type`` or ``direction`` is not a
            recognized enum value; on call, if a RELATIVE boundary is given a
            negative center.
    """

    def __init__(self,
                 direction: BoundaryDirection,
                 b_type: BoundaryType,
                 val: float):
        self.direction = direction
        self.b_type = b_type
        self.val = val
        self._validation()

    def _validation(self):
        known_type = self.b_type == BoundaryType.ABSOLUTE or self.b_type == BoundaryType.RELATIVE
        pyFinAssert(known_type,
                    ValueError,
                    "Boundary Type {0} is not recognized".format(self.b_type))

        known_direction = self.direction == BoundaryDirection.LOWER or self.direction == BoundaryDirection.UPPER
        pyFinAssert(known_direction,
                    ValueError,
                    "Boundary direction {0} is not recognized".format(self.direction))

    def __call__(self, center: float):
        if self.b_type == BoundaryType.ABSOLUTE:
            return self.val + center

        # Relative scaling is only accepted for a non-negative center.
        pyFinAssert(center >= 0., ValueError, "relative bounds only support positive back bone value")
        return self.val * center
class BoxBoundary(object):
    """A lower and an upper boundary evaluated together."""

    def __init__(self,
                 lower_bound: BoundaryImpl,
                 upper_bound: BoundaryImpl):
        self.lower = lower_bound
        self.upper = upper_bound

    def bounds(self, center):
        """Return ``(lower, upper)`` for ``center``.

        Raises:
            ValueError: if the evaluated lower bound exceeds the upper bound.
        """
        lower_value = self.lower(center)
        upper_value = self.upper(center)
        pyFinAssert(lower_value <= upper_value, ValueError, "lower bound should be lower then upper bound")
        return lower_value, upper_value
def create_box_bounds(names: List[str],
                      b_type: Union[Iterable[BoundaryType], BoundaryType],
                      l_val: Union[Iterable[float], float],
                      u_val: Union[Iterable[float], float]) -> Dict[str, BoxBoundary]:
    """
    helper function to quickly create a series of bounds

    Each of ``b_type``, ``l_val`` and ``u_val`` may be a single value, which
    is repeated for every name, or an indexable collection holding one entry
    per name. The result maps every name to a ``BoxBoundary`` whose lower and
    upper sides share that name's boundary type.
    """

    def per_name(value):
        # Anything exposing ``__iter__`` is taken as already being per-name.
        if hasattr(value, '__iter__'):
            return value
        return np.array([value] * len(names))

    b_type = per_name(b_type)
    l_val = per_name(l_val)
    u_val = per_name(u_val)

    bounds = dict()
    for position, name in enumerate(names):
        kind = b_type[position]
        lower = BoundaryImpl(BoundaryDirection.LOWER, kind, l_val[position])
        upper = BoundaryImpl(BoundaryDirection.UPPER, kind, u_val[position])
        bounds[name] = BoxBoundary(lower, upper)
    return bounds
class LinearConstraints(object):
    """Box constraints on linear exposures, centered on a backbone vector.

    Args:
        bounds: box boundary per constraint name.
        cons_mat: constraint matrix with one column per constraint name and
            one row per entry of ``backbone``.
        backbone: vector whose exposure to each column is the center the
            boundaries are applied to.

    Raises:
        ValueError: if the number of bounds differs from the number of
            columns of ``cons_mat``, or ``backbone`` does not have one entry
            per row of ``cons_mat``.
    """

    def __init__(self,
                 bounds: Dict[str, "BoxBoundary"],
                 cons_mat: pd.DataFrame,
                 backbone: np.ndarray):
        # These checks previously went through an assert helper called without
        # its exception-class argument, so a failure surfaced as a TypeError
        # ("'str' object is not callable") instead of a validation error.
        if len(bounds) != cons_mat.shape[1]:
            raise ValueError("Number of bounds should be same as number of col of cons_mat")
        if cons_mat.shape[0] != len(backbone):
            raise ValueError("length of backbone should be same as number of rows of cons_mat")

        self.names = list(bounds.keys())
        self.bounds = bounds
        self.cons_mat = cons_mat
        self.backbone = backbone

    def risk_targets(self) -> Tuple[np.ndarray, np.ndarray]:
        """Return ``(lower_bounds, upper_bounds)`` arrays ordered like ``self.names``."""
        lower_bounds = []
        upper_bounds = []

        for name in self.names:
            # Exposure of the backbone to this constraint column.
            center = self.backbone @ self.cons_mat[name].values
            lower, upper = self.bounds[name].bounds(center)
            lower_bounds.append(lower)
            upper_bounds.append(upper)
        return np.array(lower_bounds), np.array(upper_bounds)

    @property
    def risk_exp(self) -> np.ndarray:
        """Constraint matrix values with columns ordered like ``self.names``."""
        return self.cons_mat[self.names].values
@deprecated(reason="Constraints is deprecated in alpha-mind 0.1.1. Please use LinearConstraints instead.")
class Constraints(object):
def __init__(self,
risk_exp: Optional[np.ndarray]=None,
risk_names: Optional[np.ndarray]=None):
risk_exp: Optional[np.ndarray] = None,
risk_names: Optional[np.ndarray] = None):
self.risk_exp = risk_exp
if risk_names is not None:
......@@ -68,4 +188,4 @@ if __name__ == '__main__':
cons = Constraints(risk_exp, risk_names)
cons.set_constraints('b', 0.0, 0.1)
print(cons.risk_targets())
\ No newline at end of file
print(cons.risk_targets())
......@@ -7,7 +7,14 @@ Created on 2017-7-20
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import BoxBoundary
from alphamind.portfolio.constraints import BoundaryImpl
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.constraints import LinearConstraints
class TestConstraints(unittest.TestCase):
......@@ -43,6 +50,100 @@ class TestConstraints(unittest.TestCase):
np.testing.assert_array_almost_equal(risk_targets[0], np.array([-0.1, -np.inf, -0.1]))
np.testing.assert_array_almost_equal(risk_targets[1], np.array([0.1, np.inf, 0.1]))
def test_absolute_box_boundary(self):
    """ABSOLUTE boundaries are offsets added to the center."""
    center = 2.2
    box = BoxBoundary(BoundaryImpl(BoundaryDirection.LOWER, BoundaryType.ABSOLUTE, -0.8),
                      BoundaryImpl(BoundaryDirection.UPPER, BoundaryType.ABSOLUTE, 1.1))

    lower_value, upper_value = box.bounds(center)

    self.assertAlmostEqual(lower_value, 1.4)
    self.assertAlmostEqual(upper_value, 3.3)
def test_relative_box_boundary(self):
    """RELATIVE boundaries are factors multiplied with the center."""
    center = 2.2
    box = BoxBoundary(BoundaryImpl(BoundaryDirection.LOWER, BoundaryType.RELATIVE, 0.8),
                      BoundaryImpl(BoundaryDirection.UPPER, BoundaryType.RELATIVE, 1.1))

    lower_value, upper_value = box.bounds(center)

    self.assertAlmostEqual(lower_value, 1.76)
    self.assertAlmostEqual(upper_value, 2.42)
def test_create_box_bounds_single_value(self):
    """Scalar type and values are applied to every name."""
    b_type = BoundaryType.RELATIVE
    l_val, u_val = 0.8, 1.1

    bounds = create_box_bounds(['a', 'b', 'c'], b_type, l_val, u_val)

    for box in bounds.values():
        self.assertEqual(box.lower.b_type, b_type)
        self.assertEqual(box.upper.b_type, b_type)
        self.assertAlmostEqual(box.lower.val, l_val)
        self.assertAlmostEqual(box.upper.val, u_val)
def test_create_box_bounds_multiple_values(self):
    """Per-name value lists are matched to names by position."""
    names = ['a', 'b', 'c']
    b_type = BoundaryType.RELATIVE
    l_val = [0.9, 0.8, 1.1]
    u_val = [1.1, 1.2, 1.3]

    bounds = create_box_bounds(names, b_type, l_val, u_val)

    for name, expected_lower, expected_upper in zip(names, l_val, u_val):
        box = bounds[name]
        self.assertEqual(box.lower.b_type, b_type)
        self.assertEqual(box.upper.b_type, b_type)
        self.assertAlmostEqual(box.lower.val, expected_lower)
        self.assertAlmostEqual(box.upper.val, expected_upper)
def test_linear_constraints(self):
    """Risk targets equal the backbone exposure shifted by the absolute bounds."""
    names = ['a', 'b', 'c']
    cons_mat = pd.DataFrame(np.random.randn(100, 3), columns=names)
    backbone = np.random.randn(100)

    l_val, u_val = -0.8, 1.1
    bounds = create_box_bounds(names, BoundaryType.ABSOLUTE, l_val, u_val)

    constraints = LinearConstraints(bounds=bounds,
                                    cons_mat=cons_mat,
                                    backbone=backbone)

    l_bounds, u_bounds = constraints.risk_targets()
    risk_exp = constraints.risk_exp

    for column in range(len(names)):
        center = risk_exp[:, column] @ backbone
        self.assertAlmostEqual(center + l_val, l_bounds[column])
        self.assertAlmostEqual(center + u_val, u_bounds[column])
if __name__ == '__main__':
unittest.main()
This diff is collapsed.
arrow >= 0.10.0
cython >= 0.25.2
deprecated >= 1.1.0
numpy >= 1.12.1
pandas >= 0.19.2
scikit-learn >= 0.18.1
......
......@@ -14,7 +14,7 @@ from Cython.Build import cythonize
from distutils.extension import Extension
import numpy as np
VERSION = "0.1.0"
VERSION = "0.1.1"
if platform.system() != "Windows":
import multiprocessing
......
Subproject commit a187ed6c8f3aa40b47d5be80667cbbe6a6fd563d
Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment