Unverified commit 72e4179f authored by lion-sing, committed by GitHub

Merge pull request #3 from alpha-miner/master

update
parents ebd7a27f eac7bef8
......@@ -37,6 +37,7 @@ install:
- pip install simpleutils
- pip install coveralls
- pip install finance-python
- pip install deprecated
- export CWD=$PWD
- cd /usr/src/gtest
- sudo cmake CMakeLists.txt
......
......@@ -48,11 +48,11 @@ alpha-mind provides the tool chain commonly used in multi-factor research, including:
* Linux
On Linux, a C++ compiler (e.g. g++) and a Fortran compiler (e.g. gfortran) are required
   On Linux, a C++ compiler (e.g. g++) and a Fortran compiler (e.g. gfortran) are required:
```bash
build_linux_dependencies.sh
```
## Installation
......
......@@ -6,4 +6,4 @@ Created on 2017-4-25
"""
__version__ = "0.1.0"
__version__ = "0.1.1"
......@@ -7,11 +7,13 @@ Created on 2017-5-25
from typing import Optional
from typing import Tuple
from typing import Union
import numpy as np
import pandas as pd
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.longshortbulder import long_short_build
from alphamind.portfolio.rankbuilder import rank_build
from alphamind.portfolio.linearbuilder import linear_build
......@@ -60,7 +62,7 @@ def factor_analysis(factors: pd.DataFrame,
def er_portfolio_analysis(er: np.ndarray,
industry: np.ndarray,
dx_return: np.ndarray,
constraints: Optional[Constraints]=None,
constraints: Optional[Union[LinearConstraints, Constraints]]=None,
detail_analysis=True,
benchmark: Optional[np.ndarray] = None,
is_tradable: Optional[np.ndarray] = None,
......
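A note on the widened annotation above: `er_portfolio_analysis` now accepts either the deprecated `Constraints` or the new `LinearConstraints`. A minimal, hedged sketch of a new-style call, using only names visible in this PR, with randomly generated data and the optimizer's default bounds assumed:

```python
# Hedged sketch, not repo code: exercising the new LinearConstraints type.
import numpy as np
import pandas as pd
from alphamind.api import *  # er_portfolio_analysis and the constraint helpers

n = 300
er = np.random.randn(n)                  # toy expected returns
industry = np.random.randint(0, 28, n)   # toy industry codes
benchmark_w = np.full(n, 1. / n)         # benchmark weights, used as the backbone

cons_mat = pd.DataFrame({'SIZE': np.random.randn(n)})
bounds = create_box_bounds(['SIZE'], BoundaryType.ABSOLUTE, -0.1, 0.1)
constraints = LinearConstraints(bounds, cons_mat, benchmark_w)

target_pos, _ = er_portfolio_analysis(er, industry, None,
                                      constraints=constraints,
                                      detail_analysis=False,
                                      benchmark=benchmark_w,
                                      method='risk_neutral')
```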
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
from alphamind.data.standardize import standardize
def factor_turn_over(factor_values: np.ndarray,
trade_dates: np.ndarray,
codes: np.ndarray,
use_standize: bool=True):
if use_standize:
factor_values = standardize(factor_values, trade_dates)
if __name__ == '__main__':
from alphamind.api import *
engine = SqlEngine()
factor = 'ep_q'
freq = '5b'
start_date = '2017-06-01'
end_date = '2017-08-01'
universe = Universe('custom', ['zz500'])
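As committed, `factor_turn_over` stops after standardizing; the turnover computation itself is not part of this diff. Purely as a sketch, one plausible definition is one minus the cross-sectional rank autocorrelation of the factor between consecutive trade dates (an assumption, not the repo's definition):

```python
# Hypothetical completion, NOT the committed code.
import numpy as np
import pandas as pd

def factor_turn_over_sketch(factor_values: np.ndarray,
                            trade_dates: np.ndarray,
                            codes: np.ndarray) -> pd.Series:
    df = pd.DataFrame({'trade_date': trade_dates, 'code': codes, 'factor': factor_values})
    # rank each cross section, then pivot to a trade_date x code matrix
    df['rank'] = df.groupby('trade_date')['factor'].rank()
    mat = df.pivot(index='trade_date', columns='code', values='rank')
    # row-wise correlation of each date's ranks with the previous date's
    return 1. - mat.corrwith(mat.shift(1), axis=1)
```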
......@@ -14,6 +14,10 @@ from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.evolver import evolve_positions
from alphamind.data.engines.sqlengine import risk_styles
......@@ -24,6 +28,7 @@ from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.neutralize import neutralize
from alphamind.data.engines.sqlengine import factor_tables
from alphamind.data.engines.utilities import industry_list
from alphamind.model import LinearRegression
from alphamind.model import LassoRegression
......@@ -37,6 +42,7 @@ from alphamind.model import XGBTrainer
from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor
......@@ -56,6 +62,10 @@ __all__ = [
'Universe',
'factor_processing',
'Constraints',
'LinearConstraints',
'BoundaryType',
'BoundaryDirection',
'create_box_bounds',
'evolve_positions',
'risk_styles',
'industry_styles',
......@@ -65,8 +75,10 @@ __all__ = [
'projection',
'neutralize',
'factor_tables',
'industry_list',
'fetch_data_package',
'fetch_train_phase',
'fetch_predict_phase',
'LinearRegression',
'LassoRegression',
'ConstLinearModel',
......
......@@ -670,6 +670,7 @@ class Experimental(Base):
val_q = Column(Float(53))
ep_q = Column(Float(53))
ep_q_d_1w = Column(Float(53))
ev = Column(Float(53))
class FactorMaster(Base):
......
# -*- coding: utf-8 -*-
"""
Created on 2018-1-24
@author: cheng.li
"""
INDUSTRY_MAPPING = {
'sw': {
1: ["采掘", "传媒", "电气设备", "电子", "房地产", "纺织服装", "非银金融", "钢铁", "公用事业", "国防军工", "化工", "机械设备", "计算机", "家用电器", "建筑材料",
"建筑装饰", "交通运输", "农林牧渔", "汽车", "轻工制造", "商业贸易", "食品饮料", "通信", "休闲服务", "医药生物", "银行", "有色金属", "综合"],
2: ["白色家电", "半导体", "包装印刷", "保险", "玻璃制造", "采掘服务", "餐饮", "畜禽养殖", "船舶制造", "地面兵装", "电机", "电力", "电气自动化设备", "电源设备",
"电子制造", "动物保健", "多元金融", "房地产开发", "房屋建设", "纺织制造", "服装家纺", "钢铁", "港口", "高低压设备", "高速公路", "工业金属", "公交", "光学光电子",
"航空运输", "航空装备", "航天装备", "航运", "互联网传媒", "化学纤维", "化学原料", "化学制品", "化学制药", "环保工程及服务", "黄金", "机场", "基础建设",
"计算机设备", "计算机应用", "家用轻工", "金属非金属新材料", "金属制品", "景点", "酒店", "林业", "旅游综合", "贸易", "煤炭开采", "农产品加工", "农业综合",
"其他采掘", "其他电子", "其他建材", "其他交运设备", "其他轻工制造", "其他休闲服务", "汽车服务", "汽车零部件", "汽车整车", "燃气", "商业物业经营", "生物制品",
"石油化工", "石油开采", "食品加工", "视听器材", "水泥制造", "水务", "饲料", "塑料", "铁路运输", "通信设备", "通信运营", "通用机械", "文化传媒", "物流",
"稀有金属", "橡胶", "一般零售", "医疗服务", "医疗器械", "医药商业", "仪器仪表", "银行", "饮料制造", "营销传播", "渔业", "元件", "园林工程", "园区开发",
"运输设备", "造纸", "证券", "中药", "种植业", "专业工程", "专业零售", "专用设备", "装修装饰", "综合"],
3: ["IT服务", "LED", "氨纶", "白酒", "百货", "半导体材料", "包装印刷", "保险", "被动元件", "冰箱", "玻璃制造", "玻纤", "彩电", "餐饮", "超市",
"城轨建设", "乘用车", "储能设备", "畜禽养殖", "船舶制造", "纯碱", "磁性材料", "氮肥", "低压设备", "涤纶", "地面兵装", "电机", "电网自动化", "电子零部件制造",
"电子系统组装", "动物保健", "多业态零售", "多元金融", "房地产开发", "房屋建设", "纺织服装设备", "纺织化学用品", "非金属新材料", "分立器件", "风电设备", "氟化工及制冷剂",
"辅料", "复合肥", "改性塑料", "钢结构", "港口", "高速公路", "高压设备", "工程机械", "工控自动化", "公交", "管材", "光伏设备", "光学元件", "国际工程承包",
"果蔬加工", "海洋捕捞", "航空运输", "航空装备", "航天装备", "航运", "合成革", "互联网信息服务", "化学工程", "化学原料药", "化学制剂", "环保工程及服务", "环保设备",
"黄金", "黄酒", "火电", "火电设备", "机场", "机床工具", "机械基础件", "集成电路", "计量仪表", "计算机设备", "家电零部件", "家纺", "家具", "钾肥", "焦炭加工",
"金属新材料", "金属制品", "酒店", "聚氨酯", "空调", "锂", "粮食种植", "粮油加工", "林业", "磷肥", "磷化工及磷酸盐", "楼宇设备", "路桥施工", "轮胎",
"旅游综合", "铝", "氯碱", "毛纺", "贸易", "煤炭开采", "棉纺", "民爆用品", "磨具磨料", "耐火材料", "男装", "内燃机", "农药", "农业综合", "农用机械",
"女装", "啤酒", "平面媒体", "葡萄酒", "普钢", "其他采掘", "其他采掘服务", "其他电子", "其他纺织", "其他服装", "其他互联网服务", "其他化学原料", "其他化学制品",
"其他基础建设", "其他家用轻工", "其他建材", "其他交运设备", "其他酒类", "其他农产品加工", "其他轻工制造", "其他塑料制品", "其他文化传媒", "其他稀有小金属", "其他纤维",
"其他橡胶制品", "其他休闲服务", "其他种植业", "其他专业工程", "其它电源设备", "其它视听器材", "其它通用机械", "其它专用机械", "汽车服务", "汽车零部件", "铅锌",
"燃机发电", "燃气", "热电", "人工景点", "日用化学产品", "肉制品", "乳品", "软件开发", "软饮料", "商用载货车", "商用载客车", "生物制品", "石油加工", "石油开采",
"石油贸易", "食品综合", "水产养殖", "水电", "水利工程", "水泥制造", "水务", "丝绸", "饲料", "炭黑", "特钢", "调味发酵品", "铁路建设", "铁路设备", "铁路运输",
"通信传输设备", "通信配套服务", "通信运营", "铜", "涂料油漆油墨制造", "维纶", "文娱用品", "钨", "无机盐", "物流", "稀土", "洗衣机", "显示器件", "线缆部件及其他",
"小家电", "鞋帽", "新能源发电", "休闲服装", "冶金矿采化工设备", "一般物业经营", "医疗服务", "医疗器械", "医药商业", "仪器仪表", "移动互联网服务", "银行", "印染",
"印刷包装机械", "印制电路板", "营销服务", "影视动漫", "油气钻采服务", "有线电视网络", "园林工程", "园区开发", "造纸", "粘胶", "证券", "制冷空调设备", "中压设备",
"中药", "终端设备", "种子生产", "重型机械", "珠宝首饰", "专业连锁", "专业市场", "装修装饰", "自然景点", "综合", "综合电力设备商"]
},
'sw_adj': {
1: ["建筑材料", "机械设备", "家用电器", "交通运输", "化工", "纺织服装", "电气设备", "多元金融", "通信", "传媒", "信息服务", "银行", "农林牧渔", "建筑装饰",
"计算机", "轻工制造", "交运设备", "信息设备", "钢铁", "采掘", "建筑建材", "商业贸易", "房地产", "有色金属", "国防军工", "医药生物", "汽车", "公用事业",
"保险", "休闲服务", "证券", "电子", "综合", "食品饮料"]
},
'zz': {
1: ["电信业务", "工业", "公用事业", "金融地产", "可选消费", "能源", "信息技术", "医药卫生", "原材料", "主要消费"],
2: ["半导体", "保险", "传媒", "电信服务", "房地产", "公用事业", "计算机及电子设备", "计算机运用", "家庭与个人用品", "交通运输", "零售业", "耐用消费品与服装", "能源",
"其他金融", "汽车与汽车零部件", "商业服务与用品", "食品、饮料与烟草", "食品与主要用品零售", "通信设备", "消费者服务", "医疗器械与服务", "医药生物", "银行", "原材料",
"资本品", "资本市场"],
3: ["半导体", "包装食品与肉类", "保险", "传媒", "道路运输", "电力", "电脑与外围设备", "电气设备", "电网", "电信运营服务", "电信增值服务", "电子设备", "多元化零售",
"房地产管理与服务", "房地产开发与园区", "纺织服装", "非金属采矿及制品", "钢铁", "个人用品", "工业集团企业", "供热或其他公用事业", "航空公司", "航空航天与国防",
"航空货运与物流", "航运", "互联网服务", "互联网零售", "化学原料", "化学制品", "环保设备、工程与服务", "机械制造", "家常用品", "家庭耐用消费品", "建筑材料", "建筑产品",
"建筑与工程", "交通基本设施", "酒店、餐馆与休闲", "煤炭", "能源开采设备与服务", "农牧渔产品", "其他金融服务", "其他零售", "汽车零配件与轮胎", "汽车与摩托车", "燃气",
"日用品经销商", "容器与包装", "软件开发", "商业服务与用品", "商业银行", "生物科技", "石油与天然气", "食品与主要用品零售", "水务", "通信设备", "消费信贷", "信息技术服务",
"休闲设备与用品", "医疗器械", "医疗用品与服务提供商", "饮料", "有色金属", "纸类与林业产品", "制药", "制药与生物科技服务", "珠宝与奢侈品", "资本市场", "综合消费者服务"]
},
'zjh': {
1: ["采矿业", "电力、热力、燃气及水生产和供应业", "房地产业", "建筑业", "交通运输、仓储和邮政业", "教育", "金融业", "居民服务、修理和其他服务业", "科学研究和技术服务业",
"农、林、牧、渔业", "批发和零售业", "水利、环境和公共设施管理业", "卫生和社会工作", "文化、体育和娱乐业", "信息传输、软件和信息技术服务业", "制造业", "住宿和餐饮业", "综合",
"租赁和商务服务业"],
2: ["保险业", "餐饮业", "仓储业", "畜牧业", "道路运输业", "电力、热力生产和供应业", "电气机械和器材制造业", "电信、广播电视和卫星传输服务", "房地产业", "房屋建筑业",
"纺织服装、服饰业", "纺织业", "非金属矿采选业", "非金属矿物制品业", "废弃资源综合利用业", "公共设施管理业", "广播、电视、电影和影视录音制作业", "航空运输业", "黑色金属矿采选业",
"黑色金属冶炼和压延加工业", "互联网和相关服务", "化学纤维制造业", "化学原料和化学制品制造业", "货币金融服务", "机动车、电子产品和日用产品修理业", "计算机、通信和其他电子设备制造业",
"家具制造业", "建筑安装业", "建筑装饰和其他建筑业", "教育", "金属制品业", "酒、饮料和精制茶制造业", "开采辅助活动", "林业", "零售业", "煤炭开采和洗选业",
"木材加工和木、竹、藤、棕、草制品业", "农、林、牧、渔服务业", "农副食品加工业", "农业", "批发业", "皮革、毛皮、羽毛及其制品和制鞋业", "其他金融业", "其他制造业", "汽车制造业",
"燃气生产和供应业", "软件和信息技术服务业", "商务服务业", "生态保护和环境治理业", "石油和天然气开采业", "石油加工、炼焦和核燃料加工业", "食品制造业", "水的生产和供应业",
"水利管理业", "水上运输业", "体育", "铁路、船舶、航空航天和其它运输设备制造业", "铁路运输业", "通用设备制造业", "土木工程建筑业", "卫生", "文化艺术业",
"文教、工美、体育和娱乐用品制造业", "橡胶和塑料制品业", "新闻和出版业", "研究和试验发展", "医药制造业", "仪器仪表制造业", "印刷和记录媒介复制业", "邮政业", "有色金属矿采选业",
"有色金属冶炼和压延加工业", "渔业", "造纸和纸制品业", "住宿业", "专业技术服务业", "专用设备制造业", "装卸搬运和运输代理业", "资本市场服务", "综合", "租赁业"],
},
'dx': {
1: ["Cyclical", "Defensive", "Sensitive"],
2: ["ConsumerDiscretionary", "ConsumerStaples", "Financials", "HealthCare", "Industrials", "IT", "Materials",
"RealEstate", "Utilities"]
}
}
......@@ -42,6 +42,7 @@ from alphamind.data.engines.utilities import _map_factors
from alphamind.data.engines.utilities import _map_industry_category
from alphamind.data.engines.utilities import _map_risk_model_table
from alphamind.data.engines.utilities import factor_tables
from alphamind.data.engines.utilities import industry_list
from PyFin.api import advanceDateByCalendar
risk_styles = ['BETA',
......@@ -207,12 +208,12 @@ class SqlEngine(object):
cond = universe._query_statements(start_date, end_date, None)
big_table = join(Market, UniverseTable,
and_(
Market.trade_date == UniverseTable.trade_date,
Market.code == UniverseTable.code,
cond
)
)
and_(
Market.trade_date == UniverseTable.trade_date,
Market.code == UniverseTable.code,
cond
)
)
query = select([Market.trade_date, Market.code, stats]) \
.select_from(big_table)
......@@ -379,7 +380,7 @@ class SqlEngine(object):
FullFactor.code == UniverseTable.code,
cond
)
)
)
query = select(
[FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
......@@ -498,7 +499,7 @@ class SqlEngine(object):
FullFactor.code == UniverseTable.code,
cond
)
)
)
query = select(
[FullFactor.trade_date, FullFactor.code, special_risk_col] + risk_exposure_cols).select_from(big_table) \
......@@ -508,20 +509,24 @@ class SqlEngine(object):
if universe.is_filtered:
codes = universe.query(self, start_date, end_date, dates)
risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(
['trade_date', 'code'])
return risk_cov, risk_exp
def fetch_industry(self,
ref_date: str,
codes: Iterable[int],
category: str = 'sw'):
category: str = 'sw',
level: int = 1):
industry_category_name = _map_industry_category(category)
code_name = 'industryID' + str(level)
category_name = 'industryName' + str(level)
query = select([Industry.code,
Industry.industryID1.label('industry_code'),
Industry.industryName1.label('industry')]).where(
getattr(Industry, code_name).label('industry_code'),
getattr(Industry, category_name).label('industry')]).where(
and_(
Industry.trade_date == ref_date,
Industry.code.in_(codes),
......@@ -531,14 +536,36 @@ class SqlEngine(object):
return pd.read_sql(query, self.engine)
def fetch_industry_matrix(self,
ref_date: str,
codes: Iterable[int],
category: str = 'sw',
level: int = 1):
df = self.fetch_industry(ref_date, codes, category, level)
df['industry_name'] = df['industry']
df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
industries = industry_list(category, level)
in_s = []
out_s = []
for i in industries:
if i in df:
in_s.append(i)
else:
out_s.append(i)
res = df[['code', 'industry_code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0] * len(out_s))))
return res
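`fetch_industry_matrix` one-hot encodes the industry labels and zero-pads any industry missing from that day's snapshot, so the result always has one column per entry of `industry_list(category, level)`. A standalone sketch of the same padding logic with a toy universe instead of the real classification lists:

```python
# Toy reproduction of the dummy-matrix padding used above.
import pandas as pd

df = pd.DataFrame({'code': [1, 2, 3],
                   'industry': ['Bank', 'Steel', 'Bank']})
df['industry_name'] = df['industry']
df = pd.get_dummies(df, columns=['industry'], prefix='', prefix_sep='')

industries = ['Bank', 'Steel', 'Media']           # full classification list
in_s = [i for i in industries if i in df]         # columns already present
out_s = [i for i in industries if i not in df]    # industries absent today

res = df[['code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0] * len(out_s))))  # zero-pad the rest
print(res)   # the 'Media' column exists and is all zeros
```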
def fetch_industry_range(self,
universe: Universe,
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None,
category: str = 'sw'):
category: str = 'sw',
level: int = 1):
industry_category_name = _map_industry_category(category)
cond = universe._query_statements(start_date, end_date, dates)
big_table = join(Industry, UniverseTable,
......@@ -547,13 +574,15 @@ class SqlEngine(object):
Industry.code == UniverseTable.code,
Industry.industry == industry_category_name,
cond
)
)
))
code_name = 'industryID' + str(level)
category_name = 'industryName' + str(level)
query = select([Industry.trade_date,
Industry.code,
Industry.industryID1.label('industry_code'),
Industry.industryName1.label('industry')]).select_from(big_table).distinct()
getattr(Industry, code_name).label('industry_code'),
getattr(Industry, category_name).label('industry')]).select_from(big_table).distinct()
df = pd.read_sql(query, self.engine)
if universe.is_filtered:
......@@ -561,7 +590,46 @@ class SqlEngine(object):
df = pd.merge(df, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
return df
def fetch_data(self, ref_date: str,
def fetch_industry_matrix_range(self,
universe: Universe,
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None,
category: str = 'sw',
level: int = 1):
df = self.fetch_industry_range(universe, start_date, end_date, dates, category, level)
df['industry_name'] = df['industry']
df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
industries = industry_list(category, level)
in_s = []
out_s = []
for i in industries:
if i in df:
in_s.append(i)
else:
out_s.append(i)
res = df[['trade_date', 'code', 'industry_code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0]*len(out_s))))
return res
def fetch_trade_status(self,
ref_date: str,
codes: Iterable[int]):
query = select([Market.code, Market.isOpen]).where(
and_(
Market.trade_date == ref_date,
Market.code.in_(codes)
)
)
return pd.read_sql(query, self.engine).sort_values(['code'])
def fetch_data(self,
ref_date: str,
factors: Iterable[str],
codes: Iterable[int],
benchmark: int = None,
......@@ -802,10 +870,10 @@ class SqlEngine(object):
else:
id_filter = 'in_'
t = select([table.trade_id]).\
t = select([table.trade_id]). \
where(and_(table.trade_date <= ref_date,
table.operation == 'withdraw')).alias('t')
query = select([table]).\
query = select([table]). \
where(and_(getattr(table.trade_id, id_filter)(t),
table.trade_date <= ref_date,
table.operation == 'lend'))
......@@ -823,7 +891,7 @@ class SqlEngine(object):
rule = x['price_rule'].split('@')
if rule[0] in ['closePrice', 'openPrice']:
query = select([getattr(Market, rule[0])]).\
query = select([getattr(Market, rule[0])]). \
where(and_(Market.code == code, Market.trade_date == rule[1]))
data = pd.read_sql(query, self.engine)
if not data.empty:
......@@ -835,6 +903,7 @@ class SqlEngine(object):
else:
raise KeyError('do not have rule for %s' % x['price_rule'])
return price
df['price'] = df.apply(lambda x: parse_price_rule(x), axis=1)
df.drop(['remark', 'price_rule', 'operation'], axis=1, inplace=True)
......@@ -848,12 +917,10 @@ class SqlEngine(object):
if __name__ == '__main__':
universe = Universe('ss', ['hs300'])
engine = SqlEngine()
df = engine.fetch_outright_status('2017-12-28')
print(df)
ref_date = '2017-12-28'
codes = universe.query(engine, dates=[ref_date])
df = engine.fetch_trade_status(ref_date, codes.code.tolist())
print(df)
\ No newline at end of file
......@@ -13,6 +13,7 @@ from alphamind.data.dbmodel.models import RiskCovLong
from alphamind.data.dbmodel.models import FullFactor
from alphamind.data.dbmodel.models import Gogoal
from alphamind.data.dbmodel.models import Experimental
from alphamind.data.engines.industries import INDUSTRY_MAPPING
factor_tables = [FullFactor, Gogoal, Experimental]
......@@ -43,5 +44,17 @@ def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict:
def _map_industry_category(category: str) -> str:
if category == 'sw':
return '申万行业分类'
if category == 'sw_adj':
return '申万行业分类修订'
elif category == 'zz':
return '中证行业分类'
elif category == 'dx':
return '东兴行业分类'
elif category == 'zjh':
return '证监会行业V2012'
else:
raise ValueError("No other industry is supported at the current time")
\ No newline at end of file
raise ValueError("No other industry is supported at the current time")
def industry_list(category: str, level: int=1) -> list:
return INDUSTRY_MAPPING[category][level]
\ No newline at end of file
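`industry_list` is a plain lookup into `INDUSTRY_MAPPING`; for example:

```python
from alphamind.api import industry_list

print(len(industry_list('sw', 1)))   # 28 level-1 Shenwan industries
print(len(industry_list('zz')))      # 10 level-1 CSI industries (level defaults to 1)
```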
......@@ -22,9 +22,9 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
mean_values = transform(groups, x, 'mean')
std_values = transform(groups, x, 'std', ddof)
return (x - mean_values) / std_values
return (x - mean_values) / np.maximum(std_values, 1e-8)
else:
return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)
def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
......@@ -48,7 +48,7 @@ class Standardizer(object):
self.std_ = simple_std(x, axis=0, ddof=self.ddof_)
def transform(self, x: np.ndarray) -> np.ndarray:
return (x - self.mean_) / self.std_
return (x - self.mean_) / np.maximum(self.std_, 1e-8)
class GroupedStandardizer(object):
......@@ -69,4 +69,4 @@ class GroupedStandardizer(object):
def transform(self, x: np.ndarray) -> np.ndarray:
groups = x[:, 0].astype(int)
index = array_index(self.labels_, groups)
return (x[:, 1:] - self.mean_[index]) / self.std_[index]
return (x[:, 1:] - self.mean_[index]) / np.maximum(self.std_[index], 1e-8)
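All four `standardize` changes apply the same guard: the z-score denominator becomes `max(std, 1e-8)`, so a zero-variance column maps to zeros instead of NaN or inf. A quick check of the behavior:

```python
# Quick check of the new zero-variance guard.
import numpy as np

x = np.array([3., 3., 3.])                       # constant column, std == 0
std = x.std(ddof=1)
print((x - x.mean()) / std)                       # nan (0/0) under the old code
print((x - x.mean()) / np.maximum(std, 1e-8))     # [0. 0. 0.] under the new code
```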
......@@ -5,6 +5,7 @@ Created on 2017-8-23
@author: cheng.li
"""
import copy
import pandas as pd
from PyFin.api import pyFinAssert
from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
......@@ -58,7 +59,7 @@ class Transformer(object):
def __init__(self,
expressions):
expression_dict, expression_dependency = \
factor_translator(expressions)
factor_translator(copy.deepcopy(expressions))
if expression_dict:
self.names = sorted(expression_dict.keys())
......
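Deep-copying the expressions before handing them to `factor_translator` insulates the caller's expression objects from in-place mutation during translation, so the same dict can safely seed several `Transformer`s. The hazard being avoided looks like this (a contrived sketch, not the PyFin internals):

```python
# Contrived illustration: without the deepcopy, a translator that mutates
# its input would corrupt the caller's dict.
import copy

expressions = {'alpha': ['ep_q', 'roe_q']}

def careless_translator(exprs):
    exprs['alpha'].append('_translated')   # in-place mutation
    return exprs

careless_translator(copy.deepcopy(expressions))
print(expressions['alpha'])   # still ['ep_q', 'roe_q'] thanks to the copy
```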
......@@ -20,195 +20,249 @@ plt.style.use('ggplot')
Backtest parameter settings
"""
start_date = '2015-01-01'
end_date = '2017-11-28'
benchmark_code = 300
universe_name = ['hs300']
universe = Universe(universe_name, universe_name)
frequency = '5b'
start_date = '2010-01-01'
end_date = '2018-01-26'
frequency = '10b'
method = 'risk_neutral'
use_rank = 100
industry_lower = 1.
industry_upper = 1.
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles
size_risk_lower = 0
size_risk_upper = 0
turn_over_target_base = 0.25
benchmark_total_lower = 1.
benchmark_total_upper = 1.
turn_over_target_base = 0.30
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
horizon = map_freq(frequency)
executor = NaiveExecutor()
engine = SqlEngine()
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = ['alpha_factor']
#const_features = {alpha_name[0]: LAST('optimism_confidence_25d') + LAST('pessimism_confidence_25d')}
# const_features = {alpha_name[0]: CSRes(DIFF(1. / LAST('PE')), LAST('roe_q'))}
simple_expression = LAST('cfinc1_q') # CSRes(CSRes(LAST('DividendPS'), LAST('roe_q')), LAST('ep_q'))
const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = []
def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True, neutralize_factors=None):
factor_groups = const_model_factor_data.groupby('trade_date')
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
ref_date = date.strftime('%Y-%m-%d')
index_dates.append(date)
if neutralize_factors:
prev_factors = []
for i, f in enumerate(neutralize_factors):
pure_factor = LAST(f)
for j in range(i):
pure_factor = CSRes(pure_factor, prev_factors[j])
prev_factors.append(pure_factor)
total_data = data.fillna(data[alpha_name].median())
alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values
benchmark_w = total_data.weight.values
constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data))
ubound = 0.02 + benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
constraint = Constraints(risk_exp_expand, risk_names)
for i, name in enumerate(risk_names):
if name == 'total':
constraint.set_constraints(name,
lower_bound=risk_target[i],
upper_bound=risk_target[i])
elif name == 'SIZE':
base_target = abs(risk_target[i])
constraint.set_constraints(name,
lower_bound=risk_target[i] + base_target * size_risk_lower,
upper_bound=risk_target[i] + base_target * size_risk_upper)
elif name == 'benchmark_total':
base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
else:
constraint.set_constraints(name,
lower_bound=risk_target[i] * industry_lower,
upper_bound=risk_target[i] * industry_upper)
factor_values = factor_processing(total_data[alpha_name].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er = const_model.predict(factor_values)
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
simple_expression = LAST(factor_name)
for f in prev_factors:
simple_expression = CSRes(simple_expression, f)
else:
previous_pos.set_index('code', inplace=True)
remained_pos = previous_pos.loc[codes]
remained_pos.fillna(0., inplace=True)
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values
try:
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
use_rank=use_rank,
turn_over_target=turn_over_target,
current_position=current_position,
lbound=lbound,
ubound=ubound)
except ValueError:
alpha_logger.info('{0} full re-balance'.format(date))
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
use_rank=use_rank,
lbound=lbound,
ubound=ubound)
target_pos['code'] = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code'])
leverage = result.weight_x.abs().sum()
ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
rets.append(np.log(1. + ret))
executor.set_current(executed_pos)
turn_overs.append(turn_over)
leverags.append(leverage)
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates)
# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0}'.format(frequency),
secondary_y='tc_cost')
plt.show()
simple_expression = LAST(factor_name)
if not positive:
simple_expression = -simple_expression
const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = []
factor_groups = const_model_factor_data.groupby('trade_date')
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
index_dates.append(date)
total_data = data.fillna(data[alpha_name].median())
alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values
benchmark_w = total_data.weight.values
constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data))
ubound = 0.02 + benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
constraint = Constraints(risk_exp_expand, risk_names)
for j, name in enumerate(risk_names):
if name == 'total':
constraint.set_constraints(name,
lower_bound=risk_target[j],
upper_bound=risk_target[j])
elif name == 'SIZE':
base_target = abs(risk_target[j])
constraint.set_constraints(name,
lower_bound=risk_target[j] + base_target * size_risk_lower,
upper_bound=risk_target[j] + base_target * size_risk_upper)
elif name == 'benchmark_total':
base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
else:
constraint.set_constraints(name,
lower_bound=risk_target[j] * industry_lower,
upper_bound=risk_target[j] * industry_upper)
factor_values = factor_processing(total_data[alpha_name].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er = const_model.predict(factor_values)
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
else:
previous_pos.set_index('code', inplace=True)
remained_pos = previous_pos.loc[codes]
remained_pos.fillna(0., inplace=True)
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values
try:
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
turn_over_target=turn_over_target,
current_position=current_position,
lbound=lbound,
ubound=ubound)
except ValueError:
alpha_logger.info('{0} full re-balance'.format(date))
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
lbound=lbound,
ubound=ubound)
target_pos['code'] = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code'])
leverage = result.weight_x.abs().sum()
ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
rets.append(np.log(1. + ret))
executor.set_current(executed_pos)
turn_overs.append(turn_over)
leverags.append(leverage)
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=index_dates)
# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
return alpha_name[0], ret_df
def worker_func_positive(factor_name):
from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q']
engine = SqlEngine()
benchmark_code = 905
universe_name = ['zz500']
universe = Universe('custom', universe_name)
return factor_analysis(engine, factor_name, universe, benchmark_code, positive=True, neutralize_factors=neutralize_factors)
def worker_func_negative(factor_name):
from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q']
engine = SqlEngine()
benchmark_code = 905
universe_name = ['zz500']
universe = Universe('custom', universe_name)
return factor_analysis(engine, factor_name, universe, benchmark_code, positive=False, neutralize_factors=neutralize_factors)
if __name__ == '__main__':
# from dask.distributed import Client
#
# client = Client('10.63.6.176:8786')
#
# engine = SqlEngine()
# df = engine.fetch_factor_coverage()
# df = df[df.universe == 'zz800'].groupby('factor').mean()
# df = df[df.coverage >= 0.98]
#
# tasks = client.map(worker_func_positive, df.index.tolist())
# res1 = client.gather(tasks)
#
# tasks = client.map(worker_func_negative, df.index.tolist())
# res2 = client.gather(tasks)
#
# factor_df = pd.DataFrame()
#
# for f_name, df in res1:
# factor_df[f_name] = df['returns']
#
# for f_name, df in res2:
# factor_df[f_name] = df['returns']
factor_name = LAST('ep_q') # LAST('EBITDA') / LAST('ev')
f_name, ret_df = worker_func_positive(factor_name)
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
frequency, factor_name, 905),
secondary_y='tc_cost')
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
def factor_residue_analysis(start_date,
end_date,
factor_name,
factor,
freq,
universe,
engine):
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
n_bins = 5
horizon = map_freq(freq)
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: factor}
factor_all_data = engine.fetch_data_range(universe,
alpha_factor,
dates=dates)['factor']
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
final_res = np.zeros((len(factor_groups.groups), n_bins))
index_dates = []
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1][['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
returns = return_groups.get_group(date)
total_data = pd.merge(data, returns, on=['code']).dropna()
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
index_dates.append(date)
try:
er = factor_processing(total_data[[alpha_factor_name]].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
res = er_quantile_analysis(er,
n_bins=n_bins,
dx_return=dx_return)
except Exception as e:
print(e)
res = np.zeros(n_bins)
final_res[i] = res
df = pd.DataFrame(final_res, index=index_dates)
start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
df.loc[start_date] = 0.
df.sort_index(inplace=True)
df['$top1 - bottom1$'] = df[4] - df[0]
return df
def factor_analysis(f_name):
from alphamind.api import SqlEngine, Universe, alpha_logger
engine = SqlEngine()
universe = Universe('custom', ['zz800'])
base1 = LAST('Alpha60')
base2 = CSRes('roe_q', base1)
base3 = CSRes(CSRes('ep_q', base1), base2)
factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
res = factor_residue_analysis('2010-01-01',
'2018-01-26',
f_name,
factor,
'10b',
universe,
engine)
alpha_logger.info('{0} is done'.format(f_name))
return f_name, res
if __name__ == '__main__':
from dask.distributed import Client
client = Client('10.63.6.176:8786')
engine = SqlEngine()
df = engine.fetch_factor_coverage()
df = df[df.universe == 'zz800'].groupby('factor').mean()
df = df[df.coverage >= 0.98]
universe = Universe('custom', ['zz800'])
factor_df = pd.DataFrame()
tasks = client.map(factor_analysis, df.index.tolist())
res = client.gather(tasks)
for f_name, df in res:
factor_df[f_name] = df['$top1 - bottom1$']
# for i, f_name in enumerate(df.index):
# base1 = LAST('Alpha60')
# base2 = CSRes('roe_q', base1)
# base3 = CSRes(CSRes('ep_q', base1), base2)
# factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
# res = factor_residue_analysis('2010-01-01',
# '2018-01-22',
# f_name,
# factor,
# '10b',
# universe,
# engine)
# factor_df[f_name] = res['$top1 - bottom1$']
# alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
......@@ -19,9 +19,12 @@ start = dt.datetime.now()
universe = Universe('custom', ['zz800'])
simple_expression = CSRes(LAST('OperCashInToAsset'), 'roe_q')
factor_name = 'Beta20'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
simple_expression = CSRes(CSRes(LAST(factor_name), base1), base2)
alpha_factor_name = 'alpha_factor'
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: simple_expression}
# end of formula definition
......@@ -29,7 +32,7 @@ alpha_factor = {alpha_factor_name: simple_expression}
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
freq = '10b'
freq = '5b'
n_bins = 5
horizon = map_freq(freq)
......@@ -91,8 +94,6 @@ df = df.cumsum().plot(ax=axes[0], title='Quantile Analysis for {0}'.format(alpha
# =================================================================== #
factor_name = 'PE'
alpha_factor_name = alpha_factor_name + '_1w_diff'
alpha_factor = {alpha_factor_name: DIFF(simple_expression)}
......
......@@ -402,9 +402,9 @@ if __name__ == '__main__':
universe = Universe('zz500', ['hs300', 'zz500'])
neutralized_risk = ['SIZE']
res = fetch_predict_phase(engine, ['ep_q'],
'2018-01-08',
'5b',
universe,
16,
neutralized_risk=neutralized_risk)
'2012-01-05',
'5b',
universe,
16,
neutralized_risk=neutralized_risk)
print(res)
......@@ -32,8 +32,9 @@ class ConstLinearModel(ModelBase):
def __init__(self,
features: list = None,
formulas: dict = None,
weights: np.ndarray = None):
super().__init__(features)
super().__init__(features, formulas=formulas)
if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights),
ValueError,
......@@ -56,8 +57,8 @@ class ConstLinearModel(ModelBase):
class LinearRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
......@@ -84,8 +85,8 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase):
def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, alpha=0.01, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
......@@ -112,8 +113,8 @@ class LassoRegression(ModelBase):
class LogisticRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
......
......@@ -6,6 +6,7 @@ Created on 2017-9-4
"""
import abc
import copy
import arrow
import numpy as np
from alphamind.utilities import alpha_logger
......@@ -15,10 +16,13 @@ from alphamind.utilities import decode
class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None):
def __init__(self, features: list=None, formulas: dict=None):
if features is not None:
self.features = list(features)
else:
self.features = None
self.impl = None
self.formulas = copy.deepcopy(formulas)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
......@@ -43,6 +47,7 @@ class ModelBase(metaclass=abc.ABCMeta):
features=list(self.features),
trained_time=self.trained_time,
desc=encode(self.impl),
formulas=encode(self.formulas),
internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
return model_desc
......@@ -50,6 +55,7 @@ class ModelBase(metaclass=abc.ABCMeta):
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.formulas = decode(model_desc['formulas'])
obj_layout.trained_time = model_desc['trained_time']
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
......
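With `formulas` now encoded by `save()` and decoded by `load()`, a model's factor formulas survive the serialization round trip alongside the fitted estimator. A hedged sketch (the formulas dict is illustrative, and `encode`/`decode` are assumed to invert each other as used above):

```python
# Hedged sketch; LinearRegression is one of the subclasses updated in this
# PR to forward formulas to ModelBase.
import numpy as np
from alphamind.model import LinearRegression

x = np.random.randn(100, 2)
y = x @ np.array([0.5, -0.2])

model = LinearRegression(features=['f1', 'f2'],
                         formulas={'f1': 'LAST(ep_q)', 'f2': 'LAST(roe_q)'})
model.fit(x, y)                          # fit so save() has a trained impl
desc = model.save()                      # 'formulas' is encoded into the dict
restored = LinearRegression.load(desc)   # ...and decoded back on load
print(restored.formulas)                 # {'f1': 'LAST(ep_q)', 'f2': 'LAST(roe_q)'}
```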
......@@ -28,7 +28,7 @@ class RandomForestRegressor(ModelBase):
max_features: str='auto',
features: List=None,
**kwargs):
super().__init__(features)
super().__init__(features, **kwargs)
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -61,8 +61,9 @@ class RandomForestClassifier(ModelBase):
n_estimators: int=100,
max_features: str='auto',
features: List = None,
formulas: dict = None,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -96,11 +97,14 @@ class XGBRegressor(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features: List=None,
formulas: dict = None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = XGBRegressorImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
n_jobs=n_jobs,
**kwargs)
def save(self) -> dict:
......@@ -131,12 +135,15 @@ class XGBClassifier(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features: List = None,
formulas: dict = None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = XGBClassifierImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
**kwargs)
learning_rate=learning_rate,
max_depth=max_depth,
n_jobs=n_jobs,
**kwargs)
def save(self) -> dict:
model_desc = super().save()
......@@ -173,9 +180,11 @@ class XGBTrainer(ModelBase):
subsample=1.,
colsample_bytree=1.,
features: List = None,
random_state=0,
formulas: dict = None,
random_state: int=0,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.params = {
'silent': 1,
'objective': objective,
......@@ -185,6 +194,7 @@ class XGBTrainer(ModelBase):
'tree_method': tree_method,
'subsample': subsample,
'colsample_bytree': colsample_bytree,
'nthread': n_jobs,
'seed': random_state
}
......
# -*- coding: utf-8 -*-
"""
Created on 2018-2-6
@author: cheng.li
"""
from typing import List
from typing import Tuple
from math import inf
import copy
from PyFin.api import pyFinAssert
class Allocation(object):
def __init__(self,
code: int,
minimum: int=0,
maximum: int=inf,
current: int=0):
self.code = code
self.minimum = minimum
self.maximum = maximum
self.current = current
pyFinAssert(self.minimum <= self.current, ValueError, "minimum qty should be no greater than current")
pyFinAssert(self.maximum >= self.current, ValueError, "maximum qty should be no less than current")
def __repr__(self):
return "Allocation(code={0}, minimum={1}, maximum={2}, current={3})".format(self.code,
self.minimum,
self.maximum,
self.current)
class Portfolio(object):
def __init__(self,
name: str,
allocations: List[Allocation]):
self.name = name
self.allocations = {a.code: a for a in allocations}
def __getitem__(self, code):
try:
return self.allocations[code]
except KeyError:
allocation = Allocation(code, 0, 0, 0)
self.allocations[code] = allocation
return allocation
def __repr__(self):
return "Portfolio(name={0}, allocations={1})".format(self.name,
list(self.allocations.values()))
@property
def codes(self) -> List[int]:
return sorted(self.allocations.keys())
class Execution(object):
def __init__(self,
code: int,
qty: int,
comment: str=None):
self.code = code
self.qty = qty
self.comment = comment
def __repr__(self):
return "Execution(code={0}, qty={1}, comment={2})".format(self.code,
self.qty,
self.comment)
class Executions(object):
def __init__(self,
name,
executions: List[Execution]=None):
self.name = name
self.executions = executions
def __repr__(self):
return "Executions(name={0}, executions={1})".format(self.name,
self.executions)
class Asset(object):
def __init__(self,
code: int,
name: str=None,
priority: List[str]=None,
forbidden: List[str]=None):
self.code = code
self.name = name
if priority:
self.priority = set(priority)
else:
self.priority = set()
if forbidden:
self.forbidden = set(forbidden)
else:
self.forbidden = set()
self._validation()
def _validation(self):
for p in self.priority:
pyFinAssert(p not in self.forbidden, ValueError, "{0} in priority is in forbidden".format(p))
def __repr__(self):
return "Asset(code={0}, name={1}, priority={2}, forbidden={3})".format(self.code,
self.name,
self.priority,
self.forbidden)
class TargetPositions(object):
def __init__(self,
assets: List[Asset]=None,
qtys: List[int]=None):
if assets:
self.targets = {asset.code: (asset, qty) for asset, qty in zip(assets, qtys)}
else:
self.targets = {}
def add_asset(self,
asset: Asset,
qty: int):
if asset.code in self.targets:
raise ValueError()
self.targets[asset.code] = (asset, qty)
def __getitem__(self, code: int) -> Tuple[Asset, int]:
return self.targets[code]
@property
def codes(self) -> List[int]:
return sorted(self.targets.keys())
def __repr__(self):
return "TargetPositions(assets={0}, qtys={1})".format(*zip(*self.targets.values()))
def handle_one_asset(pre_allocation: Allocation,
asset: Asset,
qty: int) -> Tuple[Execution, Allocation, int]:
minimum = pre_allocation.minimum
maximum = pre_allocation.maximum
current = pre_allocation.current
code = pre_allocation.code
if qty < minimum:
raise ValueError("{0}'s target {1} is smaller than minimum amount {2}".format(asset.code, qty, pre_allocation))
elif qty < maximum:
# need to buy / sell
ex = Execution(code, qty - current)
allocation = Allocation(code,
minimum=minimum,
maximum=maximum,
current=qty)
qty = 0
else:
ex = Execution(code, maximum - current)
allocation = Allocation(code,
minimum=minimum,
maximum=maximum,
current=maximum)
qty = qty - maximum
return ex, allocation, qty
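`handle_one_asset` clips a target quantity to one portfolio's allocation band and returns the execution, the updated allocation, and the quantity that spills over to the next portfolio. A toy walk-through (values made up, relying only on the classes above):

```python
# The target of 250 exceeds the 0..100 band, so 80 shares are bought
# (20 -> 100) and 150 spill over to downstream portfolios.
pre = Allocation(code=1, minimum=0, maximum=100, current=20)
asset = Asset(code=1, name='demo')
ex, new_alloc, left = handle_one_asset(pre, asset, qty=250)
print(ex)         # Execution(code=1, qty=80, comment=None)
print(new_alloc)  # Allocation(code=1, minimum=0, maximum=100, current=100)
print(left)       # 150
```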
def pass_through(target_pos: TargetPositions,
portfolio: Portfolio) -> Tuple[Executions, Portfolio, TargetPositions]:
p_name = portfolio.name
new_target_pos = TargetPositions()
allocations = []
executions = []
for code in target_pos.codes:
asset, qty = target_pos[code]
if asset.priority:
raise ValueError("asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority))
if p_name in asset.forbidden:
ex = Execution(code, 0, "{0} is forbidden for {1}".format(code, p_name))
allocation = copy.deepcopy(portfolio[code])
new_target_pos.add_asset(asset, qty)
else:
prev_allocation = portfolio[code]
ex, allocation, qty = handle_one_asset(prev_allocation, asset, qty)
new_target_pos.add_asset(asset, qty)
allocations.append(allocation)
executions.append(ex)
return Executions(p_name, executions), Portfolio(p_name, allocations), new_target_pos
if __name__ == '__main__':
asset1 = Asset(1, 'a')
asset2 = Asset(2, 'b')
asset3 = Asset(3, 'b')
target_pos = TargetPositions([asset1, asset2, asset3], [200, 300, 100])
allc1 = Allocation(1, 0, 100, 0)
allc2 = Allocation(2, 0, 400, 100)
allc3 = Allocation(3, 0, 400, 200)
portfolio = Portfolio('test1', [allc1, allc2, allc3])
executions, portfolio, target_pos = pass_through(target_pos, portfolio)
print(executions)
print(portfolio)
print(target_pos)
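For these targets, asset 1's 200 exceeds its 0..100 band, so 100 executes against `test1` and 100 spills into `new_target_pos`; asset 2 buys 200 (100 to 300) and asset 3 sells 100 (200 to 100), both fully absorbed within their bands.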
......@@ -5,17 +5,137 @@ Created on 2017-7-21
@author: cheng.li
"""
from deprecated import deprecated
from math import inf
import numpy as np
import pandas as pd
from enum import IntEnum
from typing import Tuple
from typing import Optional
from typing import Dict
from typing import List
from typing import Union
from typing import Iterable
from PyFin.api import pyFinAssert
class BoundaryDirection(IntEnum):
LOWER = -1
UPPER = 1
class BoundaryType(IntEnum):
ABSOLUTE = 0
RELATIVE = 1
class BoundaryImpl(object):
def __init__(self,
direction: BoundaryDirection,
b_type: BoundaryType,
val: float):
self.direction = direction
self.b_type = b_type
self.val = val
self._validation()
def _validation(self):
pyFinAssert(self.b_type == BoundaryType.ABSOLUTE or self.b_type == BoundaryType.RELATIVE,
ValueError,
"Boundary Type {0} is not recognized".format(self.b_type))
pyFinAssert(self.direction == BoundaryDirection.LOWER or self.direction == BoundaryDirection.UPPER,
ValueError,
"Boundary direction {0} is not recognized".format(self.direction))
def __call__(self, center: float):
if self.b_type == BoundaryType.ABSOLUTE:
return self.val + center
else:
pyFinAssert(center >= 0., ValueError, "relative bounds only support a positive backbone value")
return self.val * center
class BoxBoundary(object):
def __init__(self,
lower_bound: BoundaryImpl,
upper_bound: BoundaryImpl):
self.lower = lower_bound
self.upper = upper_bound
def bounds(self, center):
l_b, u_b = self.lower(center), self.upper(center)
pyFinAssert(l_b <= u_b, ValueError, "lower bound should be lower than upper bound")
return l_b, u_b
def create_box_bounds(names: List[str],
b_type: Union[Iterable[BoundaryType], BoundaryType],
l_val: Union[Iterable[float], float],
u_val: Union[Iterable[float], float]) -> Dict[str, BoxBoundary]:
"""
helper function to quickly create a series of bounds
"""
bounds = dict()
if not hasattr(b_type, '__iter__'):
b_type = np.array([b_type] * len(names))
if not hasattr(l_val, '__iter__'):
l_val = np.array([l_val] * len(names))
if not hasattr(u_val, '__iter__'):
u_val = np.array([u_val] * len(names))
for i, name in enumerate(names):
lower = BoundaryImpl(BoundaryDirection.LOWER,
b_type[i],
l_val[i])
upper = BoundaryImpl(BoundaryDirection.UPPER,
b_type[i],
u_val[i])
bounds[name] = BoxBoundary(lower, upper)
return bounds
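`create_box_bounds` broadcasts scalar `b_type`/`l_val`/`u_val` values to every name; per-name iterables also work. For instance:

```python
# Scalar bounds broadcast across all names.
bounds = create_box_bounds(['SIZE', 'BETA'], BoundaryType.RELATIVE,
                           l_val=0.8, u_val=1.2)
l, u = bounds['SIZE'].bounds(2.0)   # relative bounds scale the center
print(l, u)                         # 1.6 2.4
```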
class LinearConstraints(object):
def __init__(self,
bounds: Dict[str, BoxBoundary],
cons_mat: pd.DataFrame,
backbone: np.ndarray):
pyFinAssert(len(bounds) == cons_mat.shape[1], ValueError,
"number of bounds should be same as number of columns of cons_mat")
pyFinAssert(cons_mat.shape[0] == len(backbone), ValueError,
"length of backbone should be same as number of rows of cons_mat")
self.names = list(bounds.keys())
self.bounds = bounds
self.cons_mat = cons_mat
self.backbone = backbone
def risk_targets(self) -> Tuple[np.ndarray, np.ndarray]:
lower_bounds = []
upper_bounds = []
for name in self.names:
center = self.backbone @ self.cons_mat[name].values
l, u = self.bounds[name].bounds(center)
lower_bounds.append(l)
upper_bounds.append(u)
return np.array(lower_bounds), np.array(upper_bounds)
@property
def risk_exp(self) -> np.ndarray:
return self.cons_mat[self.names].values
@deprecated(reason="Constraints is deprecated in alpha-mind 0.1.1. Please use LinearConstraints instead.")
class Constraints(object):
def __init__(self,
risk_exp: Optional[np.ndarray]=None,
risk_names: Optional[np.ndarray]=None):
risk_exp: Optional[np.ndarray] = None,
risk_names: Optional[np.ndarray] = None):
self.risk_exp = risk_exp
if risk_names is not None:
......@@ -68,4 +188,4 @@ if __name__ == '__main__':
cons = Constraints(risk_exp, risk_names)
cons.set_constraints('b', 0.0, 0.1)
print(cons.risk_targets())
\ No newline at end of file
print(cons.risk_targets())
......@@ -7,7 +7,14 @@ Created on 2017-7-20
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import BoxBoundary
from alphamind.portfolio.constraints import BoundaryImpl
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.constraints import LinearConstraints
class TestConstraints(unittest.TestCase):
......@@ -43,6 +50,100 @@ class TestConstraints(unittest.TestCase):
np.testing.assert_array_almost_equal(risk_targets[0], np.array([-0.1, -np.inf, -0.1]))
np.testing.assert_array_almost_equal(risk_targets[1], np.array([0.1, np.inf, 0.1]))
def test_absolute_box_boundary(self):
lower = BoundaryImpl(BoundaryDirection.LOWER,
BoundaryType.ABSOLUTE,
-0.8)
upper = BoundaryImpl(BoundaryDirection.UPPER,
BoundaryType.ABSOLUTE,
1.1)
bound = BoxBoundary(lower, upper)
center = 2.2
l, u = bound.bounds(center)
self.assertAlmostEqual(l, 1.4)
self.assertAlmostEqual(u, 3.3)
def test_relative_box_boundary(self):
lower = BoundaryImpl(BoundaryDirection.LOWER,
BoundaryType.RELATIVE,
0.8)
upper = BoundaryImpl(BoundaryDirection.UPPER,
BoundaryType.RELATIVE,
1.1)
bound = BoxBoundary(lower, upper)
center = 2.2
l, u = bound.bounds(center)
self.assertAlmostEqual(l, 1.76)
self.assertAlmostEqual(u, 2.42)
def test_create_box_bounds_single_value(self):
names = ['a', 'b', 'c']
b_type = BoundaryType.RELATIVE
l_val = 0.8
u_val = 1.1
bounds = create_box_bounds(names,
b_type,
l_val,
u_val)
for key, bound in bounds.items():
l_bound = bound.lower
u_bound = bound.upper
self.assertEqual(l_bound.b_type, b_type)
self.assertEqual(u_bound.b_type, b_type)
self.assertAlmostEqual(l_bound.val, l_val)
self.assertAlmostEqual(u_bound.val, u_val)
def test_create_box_bounds_multiple_values(self):
names = ['a', 'b', 'c']
b_type = BoundaryType.RELATIVE
l_val = [0.9, 0.8, 1.1]
u_val = [1.1, 1.2, 1.3]
bounds = create_box_bounds(names,
b_type,
l_val,
u_val)
for i, name in enumerate(names):
bound = bounds[name]
l_bound = bound.lower
u_bound = bound.upper
self.assertEqual(l_bound.b_type, b_type)
self.assertEqual(u_bound.b_type, b_type)
self.assertAlmostEqual(l_bound.val, l_val[i])
self.assertAlmostEqual(u_bound.val, u_val[i])
def test_linear_constraints(self):
cons_mat = np.random.randn(100, 3)
backbone = np.random.randn(100)
names = ['a', 'b', 'c']
cons_mat = pd.DataFrame(cons_mat, columns=names)
b_type = BoundaryType.ABSOLUTE
l_val = -0.8
u_val = 1.1
bounds = create_box_bounds(names,
b_type,
l_val,
u_val)
constraints = LinearConstraints(bounds=bounds,
cons_mat=cons_mat,
backbone=backbone)
l_bounds, u_bounds = constraints.risk_targets()
risk_exp = constraints.risk_exp
for i, name in enumerate(names):
center = risk_exp[:, i] @ backbone
self.assertAlmostEqual(center + l_val, l_bounds[i])
self.assertAlmostEqual(center + u_val, u_bounds[i])
if __name__ == '__main__':
unittest.main()
(One file's diff is omitted here: its source diff was too large to display.)
arrow >= 0.10.0
cython >= 0.25.2
deprecated >= 1.1.0
numpy >= 1.12.1
pandas >= 0.19.2
scikit-learn >= 0.18.1
......
......@@ -14,7 +14,7 @@ from Cython.Build import cythonize
from distutils.extension import Extension
import numpy as np
VERSION = "0.1.0"
VERSION = "0.1.1"
if platform.system() != "Windows":
import multiprocessing
......
Subproject commit a187ed6c8f3aa40b47d5be80667cbbe6a6fd563d
Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999