Unverified commit 72e4179f authored by lion-sing, committed by GitHub

Merge pull request #3 from alpha-miner/master

update
parents ebd7a27f eac7bef8
......@@ -37,6 +37,7 @@ install:
- pip install simpleutils
- pip install coveralls
- pip install finance-python
- pip install deprecated
- export CWD=$PWD
- cd /usr/src/gtest
- sudo cmake CMakeLists.txt
......
......@@ -48,11 +48,11 @@ alpha-mind provides the tool chain commonly used in multi-factor research, including:
* Linux
On Linux, a C++ compiler (e.g. g++) and a Fortran compiler (e.g. gfortran) are required
   On Linux, a C++ compiler (e.g. g++) and a Fortran compiler (e.g. gfortran) are required:
```bash
build_linux_dependencies.sh
```
## Installation
......
......@@ -6,4 +6,4 @@ Created on 2017-4-25
"""
__version__ = "0.1.0"
__version__ = "0.1.1"
......@@ -7,11 +7,13 @@ Created on 2017-5-25
from typing import Optional
from typing import Tuple
from typing import Union
import numpy as np
import pandas as pd
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.longshortbulder import long_short_build
from alphamind.portfolio.rankbuilder import rank_build
from alphamind.portfolio.linearbuilder import linear_build
......@@ -60,7 +62,7 @@ def factor_analysis(factors: pd.DataFrame,
def er_portfolio_analysis(er: np.ndarray,
industry: np.ndarray,
dx_return: np.ndarray,
constraints: Optional[Constraints]=None,
constraints: Optional[Union[LinearConstraints, Constraints]]=None,
detail_analysis=True,
benchmark: Optional[np.ndarray] = None,
is_tradable: Optional[np.ndarray] = None,
......
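A note on the widened annotation above: `er_portfolio_analysis` now accepts either the deprecated `Constraints` or the new `LinearConstraints`. A minimal, hedged sketch of a new-style call, using only names visible in this PR, with randomly generated data and the optimizer's default bounds assumed:

```python
# Hedged sketch, not repo code: exercising the new LinearConstraints type.
import numpy as np
import pandas as pd
from alphamind.api import *  # er_portfolio_analysis and the constraint helpers

n = 300
er = np.random.randn(n)                  # toy expected returns
industry = np.random.randint(0, 28, n)   # toy industry codes
benchmark_w = np.full(n, 1. / n)         # benchmark weights, used as the backbone

cons_mat = pd.DataFrame({'SIZE': np.random.randn(n)})
bounds = create_box_bounds(['SIZE'], BoundaryType.ABSOLUTE, -0.1, 0.1)
constraints = LinearConstraints(bounds, cons_mat, benchmark_w)

target_pos, _ = er_portfolio_analysis(er, industry, None,
                                      constraints=constraints,
                                      detail_analysis=False,
                                      benchmark=benchmark_w,
                                      method='risk_neutral')
```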
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
from alphamind.data.standardize import standardize
def factor_turn_over(factor_values: np.ndarray,
trade_dates: np.ndarray,
codes: np.ndarray,
use_standize: bool=True):
if use_standize:
factor_values = standardize(factor_values, trade_dates)
if __name__ == '__main__':
from alphamind.api import *
engine = SqlEngine()
factor = 'ep_q'
freq = '5b'
start_date = '2017-06-01'
end_date = '2017-08-01'
universe = Universe('custom', ['zz500'])
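As committed, `factor_turn_over` stops after standardizing; the turnover computation itself is not part of this diff. Purely as a sketch, one plausible definition is one minus the cross-sectional rank autocorrelation of the factor between consecutive trade dates (an assumption, not the repo's definition):

```python
# Hypothetical completion, NOT the committed code.
import numpy as np
import pandas as pd

def factor_turn_over_sketch(factor_values: np.ndarray,
                            trade_dates: np.ndarray,
                            codes: np.ndarray) -> pd.Series:
    df = pd.DataFrame({'trade_date': trade_dates, 'code': codes, 'factor': factor_values})
    # rank each cross section, then pivot to a trade_date x code matrix
    df['rank'] = df.groupby('trade_date')['factor'].rank()
    mat = df.pivot(index='trade_date', columns='code', values='rank')
    # row-wise correlation of each date's ranks with the previous date's
    return 1. - mat.corrwith(mat.shift(1), axis=1)
```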
......@@ -14,6 +14,10 @@ from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.evolver import evolve_positions
from alphamind.data.engines.sqlengine import risk_styles
......@@ -24,6 +28,7 @@ from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.neutralize import neutralize
from alphamind.data.engines.sqlengine import factor_tables
from alphamind.data.engines.utilities import industry_list
from alphamind.model import LinearRegression
from alphamind.model import LassoRegression
......@@ -37,6 +42,7 @@ from alphamind.model import XGBTrainer
from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor
......@@ -56,6 +62,10 @@ __all__ = [
'Universe',
'factor_processing',
'Constraints',
'LinearConstraints',
'BoundaryType',
'BoundaryDirection',
'create_box_bounds',
'evolve_positions',
'risk_styles',
'industry_styles',
......@@ -65,8 +75,10 @@ __all__ = [
'projection',
'neutralize',
'factor_tables',
'industry_list',
'fetch_data_package',
'fetch_train_phase',
'fetch_predict_phase',
'LinearRegression',
'LassoRegression',
'ConstLinearModel',
......
......@@ -670,6 +670,7 @@ class Experimental(Base):
val_q = Column(Float(53))
ep_q = Column(Float(53))
ep_q_d_1w = Column(Float(53))
ev = Column(Float(53))
class FactorMaster(Base):
......
# -*- coding: utf-8 -*-
"""
Created on 2018-1-24
@author: cheng.li
"""
INDUSTRY_MAPPING = {
'sw': {
1: ["采掘", "传媒", "电气设备", "电子", "房地产", "纺织服装", "非银金融", "钢铁", "公用事业", "国防军工", "化工", "机械设备", "计算机", "家用电器", "建筑材料",
"建筑装饰", "交通运输", "农林牧渔", "汽车", "轻工制造", "商业贸易", "食品饮料", "通信", "休闲服务", "医药生物", "银行", "有色金属", "综合"],
2: ["白色家电", "半导体", "包装印刷", "保险", "玻璃制造", "采掘服务", "餐饮", "畜禽养殖", "船舶制造", "地面兵装", "电机", "电力", "电气自动化设备", "电源设备",
"电子制造", "动物保健", "多元金融", "房地产开发", "房屋建设", "纺织制造", "服装家纺", "钢铁", "港口", "高低压设备", "高速公路", "工业金属", "公交", "光学光电子",
"航空运输", "航空装备", "航天装备", "航运", "互联网传媒", "化学纤维", "化学原料", "化学制品", "化学制药", "环保工程及服务", "黄金", "机场", "基础建设",
"计算机设备", "计算机应用", "家用轻工", "金属非金属新材料", "金属制品", "景点", "酒店", "林业", "旅游综合", "贸易", "煤炭开采", "农产品加工", "农业综合",
"其他采掘", "其他电子", "其他建材", "其他交运设备", "其他轻工制造", "其他休闲服务", "汽车服务", "汽车零部件", "汽车整车", "燃气", "商业物业经营", "生物制品",
"石油化工", "石油开采", "食品加工", "视听器材", "水泥制造", "水务", "饲料", "塑料", "铁路运输", "通信设备", "通信运营", "通用机械", "文化传媒", "物流",
"稀有金属", "橡胶", "一般零售", "医疗服务", "医疗器械", "医药商业", "仪器仪表", "银行", "饮料制造", "营销传播", "渔业", "元件", "园林工程", "园区开发",
"运输设备", "造纸", "证券", "中药", "种植业", "专业工程", "专业零售", "专用设备", "装修装饰", "综合"],
3: ["IT服务", "LED", "氨纶", "白酒", "百货", "半导体材料", "包装印刷", "保险", "被动元件", "冰箱", "玻璃制造", "玻纤", "彩电", "餐饮", "超市",
"城轨建设", "乘用车", "储能设备", "畜禽养殖", "船舶制造", "纯碱", "磁性材料", "氮肥", "低压设备", "涤纶", "地面兵装", "电机", "电网自动化", "电子零部件制造",
"电子系统组装", "动物保健", "多业态零售", "多元金融", "房地产开发", "房屋建设", "纺织服装设备", "纺织化学用品", "非金属新材料", "分立器件", "风电设备", "氟化工及制冷剂",
"辅料", "复合肥", "改性塑料", "钢结构", "港口", "高速公路", "高压设备", "工程机械", "工控自动化", "公交", "管材", "光伏设备", "光学元件", "国际工程承包",
"果蔬加工", "海洋捕捞", "航空运输", "航空装备", "航天装备", "航运", "合成革", "互联网信息服务", "化学工程", "化学原料药", "化学制剂", "环保工程及服务", "环保设备",
"黄金", "黄酒", "火电", "火电设备", "机场", "机床工具", "机械基础件", "集成电路", "计量仪表", "计算机设备", "家电零部件", "家纺", "家具", "钾肥", "焦炭加工",
"金属新材料", "金属制品", "酒店", "聚氨酯", "空调", "锂", "粮食种植", "粮油加工", "林业", "磷肥", "磷化工及磷酸盐", "楼宇设备", "路桥施工", "轮胎",
"旅游综合", "铝", "氯碱", "毛纺", "贸易", "煤炭开采", "棉纺", "民爆用品", "磨具磨料", "耐火材料", "男装", "内燃机", "农药", "农业综合", "农用机械",
"女装", "啤酒", "平面媒体", "葡萄酒", "普钢", "其他采掘", "其他采掘服务", "其他电子", "其他纺织", "其他服装", "其他互联网服务", "其他化学原料", "其他化学制品",
"其他基础建设", "其他家用轻工", "其他建材", "其他交运设备", "其他酒类", "其他农产品加工", "其他轻工制造", "其他塑料制品", "其他文化传媒", "其他稀有小金属", "其他纤维",
"其他橡胶制品", "其他休闲服务", "其他种植业", "其他专业工程", "其它电源设备", "其它视听器材", "其它通用机械", "其它专用机械", "汽车服务", "汽车零部件", "铅锌",
"燃机发电", "燃气", "热电", "人工景点", "日用化学产品", "肉制品", "乳品", "软件开发", "软饮料", "商用载货车", "商用载客车", "生物制品", "石油加工", "石油开采",
"石油贸易", "食品综合", "水产养殖", "水电", "水利工程", "水泥制造", "水务", "丝绸", "饲料", "炭黑", "特钢", "调味发酵品", "铁路建设", "铁路设备", "铁路运输",
"通信传输设备", "通信配套服务", "通信运营", "铜", "涂料油漆油墨制造", "维纶", "文娱用品", "钨", "无机盐", "物流", "稀土", "洗衣机", "显示器件", "线缆部件及其他",
"小家电", "鞋帽", "新能源发电", "休闲服装", "冶金矿采化工设备", "一般物业经营", "医疗服务", "医疗器械", "医药商业", "仪器仪表", "移动互联网服务", "银行", "印染",
"印刷包装机械", "印制电路板", "营销服务", "影视动漫", "油气钻采服务", "有线电视网络", "园林工程", "园区开发", "造纸", "粘胶", "证券", "制冷空调设备", "中压设备",
"中药", "终端设备", "种子生产", "重型机械", "珠宝首饰", "专业连锁", "专业市场", "装修装饰", "自然景点", "综合", "综合电力设备商"]
},
'sw_adj': {
1: ["建筑材料", "机械设备", "家用电器", "交通运输", "化工", "纺织服装", "电气设备", "多元金融", "通信", "传媒", "信息服务", "银行", "农林牧渔", "建筑装饰",
"计算机", "轻工制造", "交运设备", "信息设备", "钢铁", "采掘", "建筑建材", "商业贸易", "房地产", "有色金属", "国防军工", "医药生物", "汽车", "公用事业",
"保险", "休闲服务", "证券", "电子", "综合", "食品饮料"]
},
'zz': {
1: ["电信业务", "工业", "公用事业", "金融地产", "可选消费", "能源", "信息技术", "医药卫生", "原材料", "主要消费"],
2: ["半导体", "保险", "传媒", "电信服务", "房地产", "公用事业", "计算机及电子设备", "计算机运用", "家庭与个人用品", "交通运输", "零售业", "耐用消费品与服装", "能源",
"其他金融", "汽车与汽车零部件", "商业服务与用品", "食品、饮料与烟草", "食品与主要用品零售", "通信设备", "消费者服务", "医疗器械与服务", "医药生物", "银行", "原材料",
"资本品", "资本市场"],
3: ["半导体", "包装食品与肉类", "保险", "传媒", "道路运输", "电力", "电脑与外围设备", "电气设备", "电网", "电信运营服务", "电信增值服务", "电子设备", "多元化零售",
"房地产管理与服务", "房地产开发与园区", "纺织服装", "非金属采矿及制品", "钢铁", "个人用品", "工业集团企业", "供热或其他公用事业", "航空公司", "航空航天与国防",
"航空货运与物流", "航运", "互联网服务", "互联网零售", "化学原料", "化学制品", "环保设备、工程与服务", "机械制造", "家常用品", "家庭耐用消费品", "建筑材料", "建筑产品",
"建筑与工程", "交通基本设施", "酒店、餐馆与休闲", "煤炭", "能源开采设备与服务", "农牧渔产品", "其他金融服务", "其他零售", "汽车零配件与轮胎", "汽车与摩托车", "燃气",
"日用品经销商", "容器与包装", "软件开发", "商业服务与用品", "商业银行", "生物科技", "石油与天然气", "食品与主要用品零售", "水务", "通信设备", "消费信贷", "信息技术服务",
"休闲设备与用品", "医疗器械", "医疗用品与服务提供商", "饮料", "有色金属", "纸类与林业产品", "制药", "制药与生物科技服务", "珠宝与奢侈品", "资本市场", "综合消费者服务"]
},
'zjh': {
1: ["采矿业", "电力、热力、燃气及水生产和供应业", "房地产业", "建筑业", "交通运输、仓储和邮政业", "教育", "金融业", "居民服务、修理和其他服务业", "科学研究和技术服务业",
"农、林、牧、渔业", "批发和零售业", "水利、环境和公共设施管理业", "卫生和社会工作", "文化、体育和娱乐业", "信息传输、软件和信息技术服务业", "制造业", "住宿和餐饮业", "综合",
"租赁和商务服务业"],
2: ["保险业", "餐饮业", "仓储业", "畜牧业", "道路运输业", "电力、热力生产和供应业", "电气机械和器材制造业", "电信、广播电视和卫星传输服务", "房地产业", "房屋建筑业",
"纺织服装、服饰业", "纺织业", "非金属矿采选业", "非金属矿物制品业", "废弃资源综合利用业", "公共设施管理业", "广播、电视、电影和影视录音制作业", "航空运输业", "黑色金属矿采选业",
"黑色金属冶炼和压延加工业", "互联网和相关服务", "化学纤维制造业", "化学原料和化学制品制造业", "货币金融服务", "机动车、电子产品和日用产品修理业", "计算机、通信和其他电子设备制造业",
"家具制造业", "建筑安装业", "建筑装饰和其他建筑业", "教育", "金属制品业", "酒、饮料和精制茶制造业", "开采辅助活动", "林业", "零售业", "煤炭开采和洗选业",
"木材加工和木、竹、藤、棕、草制品业", "农、林、牧、渔服务业", "农副食品加工业", "农业", "批发业", "皮革、毛皮、羽毛及其制品和制鞋业", "其他金融业", "其他制造业", "汽车制造业",
"燃气生产和供应业", "软件和信息技术服务业", "商务服务业", "生态保护和环境治理业", "石油和天然气开采业", "石油加工、炼焦和核燃料加工业", "食品制造业", "水的生产和供应业",
"水利管理业", "水上运输业", "体育", "铁路、船舶、航空航天和其它运输设备制造业", "铁路运输业", "通用设备制造业", "土木工程建筑业", "卫生", "文化艺术业",
"文教、工美、体育和娱乐用品制造业", "橡胶和塑料制品业", "新闻和出版业", "研究和试验发展", "医药制造业", "仪器仪表制造业", "印刷和记录媒介复制业", "邮政业", "有色金属矿采选业",
"有色金属冶炼和压延加工业", "渔业", "造纸和纸制品业", "住宿业", "专业技术服务业", "专用设备制造业", "装卸搬运和运输代理业", "资本市场服务", "综合", "租赁业"],
},
'dx': {
1: ["Cyclical", "Defensive", "Sensitive"],
2: ["ConsumerDiscretionary", "ConsumerStaples", "Financials", "HealthCare", "Industrials", "IT", "Materials",
"RealEstate", "Utilities"]
}
}
......@@ -42,6 +42,7 @@ from alphamind.data.engines.utilities import _map_factors
from alphamind.data.engines.utilities import _map_industry_category
from alphamind.data.engines.utilities import _map_risk_model_table
from alphamind.data.engines.utilities import factor_tables
from alphamind.data.engines.utilities import industry_list
from PyFin.api import advanceDateByCalendar
risk_styles = ['BETA',
......@@ -207,12 +208,12 @@ class SqlEngine(object):
cond = universe._query_statements(start_date, end_date, None)
big_table = join(Market, UniverseTable,
and_(
Market.trade_date == UniverseTable.trade_date,
Market.code == UniverseTable.code,
cond
)
)
and_(
Market.trade_date == UniverseTable.trade_date,
Market.code == UniverseTable.code,
cond
)
)
query = select([Market.trade_date, Market.code, stats]) \
.select_from(big_table)
......@@ -379,7 +380,7 @@ class SqlEngine(object):
FullFactor.code == UniverseTable.code,
cond
)
)
)
query = select(
[FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
......@@ -498,7 +499,7 @@ class SqlEngine(object):
FullFactor.code == UniverseTable.code,
cond
)
)
)
query = select(
[FullFactor.trade_date, FullFactor.code, special_risk_col] + risk_exposure_cols).select_from(big_table) \
......@@ -508,20 +509,24 @@ class SqlEngine(object):
if universe.is_filtered:
codes = universe.query(self, start_date, end_date, dates)
risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(
['trade_date', 'code'])
return risk_cov, risk_exp
def fetch_industry(self,
ref_date: str,
codes: Iterable[int],
category: str = 'sw'):
category: str = 'sw',
level: int = 1):
industry_category_name = _map_industry_category(category)
code_name = 'industryID' + str(level)
category_name = 'industryName' + str(level)
query = select([Industry.code,
Industry.industryID1.label('industry_code'),
Industry.industryName1.label('industry')]).where(
getattr(Industry, code_name).label('industry_code'),
getattr(Industry, category_name).label('industry')]).where(
and_(
Industry.trade_date == ref_date,
Industry.code.in_(codes),
......@@ -531,14 +536,36 @@ class SqlEngine(object):
return pd.read_sql(query, self.engine)
def fetch_industry_matrix(self,
ref_date: str,
codes: Iterable[int],
category: str = 'sw',
level: int = 1):
df = self.fetch_industry(ref_date, codes, category, level)
df['industry_name'] = df['industry']
df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
industries = industry_list(category, level)
in_s = []
out_s = []
for i in industries:
if i in df:
in_s.append(i)
else:
out_s.append(i)
res = df[['code', 'industry_code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0] * len(out_s))))
return res
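`fetch_industry_matrix` one-hot encodes the industry labels and zero-pads any industry missing from that day's snapshot, so the result always has one column per entry of `industry_list(category, level)`. A standalone sketch of the same padding logic with a toy universe instead of the real classification lists:

```python
# Toy reproduction of the dummy-matrix padding used above.
import pandas as pd

df = pd.DataFrame({'code': [1, 2, 3],
                   'industry': ['Bank', 'Steel', 'Bank']})
df['industry_name'] = df['industry']
df = pd.get_dummies(df, columns=['industry'], prefix='', prefix_sep='')

industries = ['Bank', 'Steel', 'Media']           # full classification list
in_s = [i for i in industries if i in df]         # columns already present
out_s = [i for i in industries if i not in df]    # industries absent today

res = df[['code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0] * len(out_s))))  # zero-pad the rest
print(res)   # the 'Media' column exists and is all zeros
```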
def fetch_industry_range(self,
universe: Universe,
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None,
category: str = 'sw'):
category: str = 'sw',
level: int = 1):
industry_category_name = _map_industry_category(category)
cond = universe._query_statements(start_date, end_date, dates)
big_table = join(Industry, UniverseTable,
......@@ -547,13 +574,15 @@ class SqlEngine(object):
Industry.code == UniverseTable.code,
Industry.industry == industry_category_name,
cond
)
)
))
code_name = 'industryID' + str(level)
category_name = 'industryName' + str(level)
query = select([Industry.trade_date,
Industry.code,
Industry.industryID1.label('industry_code'),
Industry.industryName1.label('industry')]).select_from(big_table).distinct()
getattr(Industry, code_name).label('industry_code'),
getattr(Industry, category_name).label('industry')]).select_from(big_table).distinct()
df = pd.read_sql(query, self.engine)
if universe.is_filtered:
......@@ -561,7 +590,46 @@ class SqlEngine(object):
df = pd.merge(df, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
return df
def fetch_data(self, ref_date: str,
def fetch_industry_matrix_range(self,
universe: Universe,
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None,
category: str = 'sw',
level: int = 1):
df = self.fetch_industry_range(universe, start_date, end_date, dates, category, level)
df['industry_name'] = df['industry']
df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
industries = industry_list(category, level)
in_s = []
out_s = []
for i in industries:
if i in df:
in_s.append(i)
else:
out_s.append(i)
res = df[['trade_date', 'code', 'industry_code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0]*len(out_s))))
return res
def fetch_trade_status(self,
ref_date: str,
codes: Iterable[int]):
query = select([Market.code, Market.isOpen]).where(
and_(
Market.trade_date == ref_date,
Market.code.in_(codes)
)
)
return pd.read_sql(query, self.engine).sort_values(['code'])
def fetch_data(self,
ref_date: str,
factors: Iterable[str],
codes: Iterable[int],
benchmark: int = None,
......@@ -802,10 +870,10 @@ class SqlEngine(object):
else:
id_filter = 'in_'
t = select([table.trade_id]).\
t = select([table.trade_id]). \
where(and_(table.trade_date <= ref_date,
table.operation == 'withdraw')).alias('t')
query = select([table]).\
query = select([table]). \
where(and_(getattr(table.trade_id, id_filter)(t),
table.trade_date <= ref_date,
table.operation == 'lend'))
......@@ -823,7 +891,7 @@ class SqlEngine(object):
rule = x['price_rule'].split('@')
if rule[0] in ['closePrice', 'openPrice']:
query = select([getattr(Market, rule[0])]).\
query = select([getattr(Market, rule[0])]). \
where(and_(Market.code == code, Market.trade_date == rule[1]))
data = pd.read_sql(query, self.engine)
if not data.empty:
......@@ -835,6 +903,7 @@ class SqlEngine(object):
else:
raise KeyError('do not have rule for %s' % x['price_rule'])
return price
df['price'] = df.apply(lambda x: parse_price_rule(x), axis=1)
df.drop(['remark', 'price_rule', 'operation'], axis=1, inplace=True)
......@@ -848,12 +917,10 @@ class SqlEngine(object):
if __name__ == '__main__':
universe = Universe('ss', ['hs300'])
engine = SqlEngine()
df = engine.fetch_outright_status('2017-12-28')
print(df)
ref_date = '2017-12-28'
codes = universe.query(engine, dates=[ref_date])
df = engine.fetch_trade_status(ref_date, codes.code.tolist())
print(df)
\ No newline at end of file
......@@ -13,6 +13,7 @@ from alphamind.data.dbmodel.models import RiskCovLong
from alphamind.data.dbmodel.models import FullFactor
from alphamind.data.dbmodel.models import Gogoal
from alphamind.data.dbmodel.models import Experimental
from alphamind.data.engines.industries import INDUSTRY_MAPPING
factor_tables = [FullFactor, Gogoal, Experimental]
......@@ -43,5 +44,17 @@ def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict:
def _map_industry_category(category: str) -> str:
if category == 'sw':
return '申万行业分类'
if category == 'sw_adj':
return '申万行业分类修订'
elif category == 'zz':
return '中证行业分类'
elif category == 'dx':
return '东兴行业分类'
elif category == 'zjh':
return '证监会行业V2012'
else:
raise ValueError("No other industry is supported at the current time")
\ No newline at end of file
raise ValueError("No other industry is supported at the current time")
def industry_list(category: str, level: int=1) -> list:
return INDUSTRY_MAPPING[category][level]
\ No newline at end of file
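`industry_list` is a plain lookup into `INDUSTRY_MAPPING`; for example:

```python
from alphamind.api import industry_list

print(len(industry_list('sw', 1)))   # 28 level-1 Shenwan industries
print(len(industry_list('zz')))      # 10 level-1 CSI industries (level defaults to 1)
```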
......@@ -22,9 +22,9 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
mean_values = transform(groups, x, 'mean')
std_values = transform(groups, x, 'std', ddof)
return (x - mean_values) / std_values
return (x - mean_values) / np.maximum(std_values, 1e-8)
else:
return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)
def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
......@@ -48,7 +48,7 @@ class Standardizer(object):
self.std_ = simple_std(x, axis=0, ddof=self.ddof_)
def transform(self, x: np.ndarray) -> np.ndarray:
return (x - self.mean_) / self.std_
return (x - self.mean_) / np.maximum(self.std_, 1e-8)
class GroupedStandardizer(object):
......@@ -69,4 +69,4 @@ class GroupedStandardizer(object):
def transform(self, x: np.ndarray) -> np.ndarray:
groups = x[:, 0].astype(int)
index = array_index(self.labels_, groups)
return (x[:, 1:] - self.mean_[index]) / self.std_[index]
return (x[:, 1:] - self.mean_[index]) / np.maximum(self.std_[index], 1e-8)
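All four `standardize` changes apply the same guard: the z-score denominator becomes `max(std, 1e-8)`, so a zero-variance column maps to zeros instead of NaN or inf. A quick check of the behavior:

```python
# Quick check of the new zero-variance guard.
import numpy as np

x = np.array([3., 3., 3.])                       # constant column, std == 0
std = x.std(ddof=1)
print((x - x.mean()) / std)                       # nan (0/0) under the old code
print((x - x.mean()) / np.maximum(std, 1e-8))     # [0. 0. 0.] under the new code
```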
......@@ -5,6 +5,7 @@ Created on 2017-8-23
@author: cheng.li
"""
import copy
import pandas as pd
from PyFin.api import pyFinAssert
from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
......@@ -58,7 +59,7 @@ class Transformer(object):
def __init__(self,
expressions):
expression_dict, expression_dependency = \
factor_translator(expressions)
factor_translator(copy.deepcopy(expressions))
if expression_dict:
self.names = sorted(expression_dict.keys())
......
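Deep-copying the expressions before handing them to `factor_translator` insulates the caller's expression objects from in-place mutation during translation, so the same dict can safely seed several `Transformer`s. The hazard being avoided looks like this (a contrived sketch, not the PyFin internals):

```python
# Contrived illustration: without the deepcopy, a translator that mutates
# its input would corrupt the caller's dict.
import copy

expressions = {'alpha': ['ep_q', 'roe_q']}

def careless_translator(exprs):
    exprs['alpha'].append('_translated')   # in-place mutation
    return exprs

careless_translator(copy.deepcopy(expressions))
print(expressions['alpha'])   # still ['ep_q', 'roe_q'] thanks to the copy
```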
......@@ -20,195 +20,249 @@ plt.style.use('ggplot')
Backtest parameter settings
"""
start_date = '2015-01-01'
end_date = '2017-11-28'
benchmark_code = 300
universe_name = ['hs300']
universe = Universe(universe_name, universe_name)
frequency = '5b'
start_date = '2010-01-01'
end_date = '2018-01-26'
frequency = '10b'
method = 'risk_neutral'
use_rank = 100
industry_lower = 1.
industry_upper = 1.
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = ['SIZE'] + industry_styles
size_risk_lower = 0
size_risk_upper = 0
turn_over_target_base = 0.25
benchmark_total_lower = 1.
benchmark_total_upper = 1.
turn_over_target_base = 0.30
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
horizon = map_freq(frequency)
executor = NaiveExecutor()
engine = SqlEngine()
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = ['alpha_factor']
#const_features = {alpha_name[0]: LAST('optimism_confidence_25d') + LAST('pessimism_confidence_25d')}
# const_features = {alpha_name[0]: CSRes(DIFF(1. / LAST('PE')), LAST('roe_q'))}
simple_expression = LAST('cfinc1_q') # CSRes(CSRes(LAST('DividendPS'), LAST('roe_q')), LAST('ep_q'))
const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = []
def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True, neutralize_factors=None):
factor_groups = const_model_factor_data.groupby('trade_date')
"""
Model phase: we need 1 constant linear model and one linear regression model
"""
alpha_name = [str(factor_name) + '_' + ('pos' if positive else 'neg')]
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
ref_date = date.strftime('%Y-%m-%d')
index_dates.append(date)
if neutralize_factors:
prev_factors = []
for i, f in enumerate(neutralize_factors):
pure_factor = LAST(f)
for j in range(i):
pure_factor = CSRes(pure_factor, prev_factors[j])
prev_factors.append(pure_factor)
total_data = data.fillna(data[alpha_name].median())
alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values
benchmark_w = total_data.weight.values
constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data))
ubound = 0.02 + benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
constraint = Constraints(risk_exp_expand, risk_names)
for i, name in enumerate(risk_names):
if name == 'total':
constraint.set_constraints(name,
lower_bound=risk_target[i],
upper_bound=risk_target[i])
elif name == 'SIZE':
base_target = abs(risk_target[i])
constraint.set_constraints(name,
lower_bound=risk_target[i] + base_target * size_risk_lower,
upper_bound=risk_target[i] + base_target * size_risk_upper)
elif name == 'benchmark_total':
base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
else:
constraint.set_constraints(name,
lower_bound=risk_target[i] * industry_lower,
upper_bound=risk_target[i] * industry_upper)
factor_values = factor_processing(total_data[alpha_name].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er = const_model.predict(factor_values)
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
simple_expression = LAST(factor_name)
for f in prev_factors:
simple_expression = CSRes(simple_expression, f)
else:
previous_pos.set_index('code', inplace=True)
remained_pos = previous_pos.loc[codes]
remained_pos.fillna(0., inplace=True)
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values
try:
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
use_rank=use_rank,
turn_over_target=turn_over_target,
current_position=current_position,
lbound=lbound,
ubound=ubound)
except ValueError:
alpha_logger.info('{0} full re-balance'.format(date))
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
use_rank=use_rank,
lbound=lbound,
ubound=ubound)
target_pos['code'] = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code'])
leverage = result.weight_x.abs().sum()
ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
rets.append(np.log(1. + ret))
executor.set_current(executed_pos)
turn_overs.append(turn_over)
leverags.append(leverage)
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates)
# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0}'.format(frequency),
secondary_y='tc_cost')
plt.show()
simple_expression = LAST(factor_name)
if not positive:
simple_expression = -simple_expression
const_features = {alpha_name[0]: simple_expression}
const_weights = np.array([1.])
const_model = ConstLinearModel(features=alpha_name,
weights=const_weights)
ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
const_model_factor_data = engine.fetch_data_range(universe,
const_features,
dates=ref_dates,
benchmark=benchmark_code)['factor'].dropna()
horizon = map_freq(frequency)
rets = []
turn_overs = []
leverags = []
previous_pos = pd.DataFrame()
index_dates = []
factor_groups = const_model_factor_data.groupby('trade_date')
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
index_dates.append(date)
total_data = data.fillna(data[alpha_name].median())
alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
risk_exp = total_data[neutralize_risk].values.astype(float)
industry = total_data.industry_code.values
benchmark_w = total_data.weight.values
constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark_w
lbound = np.maximum(0., benchmark_w - 0.02) # np.zeros(len(total_data))
ubound = 0.02 + benchmark_w
is_in_benchmark = (benchmark_w > 0.).astype(float)
risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
risk_names.append('benchmark_total')
constraint = Constraints(risk_exp_expand, risk_names)
for j, name in enumerate(risk_names):
if name == 'total':
constraint.set_constraints(name,
lower_bound=risk_target[j],
upper_bound=risk_target[j])
elif name == 'SIZE':
base_target = abs(risk_target[j])
constraint.set_constraints(name,
lower_bound=risk_target[j] + base_target * size_risk_lower,
upper_bound=risk_target[j] + base_target * size_risk_upper)
elif name == 'benchmark_total':
base_target = benchmark_w.sum()
constraint.set_constraints(name,
lower_bound=benchmark_total_lower * base_target,
upper_bound=benchmark_total_upper * base_target)
else:
constraint.set_constraints(name,
lower_bound=risk_target[j] * industry_lower,
upper_bound=risk_target[j] * industry_upper)
factor_values = factor_processing(total_data[alpha_name].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
# const linear model
er = const_model.predict(factor_values)
codes = total_data['code'].values
if previous_pos.empty:
current_position = None
turn_over_target = None
else:
previous_pos.set_index('code', inplace=True)
remained_pos = previous_pos.loc[codes]
remained_pos.fillna(0., inplace=True)
turn_over_target = turn_over_target_base
current_position = remained_pos.weight.values
try:
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
turn_over_target=turn_over_target,
current_position=current_position,
lbound=lbound,
ubound=ubound)
except ValueError:
alpha_logger.info('{0} full re-balance'.format(date))
target_pos, _ = er_portfolio_analysis(er,
industry,
None,
constraint,
False,
benchmark_w,
method=method,
lbound=lbound,
ubound=ubound)
target_pos['code'] = total_data['code'].values
turn_over, executed_pos = executor.execute(target_pos=target_pos)
executed_codes = executed_pos.code.tolist()
dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
result = pd.merge(result, dx_returns, on=['code'])
leverage = result.weight_x.abs().sum()
ret = result.weight_x.values @ (np.exp(result.dx.values) - 1.)
rets.append(np.log(1. + ret))
executor.set_current(executed_pos)
turn_overs.append(turn_over)
leverags.append(leverage)
previous_pos = executed_pos
alpha_logger.info('{0} is finished'.format(date))
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=index_dates)
# index return
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon,
offset=1).set_index('trade_date')
ret_df['index'] = index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['tc_cost'] = ret_df.turn_over * 0.002
ret_df['returns'] = ret_df['returns'] - ret_df['index'] * ret_df['leverage']
return alpha_name[0], ret_df
def worker_func_positive(factor_name):
from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q']
engine = SqlEngine()
benchmark_code = 905
universe_name = ['zz500']
universe = Universe('custom', universe_name)
return factor_analysis(engine, factor_name, universe, benchmark_code, positive=True, neutralize_factors=neutralize_factors)
def worker_func_negative(factor_name):
from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q']
engine = SqlEngine()
benchmark_code = 905
universe_name = ['zz500']
universe = Universe('custom', universe_name)
return factor_analysis(engine, factor_name, universe, benchmark_code, positive=False, neutralize_factors=neutralize_factors)
if __name__ == '__main__':
# from dask.distributed import Client
#
# client = Client('10.63.6.176:8786')
#
# engine = SqlEngine()
# df = engine.fetch_factor_coverage()
# df = df[df.universe == 'zz800'].groupby('factor').mean()
# df = df[df.coverage >= 0.98]
#
# tasks = client.map(worker_func_positive, df.index.tolist())
# res1 = client.gather(tasks)
#
# tasks = client.map(worker_func_negative, df.index.tolist())
# res2 = client.gather(tasks)
#
# factor_df = pd.DataFrame()
#
# for f_name, df in res1:
# factor_df[f_name] = df['returns']
#
# for f_name, df in res2:
# factor_df[f_name] = df['returns']
factor_name = LAST('ep_q') # LAST('EBITDA') / LAST('ev')
f_name, ret_df = worker_func_positive(factor_name)
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
frequency, factor_name, 905),
secondary_y='tc_cost')
plt.show()
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
def factor_residue_analysis(start_date,
end_date,
factor_name,
factor,
freq,
universe,
engine):
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
n_bins = 5
horizon = map_freq(freq)
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: factor}
factor_all_data = engine.fetch_data_range(universe,
alpha_factor,
dates=dates)['factor']
return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
factor_groups = factor_all_data.groupby('trade_date')
return_groups = return_all_data.groupby('trade_date')
final_res = np.zeros((len(factor_groups.groups), n_bins))
index_dates = []
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1][['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
returns = return_groups.get_group(date)
total_data = pd.merge(data, returns, on=['code']).dropna()
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
index_dates.append(date)
try:
er = factor_processing(total_data[[alpha_factor_name]].values,
pre_process=[winsorize_normal, standardize],
risk_factors=risk_exp,
post_process=[winsorize_normal, standardize])
res = er_quantile_analysis(er,
n_bins=n_bins,
dx_return=dx_return)
except Exception as e:
print(e)
res = np.zeros(n_bins)
final_res[i] = res
df = pd.DataFrame(final_res, index=index_dates)
start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
df.loc[start_date] = 0.
df.sort_index(inplace=True)
df['$top1 - bottom1$'] = df[4] - df[0]
return df
def factor_analysis(f_name):
from alphamind.api import SqlEngine, Universe, alpha_logger
engine = SqlEngine()
universe = Universe('custom', ['zz800'])
base1 = LAST('Alpha60')
base2 = CSRes('roe_q', base1)
base3 = CSRes(CSRes('ep_q', base1), base2)
factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
res = factor_residue_analysis('2010-01-01',
'2018-01-26',
f_name,
factor,
'10b',
universe,
engine)
alpha_logger.info('{0} is done'.format(f_name))
return f_name, res
if __name__ == '__main__':
from dask.distributed import Client
client = Client('10.63.6.176:8786')
engine = SqlEngine()
df = engine.fetch_factor_coverage()
df = df[df.universe == 'zz800'].groupby('factor').mean()
df = df[df.coverage >= 0.98]
universe = Universe('custom', ['zz800'])
factor_df = pd.DataFrame()
tasks = client.map(factor_analysis, df.index.tolist())
res = client.gather(tasks)
for f_name, df in res:
factor_df[f_name] = df['$top1 - bottom1$']
# for i, f_name in enumerate(df.index):
# base1 = LAST('Alpha60')
# base2 = CSRes('roe_q', base1)
# base3 = CSRes(CSRes('ep_q', base1), base2)
# factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
# res = factor_residue_analysis('2010-01-01',
# '2018-01-22',
# f_name,
# factor,
# '10b',
# universe,
# engine)
# factor_df[f_name] = res['$top1 - bottom1$']
# alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
......@@ -19,9 +19,12 @@ start = dt.datetime.now()
universe = Universe('custom', ['zz800'])
simple_expression = CSRes(LAST('OperCashInToAsset'), 'roe_q')
factor_name = 'Beta20'
base1 = LAST('roe_q')
base2 = CSRes(LAST('ep_q'), 'roe_q')
simple_expression = CSRes(CSRes(LAST(factor_name), base1), base2)
alpha_factor_name = 'alpha_factor'
alpha_factor_name = factor_name + '_res'
alpha_factor = {alpha_factor_name: simple_expression}
# end of formula definition
......@@ -29,7 +32,7 @@ alpha_factor = {alpha_factor_name: simple_expression}
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
freq = '10b'
freq = '5b'
n_bins = 5
horizon = map_freq(freq)
......@@ -91,8 +94,6 @@ df = df.cumsum().plot(ax=axes[0], title='Quantile Analysis for {0}'.format(alpha
# =================================================================== #
factor_name = 'PE'
alpha_factor_name = alpha_factor_name + '_1w_diff'
alpha_factor = {alpha_factor_name: DIFF(simple_expression)}
......
......@@ -402,9 +402,9 @@ if __name__ == '__main__':
universe = Universe('zz500', ['hs300', 'zz500'])
neutralized_risk = ['SIZE']
res = fetch_predict_phase(engine, ['ep_q'],
'2018-01-08',
'5b',
universe,
16,
neutralized_risk=neutralized_risk)
'2012-01-05',
'5b',
universe,
16,
neutralized_risk=neutralized_risk)
print(res)
......@@ -32,8 +32,9 @@ class ConstLinearModel(ModelBase):
def __init__(self,
features: list = None,
formulas: dict = None,
weights: np.ndarray = None):
super().__init__(features)
super().__init__(features, formulas=formulas)
if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights),
ValueError,
......@@ -56,8 +57,8 @@ class ConstLinearModel(ModelBase):
class LinearRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
......@@ -84,8 +85,8 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase):
def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, alpha=0.01, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
......@@ -112,8 +113,8 @@ class LassoRegression(ModelBase):
class LogisticRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
......
......@@ -6,6 +6,7 @@ Created on 2017-9-4
"""
import abc
import copy
import arrow
import numpy as np
from alphamind.utilities import alpha_logger
......@@ -15,10 +16,13 @@ from alphamind.utilities import decode
class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None):
def __init__(self, features: list=None, formulas: dict=None):
if features is not None:
self.features = list(features)
else:
self.features = None
self.impl = None
self.formulas = copy.deepcopy(formulas)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
......@@ -43,6 +47,7 @@ class ModelBase(metaclass=abc.ABCMeta):
features=list(self.features),
trained_time=self.trained_time,
desc=encode(self.impl),
formulas=encode(self.formulas),
internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
return model_desc
......@@ -50,6 +55,7 @@ class ModelBase(metaclass=abc.ABCMeta):
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.formulas = decode(model_desc['formulas'])
obj_layout.trained_time = model_desc['trained_time']
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
......
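With `formulas` now encoded by `save()` and decoded by `load()`, a model's factor formulas survive the serialization round trip alongside the fitted estimator. A hedged sketch (the formulas dict is illustrative, and `encode`/`decode` are assumed to invert each other as used above):

```python
# Hedged sketch; LinearRegression is one of the subclasses updated in this
# PR to forward formulas to ModelBase.
import numpy as np
from alphamind.model import LinearRegression

x = np.random.randn(100, 2)
y = x @ np.array([0.5, -0.2])

model = LinearRegression(features=['f1', 'f2'],
                         formulas={'f1': 'LAST(ep_q)', 'f2': 'LAST(roe_q)'})
model.fit(x, y)                          # fit so save() has a trained impl
desc = model.save()                      # 'formulas' is encoded into the dict
restored = LinearRegression.load(desc)   # ...and decoded back on load
print(restored.formulas)                 # {'f1': 'LAST(ep_q)', 'f2': 'LAST(roe_q)'}
```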
......@@ -28,7 +28,7 @@ class RandomForestRegressor(ModelBase):
max_features: str='auto',
features: List=None,
**kwargs):
super().__init__(features)
super().__init__(features, **kwargs)
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -61,8 +61,9 @@ class RandomForestClassifier(ModelBase):
n_estimators: int=100,
max_features: str='auto',
features: List = None,
formulas: dict = None,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
......@@ -96,11 +97,14 @@ class XGBRegressor(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features: List=None,
formulas: dict = None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = XGBRegressorImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
n_jobs=n_jobs,
**kwargs)
def save(self) -> dict:
......@@ -131,12 +135,15 @@ class XGBClassifier(ModelBase):
learning_rate: float=0.1,
max_depth: int=3,
features: List = None,
formulas: dict = None,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.impl = XGBClassifierImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
**kwargs)
learning_rate=learning_rate,
max_depth=max_depth,
n_jobs=n_jobs,
**kwargs)
def save(self) -> dict:
model_desc = super().save()
......@@ -173,9 +180,11 @@ class XGBTrainer(ModelBase):
subsample=1.,
colsample_bytree=1.,
features: List = None,
random_state=0,
formulas: dict = None,
random_state: int=0,
n_jobs: int=1,
**kwargs):
super().__init__(features)
super().__init__(features, formulas=formulas)
self.params = {
'silent': 1,
'objective': objective,
......@@ -185,6 +194,7 @@ class XGBTrainer(ModelBase):
'tree_method': tree_method,
'subsample': subsample,
'colsample_bytree': colsample_bytree,
'nthread': n_jobs,
'seed': random_state
}
......
# -*- coding: utf-8 -*-
"""
Created on 2018-2-6
@author: cheng.li
"""
from typing import List
from typing import Tuple
from math import inf
import copy
from PyFin.api import pyFinAssert
class Allocation(object):
def __init__(self,
code: int,
minimum: int=0,
maximum: int=inf,
current: int=0):
self.code = code
self.minimum = minimum
self.maximum = maximum
self.current = current
pyFinAssert(self.minimum <= self.current, ValueError, "minimum qty should be no greater than current")
pyFinAssert(self.maximum >= self.current, ValueError, "maximum qty should be no less than current")
def __repr__(self):
return "Allocation(code={0}, minimum={1}, maximum={2}, current={3})".format(self.code,
self.minimum,
self.maximum,
self.current)
class Portfolio(object):
def __init__(self,
name: str,
allocations: List[Allocation]):
self.name = name
self.allocations = {a.code: a for a in allocations}
def __getitem__(self, code):
try:
return self.allocations[code]
except KeyError:
allocation = Allocation(code, 0, 0, 0)
self.allocations[code] = allocation
return allocation
def __repr__(self):
return "Portfolio(name={0}, allocations={1})".format(self.name,
list(self.allocations.values()))
@property
def codes(self) -> List[int]:
return sorted(self.allocations.keys())
class Execution(object):
def __init__(self,
code: int,
qty: int,
comment: str=None):
self.code = code
self.qty = qty
self.comment = comment
def __repr__(self):
return "Execution(code={0}, qty={1}, comment={2})".format(self.code,
self.qty,
self.comment)
class Executions(object):
def __init__(self,
name,
executions: List[Execution]=None):
self.name = name
self.executions = executions
def __repr__(self):
return "Executions(name={0}, executions={1})".format(self.name,
self.executions)
class Asset(object):
def __init__(self,
code: int,
name: str=None,
priority: List[str]=None,
forbidden: List[str]=None):
self.code = code
self.name = name
if priority:
self.priority = set(priority)
else:
self.priority = set()
if forbidden:
self.forbidden = set(forbidden)
else:
self.forbidden = set()
self._validation()
def _validation(self):
for p in self.priority:
pyFinAssert(p not in self.forbidden, ValueError, "{0} in priority is in forbidden".format(p))
def __repr__(self):
return "Asset(code={0}, name={1}, priority={2}, forbidden={3})".format(self.code,
self.name,
self.priority,
self.forbidden)
class TargetPositions(object):
def __init__(self,
assets: List[Asset]=None,
qtys: List[int]=None):
if assets:
self.targets = {asset.code: (asset, qty) for asset, qty in zip(assets, qtys)}
else:
self.targets = {}
def add_asset(self,
asset: Asset,
qty: int):
if asset.code in self.targets:
raise ValueError()
self.targets[asset.code] = (asset, qty)
def __getitem__(self, code: int) -> Tuple[Asset, int]:
return self.targets[code]
@property
def codes(self) -> List[int]:
return sorted(self.targets.keys())
def __repr__(self):
return "TargetPositions(assets={0}, qtys={1})".format(*zip(*self.targets.values()))
def handle_one_asset(pre_allocation: Allocation,
asset: Asset,
qty: int) -> Tuple[Execution, Allocation, int]:
minimum = pre_allocation.minimum
maximum = pre_allocation.maximum
current = pre_allocation.current
code = pre_allocation.code
if qty < minimum:
raise ValueError("{0}'s target {1} is smaller than minimum amount {2}".format(asset.code, qty, pre_allocation))
elif qty < maximum:
# need to buy / sell
ex = Execution(code, qty - current)
allocation = Allocation(code,
minimum=minimum,
maximum=maximum,
current=qty)
qty = 0
else:
ex = Execution(code, maximum - current)
allocation = Allocation(code,
minimum=minimum,
maximum=maximum,
current=maximum)
qty = qty - maximum
return ex, allocation, qty
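`handle_one_asset` clips a target quantity to one portfolio's allocation band and returns the execution, the updated allocation, and the quantity that spills over to the next portfolio. A toy walk-through (values made up, relying only on the classes above):

```python
# The target of 250 exceeds the 0..100 band, so 80 shares are bought
# (20 -> 100) and 150 spill over to downstream portfolios.
pre = Allocation(code=1, minimum=0, maximum=100, current=20)
asset = Asset(code=1, name='demo')
ex, new_alloc, left = handle_one_asset(pre, asset, qty=250)
print(ex)         # Execution(code=1, qty=80, comment=None)
print(new_alloc)  # Allocation(code=1, minimum=0, maximum=100, current=100)
print(left)       # 150
```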
def pass_through(target_pos: TargetPositions,
portfolio: Portfolio) -> Tuple[Executions, Portfolio, TargetPositions]:
p_name = portfolio.name
new_target_pos = TargetPositions()
allocations = []
executions = []
for code in target_pos.codes:
asset, qty = target_pos[code]
if asset.priority:
raise ValueError("asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority))
if p_name in asset.forbidden:
ex = Execution(code, 0, "{0} is forbidden for {1}".format(code, p_name))
allocation = copy.deepcopy(portfolio[code])
new_target_pos.add_asset(asset, qty)
else:
prev_allocation = portfolio[code]
ex, allocation, qty = handle_one_asset(prev_allocation, asset, qty)
new_target_pos.add_asset(asset, qty)
allocations.append(allocation)
executions.append(ex)
return Executions(p_name, executions), Portfolio(p_name, allocations), new_target_pos
if __name__ == '__main__':
asset1 = Asset(1, 'a')
asset2 = Asset(2, 'b')
asset3 = Asset(3, 'b')
target_pos = TargetPositions([asset1, asset2, asset3], [200, 300, 100])
allc1 = Allocation(1, 0, 100, 0)
allc2 = Allocation(2, 0, 400, 100)
allc3 = Allocation(3, 0, 400, 200)
portfolio = Portfolio('test1', [allc1, allc2, allc3])
executions, portfolio, target_pos = pass_through(target_pos, portfolio)
print(executions)
print(portfolio)
print(target_pos)
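For these targets, asset 1's 200 exceeds its 0..100 band, so 100 executes against `test1` and 100 spills into `new_target_pos`; asset 2 buys 200 (100 to 300) and asset 3 sells 100 (200 to 100), both fully absorbed within their bands.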
......@@ -5,17 +5,137 @@ Created on 2017-7-21
@author: cheng.li
"""
from deprecated import deprecated
from math import inf
import numpy as np
import pandas as pd
from enum import IntEnum
from typing import Tuple
from typing import Optional
from typing import Dict
from typing import List
from typing import Union
from typing import Iterable
from PyFin.api import pyFinAssert
class BoundaryDirection(IntEnum):
LOWER = -1
UPPER = 1
class BoundaryType(IntEnum):
ABSOLUTE = 0
RELATIVE = 1
class BoundaryImpl(object):
def __init__(self,
direction: BoundaryDirection,
b_type: BoundaryType,
val: float):
self.direction = direction
self.b_type = b_type
self.val = val
self._validation()
def _validation(self):
pyFinAssert(self.b_type == BoundaryType.ABSOLUTE or self.b_type == BoundaryType.RELATIVE,
ValueError,
"Boundary Type {0} is not recognized".format(self.b_type))
pyFinAssert(self.direction == BoundaryDirection.LOWER or self.direction == BoundaryDirection.UPPER,
ValueError,
"Boundary direction {0} is not recognized".format(self.direction))
def __call__(self, center: float):
if self.b_type == BoundaryType.ABSOLUTE:
return self.val + center
else:
pyFinAssert(center >= 0., ValueError, "relative bounds only support a positive backbone value")
return self.val * center
class BoxBoundary(object):
def __init__(self,
lower_bound: BoundaryImpl,
upper_bound: BoundaryImpl):
self.lower = lower_bound
self.upper = upper_bound
def bounds(self, center):
l_b, u_b = self.lower(center), self.upper(center)
pyFinAssert(l_b <= u_b, ValueError, "lower bound should be lower than upper bound")
return l_b, u_b
def create_box_bounds(names: List[str],
b_type: Union[Iterable[BoundaryType], BoundaryType],
l_val: Union[Iterable[float], float],
u_val: Union[Iterable[float], float]) -> Dict[str, BoxBoundary]:
"""
helper function to quickly create a series of bounds
"""
bounds = dict()
if not hasattr(b_type, '__iter__'):
b_type = np.array([b_type] * len(names))
if not hasattr(l_val, '__iter__'):
l_val = np.array([l_val] * len(names))
if not hasattr(u_val, '__iter__'):
u_val = np.array([u_val] * len(names))
for i, name in enumerate(names):
lower = BoundaryImpl(BoundaryDirection.LOWER,
b_type[i],
l_val[i])
upper = BoundaryImpl(BoundaryDirection.UPPER,
b_type[i],
u_val[i])
bounds[name] = BoxBoundary(lower, upper)
return bounds
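`create_box_bounds` broadcasts scalar `b_type`/`l_val`/`u_val` values to every name; per-name iterables also work. For instance:

```python
# Scalar bounds broadcast across all names.
bounds = create_box_bounds(['SIZE', 'BETA'], BoundaryType.RELATIVE,
                           l_val=0.8, u_val=1.2)
l, u = bounds['SIZE'].bounds(2.0)   # relative bounds scale the center
print(l, u)                         # 1.6 2.4
```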
class LinearConstraints(object):
def __init__(self,
bounds: Dict[str, BoxBoundary],
cons_mat: pd.DataFrame,
backbone: np.ndarray):
pyFinAssert(len(bounds) == cons_mat.shape[1], ValueError,
"number of bounds should be same as number of columns of cons_mat")
pyFinAssert(cons_mat.shape[0] == len(backbone), ValueError,
"length of backbone should be same as number of rows of cons_mat")
self.names = list(bounds.keys())
self.bounds = bounds
self.cons_mat = cons_mat
self.backbone = backbone
def risk_targets(self) -> Tuple[np.ndarray, np.ndarray]:
lower_bounds = []
upper_bounds = []
for name in self.names:
center = self.backbone @ self.cons_mat[name].values
l, u = self.bounds[name].bounds(center)
lower_bounds.append(l)
upper_bounds.append(u)
return np.array(lower_bounds), np.array(upper_bounds)
@property
def risk_exp(self) -> np.ndarray:
return self.cons_mat[self.names].values
@deprecated(reason="Constraints is deprecated in alpha-mind 0.1.1. Please use LinearConstraints instead.")
class Constraints(object):
def __init__(self,
risk_exp: Optional[np.ndarray]=None,
risk_names: Optional[np.ndarray]=None):
risk_exp: Optional[np.ndarray] = None,
risk_names: Optional[np.ndarray] = None):
self.risk_exp = risk_exp
if risk_names is not None:
......@@ -68,4 +188,4 @@ if __name__ == '__main__':
cons = Constraints(risk_exp, risk_names)
cons.set_constraints('b', 0.0, 0.1)
print(cons.risk_targets())
\ No newline at end of file
print(cons.risk_targets())
......@@ -7,7 +7,14 @@ Created on 2017-7-20
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import BoxBoundary
from alphamind.portfolio.constraints import BoundaryImpl
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.constraints import LinearConstraints
class TestConstraints(unittest.TestCase):
......@@ -43,6 +50,100 @@ class TestConstraints(unittest.TestCase):
np.testing.assert_array_almost_equal(risk_targets[0], np.array([-0.1, -np.inf, -0.1]))
np.testing.assert_array_almost_equal(risk_targets[1], np.array([0.1, np.inf, 0.1]))
def test_absolute_box_boundary(self):
lower = BoundaryImpl(BoundaryDirection.LOWER,
BoundaryType.ABSOLUTE,
-0.8)
upper = BoundaryImpl(BoundaryDirection.UPPER,
BoundaryType.ABSOLUTE,
1.1)
bound = BoxBoundary(lower, upper)
center = 2.2
l, u = bound.bounds(center)
self.assertAlmostEqual(l, 1.4)
self.assertAlmostEqual(u, 3.3)
def test_relative_box_boundary(self):
lower = BoundaryImpl(BoundaryDirection.LOWER,
BoundaryType.RELATIVE,
0.8)
upper = BoundaryImpl(BoundaryDirection.UPPER,
BoundaryType.RELATIVE,
1.1)
bound = BoxBoundary(lower, upper)
center = 2.2
l, u = bound.bounds(center)
self.assertAlmostEqual(l, 1.76)
self.assertAlmostEqual(u, 2.42)
def test_create_box_bounds_single_value(self):
names = ['a', 'b', 'c']
b_type = BoundaryType.RELATIVE
l_val = 0.8
u_val = 1.1
bounds = create_box_bounds(names,
b_type,
l_val,
u_val)
for key, bound in bounds.items():
l_bound = bound.lower
u_bound = bound.upper
self.assertEqual(l_bound.b_type, b_type)
self.assertEqual(u_bound.b_type, b_type)
self.assertAlmostEqual(l_bound.val, l_val)
self.assertAlmostEqual(u_bound.val, u_val)
def test_create_box_bounds_multiple_values(self):
names = ['a', 'b', 'c']
b_type = BoundaryType.RELATIVE
l_val = [0.9, 0.8, 1.1]
u_val = [1.1, 1.2, 1.3]
bounds = create_box_bounds(names,
b_type,
l_val,
u_val)
for i, name in enumerate(names):
bound = bounds[name]
l_bound = bound.lower
u_bound = bound.upper
self.assertEqual(l_bound.b_type, b_type)
self.assertEqual(u_bound.b_type, b_type)
self.assertAlmostEqual(l_bound.val, l_val[i])
self.assertAlmostEqual(u_bound.val, u_val[i])
def test_linear_constraints(self):
cons_mat = np.random.randn(100, 3)
backbone = np.random.randn(100)
names = ['a', 'b', 'c']
cons_mat = pd.DataFrame(cons_mat, columns=names)
b_type = BoundaryType.ABSOLUTE
l_val = -0.8
u_val = 1.1
bounds = create_box_bounds(names,
b_type,
l_val,
u_val)
constraints = LinearConstraints(bounds=bounds,
cons_mat=cons_mat,
backbone=backbone)
l_bounds, u_bounds = constraints.risk_targets()
risk_exp = constraints.risk_exp
for i, name in enumerate(names):
center = risk_exp[:, i] @ backbone
self.assertAlmostEqual(center + l_val, l_bounds[i])
self.assertAlmostEqual(center + u_val, u_bounds[i])
if __name__ == '__main__':
unittest.main()
(One file's diff is omitted here: its source diff was too large to display.)
arrow >= 0.10.0
cython >= 0.25.2
deprecated >= 1.1.0
numpy >= 1.12.1
pandas >= 0.19.2
scikit-learn >= 0.18.1
......
......@@ -14,7 +14,7 @@ from Cython.Build import cythonize
from distutils.extension import Extension
import numpy as np
VERSION = "0.1.0"
VERSION = "0.1.1"
if platform.system() != "Windows":
import multiprocessing
......
Subproject commit a187ed6c8f3aa40b47d5be80667cbbe6a6fd563d
Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999