Merge pull request #3 from alpha-miner/master

update

Merge pull request #3 from alpha-miner/master
update
72e4179f · lion-sing · GitHub · ebd7a27f · eac7bef8 · 72e4179f
Unverified Commit 72e4179f authored Feb 08, 2018 by lion-sing Committed by GitHub Feb 08, 2018
26 changed files
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,6 +37,7 @@ install:
  - pip install simpleutils
  - pip install coveralls
  - pip install finance-python
+  - pip install deprecated
  - export CWD=$PWD
  - cd /usr/src/gtest
  - sudo cmake CMakeLists.txt

--- a/README.md
+++ b/README.md
@@ -48,11 +48,11 @@ alpha - mind 提供了多因子研究中常用的工具链，包括：

 * Linux

-    在linux上，需要c++编译器（例如g++）以及fortran编译器（例如gfortran)
+    在linux上，需要c++编译器（例如g++）以及fortran编译器（例如gfortran）：

    ```bash
    build_linux_dependencies.sh
-```
+    ```

 ## 安装


--- a/alphamind/__init__.py
+++ b/alphamind/__init__.py
@@ -6,4 +6,4 @@ Created on 2017-4-25
 """


-__version__ = "0.1.0"
+__version__ = "0.1.1"
--- a/alphamind/analysis/factoranalysis.py
+++ b/alphamind/analysis/factoranalysis.py
@@ -7,11 +7,13 @@ Created on 2017-5-25

 from typing import Optional
 from typing import Tuple
+from typing import Union
 import numpy as np
 import pandas as pd
 from alphamind.data.standardize import standardize
 from alphamind.data.winsorize import winsorize_normal
 from alphamind.portfolio.constraints import Constraints
+from alphamind.portfolio.constraints import LinearConstraints
 from alphamind.portfolio.longshortbulder import long_short_build
 from alphamind.portfolio.rankbuilder import rank_build
 from alphamind.portfolio.linearbuilder import linear_build
@@ -60,7 +62,7 @@ def factor_analysis(factors: pd.DataFrame,
 def er_portfolio_analysis(er: np.ndarray,
                          industry: np.ndarray,
                          dx_return: np.ndarray,
-                          constraints: Optional[Constraints]=None,
+                          constraints: Optional[Union[LinearConstraints, Constraints]]=None,
                          detail_analysis=True,
                          benchmark: Optional[np.ndarray] = None,
                          is_tradable: Optional[np.ndarray] = None,

--- a/alphamind/analysis/turnoveranalysis.py
+++ b/alphamind/analysis/turnoveranalysis.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2018-1-15
+
+@author: cheng.li
+"""
+
+import numpy as np
+from alphamind.data.standardize import standardize
+
+
+def factor_turn_over(factor_values: np.ndarray,
+                     trade_dates: np.ndarray,
+                     codes: np.ndarray,
+                     use_standize: bool=True):
+    """Optionally standardize factor values, grouped by trade date.
+
+    When ``use_standize`` is true, ``factor_values`` is passed through
+    ``standardize`` with ``trade_dates`` as the grouping argument.
+
+    NOTE(review): the function body ends after that step, so it returns
+    ``None`` and the standardized array is discarded; ``codes`` is never
+    read. Presumably the turnover computation is still to be written --
+    confirm with the author.
+    """
+    if use_standize:
+        factor_values = standardize(factor_values, trade_dates)
+
+
+if __name__ == '__main__':
+    # Scratch setup only: an engine and a set of query parameters are
+    # created, but factor_turn_over is not called here.
+    from alphamind.api import *
+    engine = SqlEngine()
+
+    factor = 'ep_q'
+    freq = '5b'
+    start_date = '2017-06-01'
+    end_date = '2017-08-01'
+    universe = Universe('custom', ['zz500'])
+
+
--- a/alphamind/api.py
+++ b/alphamind/api.py
@@ -14,6 +14,10 @@ from alphamind.data.engines.universe import Universe
 from alphamind.data.processing import factor_processing

 from alphamind.portfolio.constraints import Constraints
+from alphamind.portfolio.constraints import LinearConstraints
+from alphamind.portfolio.constraints import BoundaryType
+from alphamind.portfolio.constraints import BoundaryDirection
+from alphamind.portfolio.constraints import create_box_bounds
 from alphamind.portfolio.evolver import evolve_positions

 from alphamind.data.engines.sqlengine import risk_styles
@@ -24,6 +28,7 @@ from alphamind.data.standardize import standardize
 from alphamind.data.standardize import projection
 from alphamind.data.neutralize import neutralize
 from alphamind.data.engines.sqlengine import factor_tables
+from alphamind.data.engines.utilities import industry_list

 from alphamind.model import LinearRegression
 from alphamind.model import LassoRegression
@@ -37,6 +42,7 @@ from alphamind.model import XGBTrainer
 from alphamind.model import load_model
 from alphamind.model.data_preparing import fetch_data_package
 from alphamind.model.data_preparing import fetch_train_phase
+from alphamind.model.data_preparing import fetch_predict_phase

 from alphamind.execution.naiveexecutor import NaiveExecutor
 from alphamind.execution.thresholdexecutor import ThresholdExecutor
@@ -56,6 +62,10 @@ __all__ = [
    'Universe',
    'factor_processing',
    'Constraints',
+    'LinearConstraints',
+    'BoundaryType',
+    'BoundaryDirection',
+    'create_box_bounds',
    'evolve_positions',
    'risk_styles',
    'industry_styles',
@@ -65,8 +75,10 @@ __all__ = [
    'projection',
    'neutralize',
    'factor_tables',
+    'industry_list',
    'fetch_data_package',
    'fetch_train_phase',
+    'fetch_predict_phase',
    'LinearRegression',
    'LassoRegression',
    'ConstLinearModel',

--- a/alphamind/data/dbmodel/models.py
+++ b/alphamind/data/dbmodel/models.py
@@ -670,6 +670,7 @@ class Experimental(Base):
    val_q = Column(Float(53))
    ep_q = Column(Float(53))
    ep_q_d_1w = Column(Float(53))
+    ev = Column(Float(53))


 class FactorMaster(Base):

--- a/alphamind/data/engines/industries.py
+++ b/alphamind/data/engines/industries.py
--- a/alphamind/data/engines/sqlengine.py
+++ b/alphamind/data/engines/sqlengine.py
@@ -42,6 +42,7 @@ from alphamind.data.engines.utilities import _map_factors
 from alphamind.data.engines.utilities import _map_industry_category
 from alphamind.data.engines.utilities import _map_risk_model_table
 from alphamind.data.engines.utilities import factor_tables
+from alphamind.data.engines.utilities import industry_list
 from PyFin.api import advanceDateByCalendar

 risk_styles = ['BETA',
@@ -207,12 +208,12 @@ class SqlEngine(object):
        cond = universe._query_statements(start_date, end_date, None)

        big_table = join(Market, UniverseTable,
-            and_(
-                Market.trade_date == UniverseTable.trade_date,
-                Market.code == UniverseTable.code,
-                cond
-            )
-        )
+                         and_(
+                             Market.trade_date == UniverseTable.trade_date,
+                             Market.code == UniverseTable.code,
+                             cond
+                         )
+                         )

        query = select([Market.trade_date, Market.code, stats]) \
            .select_from(big_table)
@@ -379,7 +380,7 @@ class SqlEngine(object):
                             FullFactor.code == UniverseTable.code,
                             cond
                         )
-                    )
+                         )

        query = select(
            [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
@@ -498,7 +499,7 @@ class SqlEngine(object):
                             FullFactor.code == UniverseTable.code,
                             cond
                         )
-                    )
+                         )

        query = select(
            [FullFactor.trade_date, FullFactor.code, special_risk_col] + risk_exposure_cols).select_from(big_table) \
@@ -508,20 +509,24 @@ class SqlEngine(object):

        if universe.is_filtered:
            codes = universe.query(self, start_date, end_date, dates)
-            risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
+            risk_exp = pd.merge(risk_exp, codes, how='inner', on=['trade_date', 'code']).sort_values(
+                ['trade_date', 'code'])

        return risk_cov, risk_exp

    def fetch_industry(self,
                       ref_date: str,
                       codes: Iterable[int],
-                       category: str = 'sw'):
+                       category: str = 'sw',
+                       level: int = 1):

        industry_category_name = _map_industry_category(category)
+        code_name = 'industryID' + str(level)
+        category_name = 'industryName' + str(level)

        query = select([Industry.code,
-                        Industry.industryID1.label('industry_code'),
-                        Industry.industryName1.label('industry')]).where(
+                        getattr(Industry, code_name).label('industry_code'),
+                        getattr(Industry, category_name).label('industry')]).where(
            and_(
                Industry.trade_date == ref_date,
                Industry.code.in_(codes),
@@ -531,14 +536,36 @@ class SqlEngine(object):

        return pd.read_sql(query, self.engine)

+    def fetch_industry_matrix(self,
+                              ref_date: str,
+                              codes: Iterable[int],
+                              category: str = 'sw',
+                              level: int = 1):
+        """Return the industry membership of ``codes`` as indicator columns.
+
+        The rows come from ``fetch_industry``. The ``industry`` column is
+        copied to ``industry_name`` and then expanded by ``pd.get_dummies``
+        with an empty prefix, so each indicator column is named after the
+        industry label itself.
+
+        The result holds ``code``, ``industry_code`` and ``industry_name``,
+        followed by the industries from ``industry_list(category, level)``
+        that occur in the data (in list order), followed by the remaining
+        listed industries as all-zero columns.
+        """
+        df = self.fetch_industry(ref_date, codes, category, level)
+        df['industry_name'] = df['industry']
+        df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
+        industries = industry_list(category, level)
+
+        # Split the expected industries into those that produced an
+        # indicator column (`i in df` tests column labels) and those absent.
+        in_s = []
+        out_s = []
+        for i in industries:
+            if i in df:
+                in_s.append(i)
+            else:
+                out_s.append(i)
+
+        res = df[['code', 'industry_code', 'industry_name'] + in_s]
+        # Absent industries are appended as constant-zero columns.
+        res = res.assign(**dict(zip(out_s, [0] * len(out_s))))
+        return res
+
    def fetch_industry_range(self,
                             universe: Universe,
                             start_date: str = None,
                             end_date: str = None,
                             dates: Iterable[str] = None,
-                             category: str = 'sw'):
+                             category: str = 'sw',
+                             level: int = 1):
        industry_category_name = _map_industry_category(category)
-
        cond = universe._query_statements(start_date, end_date, dates)

        big_table = join(Industry, UniverseTable,
@@ -547,13 +574,15 @@ class SqlEngine(object):
                             Industry.code == UniverseTable.code,
                             Industry.industry == industry_category_name,
                             cond
-                         )
-                    )
+                         ))
+
+        code_name = 'industryID' + str(level)
+        category_name = 'industryName' + str(level)

        query = select([Industry.trade_date,
                        Industry.code,
-                        Industry.industryID1.label('industry_code'),
-                        Industry.industryName1.label('industry')]).select_from(big_table).distinct()
+                        getattr(Industry, code_name).label('industry_code'),
+                        getattr(Industry, category_name).label('industry')]).select_from(big_table).distinct()

        df = pd.read_sql(query, self.engine)
        if universe.is_filtered:
@@ -561,7 +590,46 @@ class SqlEngine(object):
            df = pd.merge(df, codes, how='inner', on=['trade_date', 'code']).sort_values(['trade_date', 'code'])
        return df

-    def fetch_data(self, ref_date: str,
+    def fetch_industry_matrix_range(self,
+                                    universe: Universe,
+                                    start_date: str = None,
+                                    end_date: str = None,
+                                    dates: Iterable[str] = None,
+                                    category: str = 'sw',
+                                    level: int = 1):
+        """Return industry membership over a date range as indicator columns.
+
+        The rows come from ``fetch_industry_range``. The ``industry`` column
+        is copied to ``industry_name`` and then expanded by
+        ``pd.get_dummies`` with an empty prefix, so each indicator column is
+        named after the industry label itself.
+
+        The result holds ``trade_date``, ``code``, ``industry_code`` and
+        ``industry_name``, followed by the industries from
+        ``industry_list(category, level)`` that occur in the data (in list
+        order), followed by the remaining listed industries as all-zero
+        columns.
+        """
+
+        df = self.fetch_industry_range(universe, start_date, end_date, dates, category, level)
+        df['industry_name'] = df['industry']
+        df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
+        industries = industry_list(category, level)
+
+        # Split the expected industries into those that produced an
+        # indicator column (`i in df` tests column labels) and those absent.
+        in_s = []
+        out_s = []
+        for i in industries:
+            if i in df:
+                in_s.append(i)
+            else:
+                out_s.append(i)
+
+        res = df[['trade_date', 'code', 'industry_code', 'industry_name'] + in_s]
+
+        # Absent industries are appended as constant-zero columns.
+        res = res.assign(**dict(zip(out_s, [0]*len(out_s))))
+        return res
+
+    def fetch_trade_status(self,
+                           ref_date: str,
+                           codes: Iterable[int]):
+        """Return ``code`` and ``isOpen`` from ``Market`` for one date.
+
+        Only rows whose ``trade_date`` equals ``ref_date`` and whose code is
+        in ``codes`` are selected; the frame is sorted by ``code``.
+        """
+
+        query = select([Market.code, Market.isOpen]).where(
+            and_(
+                Market.trade_date == ref_date,
+                Market.code.in_(codes)
+            )
+        )
+        return pd.read_sql(query, self.engine).sort_values(['code'])
+
+    def fetch_data(self,
+                   ref_date: str,
                   factors: Iterable[str],
                   codes: Iterable[int],
                   benchmark: int = None,
@@ -802,10 +870,10 @@ class SqlEngine(object):
        else:
            id_filter = 'in_'

-        t = select([table.trade_id]).\
+        t = select([table.trade_id]). \
            where(and_(table.trade_date <= ref_date,
                       table.operation == 'withdraw')).alias('t')
-        query = select([table]).\
+        query = select([table]). \
            where(and_(getattr(table.trade_id, id_filter)(t),
                       table.trade_date <= ref_date,
                       table.operation == 'lend'))
@@ -823,7 +891,7 @@ class SqlEngine(object):
            rule = x['price_rule'].split('@')

            if rule[0] in ['closePrice', 'openPrice']:
-                query = select([getattr(Market, rule[0])]).\
+                query = select([getattr(Market, rule[0])]). \
                    where(and_(Market.code == code, Market.trade_date == rule[1]))
                data = pd.read_sql(query, self.engine)
                if not data.empty:
@@ -835,6 +903,7 @@ class SqlEngine(object):
            else:
                raise KeyError('do not have rule for %s' % x['price_rule'])
            return price
+
        df['price'] = df.apply(lambda x: parse_price_rule(x), axis=1)

        df.drop(['remark', 'price_rule', 'operation'], axis=1, inplace=True)
@@ -848,12 +917,10 @@ class SqlEngine(object):


 if __name__ == '__main__':
-
    universe = Universe('ss', ['hs300'])

    engine = SqlEngine()
-
-    df = engine.fetch_outright_status('2017-12-28')
-
-    print(df)
-
+    ref_date = '2017-12-28'
+    codes = universe.query(engine, dates=[ref_date])
+    df = engine.fetch_trade_status(ref_date, codes.code.tolist())
+    print(df)
\ No newline at end of file
--- a/alphamind/data/engines/utilities.py
+++ b/alphamind/data/engines/utilities.py
@@ -13,6 +13,7 @@ from alphamind.data.dbmodel.models import RiskCovLong
 from alphamind.data.dbmodel.models import FullFactor
 from alphamind.data.dbmodel.models import Gogoal
 from alphamind.data.dbmodel.models import Experimental
+from alphamind.data.engines.industries import INDUSTRY_MAPPING


 factor_tables = [FullFactor, Gogoal, Experimental]
@@ -43,5 +44,17 @@ def _map_factors(factors: Iterable[str], used_factor_tables) -> Dict:
 def _map_industry_category(category: str) -> str:
+    """Translate a short category key into its classification name string.
+
+    Recognized keys are 'sw', 'sw_adj', 'zz', 'dx' and 'zjh'; any other
+    value raises ValueError.
+    """
     if category == 'sw':
         return '申万行业分类'
+    if category == 'sw_adj':
+        return '申万行业分类修订'
+    elif category == 'zz':
+        return '中证行业分类'
+    elif category == 'dx':
+        return '东兴行业分类'
+    elif category == 'zjh':
+        return '证监会行业V2012'
     else:
-        raise ValueError("No other industry is supported at the current time")
\ No newline at end of file
+        raise ValueError("No other industry is supported at the current time")
+
+
+def industry_list(category: str, level: int=1) -> list:
+    """Return the ``INDUSTRY_MAPPING`` entry for ``category`` and ``level``.
+
+    This is a plain nested lookup, so an unknown category or level
+    propagates the lookup error unchanged.
+    NOTE(review): presumably the entry is the list of industry names for
+    that classification level -- confirm against industries.py.
+    """
+    return INDUSTRY_MAPPING[category][level]
\ No newline at end of file
--- a/alphamind/data/standardize.py
+++ b/alphamind/data/standardize.py
@@ -22,9 +22,9 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
        mean_values = transform(groups, x, 'mean')
        std_values = transform(groups, x, 'std', ddof)

-        return (x - mean_values) / std_values
+        return (x - mean_values) / np.maximum(std_values, 1e-8)
    else:
-        return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
+        return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)


 def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
@@ -48,7 +48,7 @@ class Standardizer(object):
        self.std_ = simple_std(x, axis=0, ddof=self.ddof_)

    def transform(self, x: np.ndarray) -> np.ndarray:
-        return (x - self.mean_) / self.std_
+        return (x - self.mean_) / np.maximum(self.std_, 1e-8)


 class GroupedStandardizer(object):
@@ -69,4 +69,4 @@ class GroupedStandardizer(object):
    def transform(self, x: np.ndarray) -> np.ndarray:
        groups = x[:, 0].astype(int)
        index = array_index(self.labels_, groups)
-        return (x[:, 1:] - self.mean_[index]) / self.std_[index]
+        return (x[:, 1:] - self.mean_[index]) / np.maximum(self.std_[index], 1e-8)
--- a/alphamind/data/transformer.py
+++ b/alphamind/data/transformer.py
@@ -5,6 +5,7 @@ Created on 2017-8-23
 @author: cheng.li
 """

+import copy
 import pandas as pd
 from PyFin.api import pyFinAssert
 from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
@@ -58,7 +59,7 @@ class Transformer(object):
    def __init__(self,
                 expressions):
        expression_dict, expression_dependency = \
-            factor_translator(expressions)
+            factor_translator(copy.deepcopy(expressions))

        if expression_dict:
            self.names = sorted(expression_dict.keys())

--- a/alphamind/examples/factor_analysis_example.py
+++ b/alphamind/examples/factor_analysis_example.py
--- a/alphamind/examples/factor_res_analysis.py
+++ b/alphamind/examples/factor_res_analysis.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2018-1-15
+
+@author: cheng.li
+"""
+
+import numpy as np
+import pandas as pd
+from PyFin.api import *
+from alphamind.api import *
+
+
+def factor_residue_analysis(start_date,
+                            end_date,
+                            factor_name,
+                            factor,
+                            freq,
+                            universe,
+                            engine):
+    """Run a 5-bin quantile analysis of a factor for each scheduled date.
+
+    A schedule between ``start_date`` and ``end_date`` is built with tenor
+    ``freq`` on the 'china.sse' calendar. Factor data (stored under the name
+    ``factor_name + '_res'``) and forward returns with horizon
+    ``map_freq(freq)`` are fetched from ``engine`` for ``universe``.
+
+    For every trade date the factor rows are merged with the returns on
+    ``code``, rows with missing values are dropped, the factor is passed
+    through ``factor_processing`` (winsorize + standardize before and after,
+    with SIZE, LEVERAGE and the industry styles as risk factors) and the
+    output is handed to ``er_quantile_analysis``. If that processing raises,
+    the error is printed and the date gets a row of zeros.
+
+    Returns a DataFrame indexed by date with one column per bin (0..4), a
+    '$top1 - bottom1$' column (bin 4 minus bin 0), and one extra all-zero
+    row dated one business day before the first schedule date.
+    """
+    neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
+    n_bins = 5
+    horizon = map_freq(freq)
+
+    dates = makeSchedule(start_date,
+                         end_date,
+                         tenor=freq,
+                         calendar='china.sse')
+
+    alpha_factor_name = factor_name + '_res'
+    alpha_factor = {alpha_factor_name: factor}
+    factor_all_data = engine.fetch_data_range(universe,
+                                              alpha_factor,
+                                              dates=dates)['factor']
+    return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
+
+    factor_groups = factor_all_data.groupby('trade_date')
+    return_groups = return_all_data.groupby('trade_date')
+    # One result row per factor date, one column per quantile bin.
+    final_res = np.zeros((len(factor_groups.groups), n_bins))
+
+    index_dates = []
+
+    for i, value in enumerate(factor_groups):
+        date = value[0]
+        data = value[1][['code', alpha_factor_name, 'isOpen'] + neutralize_risk]
+        # NOTE(review): this lookup sits outside the try block below, so a
+        # factor date with no return rows would raise here -- confirm that
+        # both frames always cover the same dates.
+        returns = return_groups.get_group(date)
+
+        total_data = pd.merge(data, returns, on=['code']).dropna()
+        risk_exp = total_data[neutralize_risk].values.astype(float)
+        dx_return = total_data.dx.values
+
+        index_dates.append(date)
+        try:
+            er = factor_processing(total_data[[alpha_factor_name]].values,
+                                   pre_process=[winsorize_normal, standardize],
+                                   risk_factors=risk_exp,
+                                   post_process=[winsorize_normal, standardize])
+            res = er_quantile_analysis(er,
+                                       n_bins=n_bins,
+                                       dx_return=dx_return)
+        except Exception as e:
+            # Best effort: report the failure and record zeros for this date.
+            print(e)
+            res = np.zeros(n_bins)
+
+        final_res[i] = res
+
+    df = pd.DataFrame(final_res, index=index_dates)
+
+    # Prepend an all-zero row one business day before the first date.
+    start_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
+    df.loc[start_date] = 0.
+    df.sort_index(inplace=True)
+    # Columns 4 and 0 are the last and first of the n_bins = 5 bins.
+    df['$top1 - bottom1$'] = df[4] - df[0]
+    return df
+
+
+def factor_analysis(f_name):
+    """Run ``factor_residue_analysis`` for one factor name.
+
+    The factor expression is ``LAST(f_name)`` residualized in turn (via
+    ``CSRes``) against three base expressions built from 'Alpha60', 'roe_q'
+    and 'ep_q'. The analysis uses a fresh ``SqlEngine``, the 'zz800' custom
+    universe, the fixed window 2010-01-01 to 2018-01-26 and frequency '10b'.
+
+    Returns ``(f_name, res)`` where ``res`` is the analysis DataFrame.
+    """
+    # Imported inside the function; NOTE(review): presumably so the function
+    # is self-contained when shipped to remote workers -- confirm.
+    from alphamind.api import SqlEngine, Universe, alpha_logger
+    engine = SqlEngine()
+    universe = Universe('custom', ['zz800'])
+    base1 = LAST('Alpha60')
+    base2 = CSRes('roe_q', base1)
+    base3 = CSRes(CSRes('ep_q', base1), base2)
+    factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
+    res = factor_residue_analysis('2010-01-01',
+                                  '2018-01-26',
+                                  f_name,
+                                  factor,
+                                  '10b',
+                                  universe,
+                                  engine)
+    alpha_logger.info('{0} is done'.format(f_name))
+    return f_name, res
+
+
+if __name__ == '__main__':
+    # Fan the per-factor analysis out over a dask cluster at a fixed address.
+    from dask.distributed import Client
+    client = Client('10.63.6.176:8786')
+
+    # Keep factors whose mean coverage on zz800 is at least 98%.
+    engine = SqlEngine()
+    df = engine.fetch_factor_coverage()
+    df = df[df.universe == 'zz800'].groupby('factor').mean()
+    df = df[df.coverage >= 0.98]
+    universe = Universe('custom', ['zz800'])
+
+    factor_df = pd.DataFrame()
+
+    tasks = client.map(factor_analysis, df.index.tolist())
+    res = client.gather(tasks)
+
+    # NOTE: the loop variable `df` rebinds the coverage frame defined above.
+    for f_name, df in res:
+        factor_df[f_name] = df['$top1 - bottom1$']
+
+    # Serial variant, kept commented out. It iterates `df.index`, which no
+    # longer refers to the coverage frame once the loop above has run.
+    # for i, f_name in enumerate(df.index):
+    #     base1 = LAST('Alpha60')
+    #     base2 = CSRes('roe_q', base1)
+    #     base3 = CSRes(CSRes('ep_q', base1), base2)
+    #     factor = CSRes(CSRes(CSRes(LAST(f_name), base1), base2), base3)
+    #     res = factor_residue_analysis('2010-01-01',
+    #                                   '2018-01-22',
+    #                                   f_name,
+    #                                   factor,
+    #                                   '10b',
+    #                                   universe,
+    #                                   engine)
+    #     factor_df[f_name] = res['$top1 - bottom1$']
+    #     alpha_logger.info('{0}: {1} is done'.format(i + 1, f_name))
--- a/alphamind/examples/formula_expression.py
+++ b/alphamind/examples/formula_expression.py
@@ -19,9 +19,12 @@ start = dt.datetime.now()

 universe = Universe('custom', ['zz800'])

-simple_expression = CSRes(LAST('OperCashInToAsset'), 'roe_q')
+factor_name = 'Beta20'
+base1 = LAST('roe_q')
+base2 = CSRes(LAST('ep_q'), 'roe_q')
+simple_expression = CSRes(CSRes(LAST(factor_name), base1), base2)

-alpha_factor_name = 'alpha_factor'
+alpha_factor_name = factor_name + '_res'
 alpha_factor = {alpha_factor_name: simple_expression}

 # end of formula definition
@@ -29,7 +32,7 @@ alpha_factor = {alpha_factor_name: simple_expression}
 engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')

 neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
-freq = '10b'
+freq = '5b'
 n_bins = 5
 horizon = map_freq(freq)

@@ -91,8 +94,6 @@ df = df.cumsum().plot(ax=axes[0], title='Quantile Analysis for {0}'.format(alpha

 # =================================================================== #

-factor_name = 'PE'
-
 alpha_factor_name = alpha_factor_name + '_1w_diff'
 alpha_factor = {alpha_factor_name: DIFF(simple_expression)}


--- a/alphamind/model/data_preparing.py
+++ b/alphamind/model/data_preparing.py
@@ -402,9 +402,9 @@ if __name__ == '__main__':
    universe = Universe('zz500', ['hs300', 'zz500'])
    neutralized_risk = ['SIZE']
    res = fetch_predict_phase(engine, ['ep_q'],
-                            '2018-01-08',
-                            '5b',
-                            universe,
-                            16,
-                            neutralized_risk=neutralized_risk)
+                              '2012-01-05',
+                              '5b',
+                              universe,
+                              16,
+                              neutralized_risk=neutralized_risk)
    print(res)
--- a/alphamind/model/linearmodel.py
+++ b/alphamind/model/linearmodel.py
@@ -32,8 +32,9 @@ class ConstLinearModel(ModelBase):

    def __init__(self,
                 features: list = None,
+                 formulas: dict = None,
                 weights: np.ndarray = None):
-        super().__init__(features)
+        super().__init__(features, formulas=formulas)
        if features is not None and weights is not None:
            pyFinAssert(len(features) == len(weights),
                        ValueError,
@@ -56,8 +57,8 @@ class ConstLinearModel(ModelBase):

 class LinearRegression(ModelBase):

-    def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
-        super().__init__(features)
+    def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
+        super().__init__(features, formulas=formulas)
        self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
        self.trained_time = None

@@ -84,8 +85,8 @@ class LinearRegression(ModelBase):

 class LassoRegression(ModelBase):

-    def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs):
-        super().__init__(features)
+    def __init__(self, alpha=0.01, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
+        super().__init__(features, formulas=formulas)
        self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
        self.trained_time = None

@@ -112,8 +113,8 @@ class LassoRegression(ModelBase):

 class LogisticRegression(ModelBase):

-    def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
-        super().__init__(features)
+    def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
+        super().__init__(features, formulas=formulas)
        self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)

    def save(self) -> dict:

--- a/alphamind/model/modelbase.py
+++ b/alphamind/model/modelbase.py
@@ -6,6 +6,7 @@ Created on 2017-9-4
 """

 import abc
+import copy
 import arrow
 import numpy as np
 from alphamind.utilities import alpha_logger
@@ -15,10 +16,13 @@ from alphamind.utilities import decode

 class ModelBase(metaclass=abc.ABCMeta):

-    def __init__(self, features: list=None):
+    def __init__(self, features: list=None, formulas: dict=None):
        if features is not None:
            self.features = list(features)
+        else:
+            self.features = None
        self.impl = None
+        self.formulas = copy.deepcopy(formulas)
        self.trained_time = None

    def fit(self, x: np.ndarray, y: np.ndarray):
@@ -43,6 +47,7 @@ class ModelBase(metaclass=abc.ABCMeta):
                          features=list(self.features),
                          trained_time=self.trained_time,
                          desc=encode(self.impl),
+                          formulas=encode(self.formulas),
                          internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
        return model_desc

@@ -50,6 +55,7 @@ class ModelBase(metaclass=abc.ABCMeta):
    def load(cls, model_desc: dict):
        obj_layout = cls()
        obj_layout.features = model_desc['features']
+        obj_layout.formulas = decode(model_desc['formulas'])
        obj_layout.trained_time = model_desc['trained_time']
        obj_layout.impl = decode(model_desc['desc'])
        return obj_layout

--- a/alphamind/model/treemodel.py
+++ b/alphamind/model/treemodel.py
@@ -28,7 +28,7 @@ class RandomForestRegressor(ModelBase):
                 max_features: str='auto',
                 features: List=None,
                 **kwargs):
-        super().__init__(features)
+        super().__init__(features, **kwargs)
        self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
                                              max_features=max_features,
                                              **kwargs)
@@ -61,8 +61,9 @@ class RandomForestClassifier(ModelBase):
                 n_estimators: int=100,
                 max_features: str='auto',
                 features: List = None,
+                 formulas: dict = None,
                 **kwargs):
-        super().__init__(features)
+        super().__init__(features, formulas=formulas)
        self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
                                               max_features=max_features,
                                               **kwargs)
@@ -96,11 +97,14 @@ class XGBRegressor(ModelBase):
                 learning_rate: float=0.1,
                 max_depth: int=3,
                 features: List=None,
+                 formulas: dict = None,
+                 n_jobs: int=1,
                 **kwargs):
-        super().__init__(features)
+        super().__init__(features, formulas=formulas)
        self.impl = XGBRegressorImpl(n_estimators=n_estimators,
                                     learning_rate=learning_rate,
                                     max_depth=max_depth,
+                                     n_jobs=n_jobs,
                                     **kwargs)

    def save(self) -> dict:
@@ -131,12 +135,15 @@ class XGBClassifier(ModelBase):
                 learning_rate: float=0.1,
                 max_depth: int=3,
                 features: List = None,
+                 formulas: dict = None,
+                 n_jobs: int=1,
                 **kwargs):
-        super().__init__(features)
+        super().__init__(features, formulas=formulas)
        self.impl = XGBClassifierImpl(n_estimators=n_estimators,
-                                     learning_rate=learning_rate,
-                                     max_depth=max_depth,
-                                     **kwargs)
+                                      learning_rate=learning_rate,
+                                      max_depth=max_depth,
+                                      n_jobs=n_jobs,
+                                      **kwargs)

    def save(self) -> dict:
        model_desc = super().save()
@@ -173,9 +180,11 @@ class XGBTrainer(ModelBase):
                 subsample=1.,
                 colsample_bytree=1.,
                 features: List = None,
-                 random_state=0,
+                 formulas: dict = None,
+                 random_state: int=0,
+                 n_jobs: int=1,
                 **kwargs):
-        super().__init__(features)
+        super().__init__(features, formulas=formulas)
        self.params = {
            'silent': 1,
            'objective': objective,
@@ -185,6 +194,7 @@ class XGBTrainer(ModelBase):
            'tree_method': tree_method,
            'subsample': subsample,
            'colsample_bytree': colsample_bytree,
+            'nthread': n_jobs,
            'seed': random_state
        }


--- a/alphamind/portfolio/allocations.py
+++ b/alphamind/portfolio/allocations.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2018-2-6
+
+@author: cheng.li
+"""
+
+from typing import List
+from typing import Tuple
+from math import inf
+import copy
+from PyFin.api import pyFinAssert
+
+
+class Allocation(object):
+    """Quantity of one asset held in a portfolio, with its allowed range.
+
+    :param code: identifier of the asset
+    :param minimum: smallest quantity allowed (default 0)
+    :param maximum: largest quantity allowed (default ``inf``, i.e. no cap)
+    :param current: quantity currently held (default 0)
+
+    ``pyFinAssert`` is asked to raise ``ValueError`` when ``current`` lies
+    outside ``[minimum, maximum]``.
+    """
+
+    def __init__(self,
+                 code: int,
+                 minimum: int=0,
+                 maximum: int=inf,
+                 current: int=0):
+        self.code = code
+        self.minimum = minimum
+        self.maximum = maximum
+        self.current = current
+
+        # the current holding has to sit inside the allowed range; the second
+        # message used to say "minimum" although it reports the maximum check
+        pyFinAssert(self.minimum <= self.current, ValueError, "minimum qty should be lower than current")
+        pyFinAssert(self.maximum >= self.current, ValueError, "maximum qty should be greater than current")
+
+    def __repr__(self):
+        return "Allocation(code={0}, minimum={1}, maximum={2}, current={3})".format(self.code,
+                                                                                    self.minimum,
+                                                                                    self.maximum,
+                                                                                    self.current)
+
+
+class Portfolio(object):
+    """Named set of allocations that can be looked up by asset code."""
+
+    def __init__(self,
+                 name: str,
+                 allocations: List[Allocation]):
+        self.name = name
+        # keyed by asset code; a later duplicate code replaces an earlier one
+        self.allocations = dict((item.code, item) for item in allocations)
+
+    def __getitem__(self, code):
+        # an unknown code gets a zero-sized allocation (minimum = maximum =
+        # current = 0) which is stored, so the lookup itself mutates the portfolio
+        if code not in self.allocations:
+            self.allocations[code] = Allocation(code, 0, 0, 0)
+        return self.allocations[code]
+
+    def __repr__(self):
+        template = "Portfolio(name={0}, allocations={1})"
+        return template.format(self.name, list(self.allocations.values()))
+
+    @property
+    def codes(self) -> List[int]:
+        """Asset codes held by the portfolio, in ascending order."""
+        return sorted(self.allocations)
+
+
+class Execution(object):
+    """Quantity change ``qty`` for asset ``code``, with an optional comment."""
+
+    def __init__(self,
+                 code: int,
+                 qty: int,
+                 comment: str=None):
+        self.code, self.qty, self.comment = code, qty, comment
+
+    def __repr__(self):
+        template = "Execution(code={0}, qty={1}, comment={2})"
+        return template.format(self.code, self.qty, self.comment)
+
+
+class Executions(object):
+    """List of executions grouped under one name.
+
+    ``executions`` is stored as passed in, so it stays ``None`` when omitted.
+    """
+
+    def __init__(self,
+                 name,
+                 executions: List[Execution]=None):
+        self.name, self.executions = name, executions
+
+    def __repr__(self):
+        template = "Executions(name={0}, executions={1})"
+        return template.format(self.name, self.executions)
+
+
+class Asset(object):
+    """Asset description with two sets of names: ``priority`` and ``forbidden``.
+
+    Both are stored as sets (empty when the argument is omitted or empty);
+    ``pyFinAssert`` is asked to raise ``ValueError`` when a name appears in both.
+    """
+
+    def __init__(self,
+                 code: int,
+                 name: str=None,
+                 priority: List[str]=None,
+                 forbidden: List[str]=None):
+        self.code = code
+        self.name = name
+        self.priority = set(priority) if priority else set()
+        self.forbidden = set(forbidden) if forbidden else set()
+        self._validation()
+
+    def _validation(self):
+        # a name can not be preferred and excluded at the same time
+        for pool in self.priority:
+            allowed = pool not in self.forbidden
+            pyFinAssert(allowed, ValueError, "{0} in priority is in forbidden".format(pool))
+
+    def __repr__(self):
+        template = "Asset(code={0}, name={1}, priority={2}, forbidden={3})"
+        return template.format(self.code, self.name, self.priority, self.forbidden)
+
+
+class TargetPositions(object):
+    """Mapping from asset code to an ``(asset, qty)`` pair.
+
+    :param assets: assets to register; when omitted or empty nothing is registered
+    :param qtys: quantities paired position by position with ``assets``
+    """
+
+    def __init__(self,
+                 assets: List[Asset]=None,
+                 qtys: List[int]=None):
+
+        if assets:
+            self.targets = {asset.code: (asset, qty) for asset, qty in zip(assets, qtys)}
+        else:
+            self.targets = {}
+
+    def add_asset(self,
+                  asset: Asset,
+                  qty: int):
+        """Register ``asset`` with ``qty``; raise ValueError if its code is already present."""
+        if asset.code in self.targets:
+            raise ValueError("asset ({0}) is already in the target positions".format(asset.code))
+        self.targets[asset.code] = (asset, qty)
+
+    def __getitem__(self, code: int) -> Tuple[Asset, int]:
+        return self.targets[code]
+
+    @property
+    def codes(self) -> List[int]:
+        """Registered asset codes in ascending order."""
+        return sorted(self.targets.keys())
+
+    def __repr__(self):
+        # build both tuples explicitly: unpacking ``zip(*values)`` yields no
+        # arguments for an empty container and made ``format`` raise IndexError
+        pairs = list(self.targets.values())
+        assets = tuple(asset for asset, _ in pairs)
+        qtys = tuple(qty for _, qty in pairs)
+        return "TargetPositions(assets={0}, qtys={1})".format(assets, qtys)
+
+
+def handle_one_asset(pre_allocation: Allocation,
+                     asset: Asset,
+                     qty: int) -> Tuple[Execution, Allocation, int]:
+    """Fit a target quantity into one allocation.
+
+    :param pre_allocation: allocation before the trade (range and current holding)
+    :param asset: asset being handled; only its code is read, for the error message
+    :param qty: target quantity
+    :return: ``(execution, new allocation, left-over quantity)``; the left-over
+        is the part of ``qty`` above the allocation's maximum, 0 otherwise
+    :raises ValueError: when ``qty`` is below the allocation's minimum
+    """
+
+    minimum = pre_allocation.minimum
+    maximum = pre_allocation.maximum
+    current = pre_allocation.current
+    code = pre_allocation.code
+
+    if qty < minimum:
+        # report the minimum itself; the whole allocation object used to be
+        # formatted into the "minimum amount" slot
+        raise ValueError("{0}'s target {1} is smaller than minimum amount {2}".format(asset.code, qty, minimum))
+
+    if qty < maximum:
+        # the whole target fits: nothing is left over
+        filled, left_over = qty, 0
+    else:
+        # fill up to the cap and hand the remainder back to the caller
+        filled, left_over = maximum, qty - maximum
+
+    ex = Execution(code, filled - current)
+    allocation = Allocation(code,
+                            minimum=minimum,
+                            maximum=maximum,
+                            current=filled)
+    return ex, allocation, left_over
+
+
+def pass_through(target_pos: TargetPositions,
+                 portfolio: Portfolio) -> Tuple[Executions, Portfolio, TargetPositions]:
+    """Push the target positions through one portfolio.
+
+    Codes are visited in ascending order. An asset whose ``forbidden`` set holds
+    the portfolio name gets a zero-quantity execution and keeps its full target;
+    any other asset goes through ``handle_one_asset``.
+
+    :return: the executions, a new portfolio built from the resulting
+        allocations, and the quantities still to be placed
+    :raises ValueError: when an asset has a non-empty ``priority`` set
+    """
+
+    p_name = portfolio.name
+    remaining = TargetPositions()
+
+    new_allocations = []
+    orders = []
+
+    for code in target_pos.codes:
+        asset, qty = target_pos[code]
+        if asset.priority:
+            raise ValueError("asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority))
+
+        if p_name not in asset.forbidden:
+            ex, allocation, qty = handle_one_asset(portfolio[code], asset, qty)
+        else:
+            ex = Execution(code, 0, "{0} is forbidden for {1}".format(code, p_name))
+            # copy, so the new portfolio does not share the allocation object
+            allocation = copy.deepcopy(portfolio[code])
+
+        remaining.add_asset(asset, qty)
+        new_allocations.append(allocation)
+        orders.append(ex)
+
+    return Executions(p_name, orders), Portfolio(p_name, new_allocations), remaining
+
+
+if __name__ == '__main__':
+    # small demo: three targets pushed through a portfolio holding all three codes
+
+    asset1 = Asset(1, 'a')
+    asset2 = Asset(2, 'b')
+    asset3 = Asset(3, 'b')
+    target_pos = TargetPositions([asset1, asset2, asset3], [200, 300, 100])
+
+    allc1 = Allocation(1, 0, 100, 0)
+    allc2 = Allocation(2, 0, 400, 100)
+    # was bound to ``allc2`` again, which dropped the allocation for code 2
+    allc3 = Allocation(3, 0, 400, 200)
+    portfolio = Portfolio('test1', [allc1, allc2, allc3])
+
+    executions, portfolio, target_pos = pass_through(target_pos, portfolio)
+
+
+
+
+
--- a/alphamind/portfolio/constraints.py
+++ b/alphamind/portfolio/constraints.py
@@ -5,17 +5,137 @@ Created on 2017-7-21
 @author: cheng.li
 """

+from deprecated import deprecated
 from math import inf
 import numpy as np
+import pandas as pd
+from enum import IntEnum
 from typing import Tuple
 from typing import Optional
+from typing import Dict
+from typing import List
+from typing import Union
+from typing import Iterable
+from PyFin.api import pyFinAssert


+class BoundaryDirection(IntEnum):
+    # which side of a box constraint a boundary sits on
+    LOWER = -1
+    UPPER = 1
+
+
+class BoundaryType(IntEnum):
+    # how a boundary value is combined with the center in BoundaryImpl.__call__
+    ABSOLUTE = 0  # bound = val + center
+    RELATIVE = 1  # bound = val * center
+
+
+class BoundaryImpl(object):
+    """One side of a box constraint: a direction, a boundary type and a value.
+
+    Calling the instance with a center gives the bound: ``val + center`` for an
+    absolute boundary, ``val * center`` for a relative one.
+    """
+
+    def __init__(self,
+                 direction: BoundaryDirection,
+                 b_type: BoundaryType,
+                 val: float):
+        self.direction = direction
+        self.b_type = b_type
+        self.val = val
+        self._validation()
+
+    def _validation(self):
+        known_types = (BoundaryType.ABSOLUTE, BoundaryType.RELATIVE)
+        pyFinAssert(self.b_type in known_types,
+                    ValueError,
+                    "Boundary Type {0} is not recognized".format(self.b_type))
+
+        known_directions = (BoundaryDirection.LOWER, BoundaryDirection.UPPER)
+        pyFinAssert(self.direction in known_directions,
+                    ValueError,
+                    "Boundary direction {0} is not recognized".format(self.direction))
+
+    def __call__(self, center: float):
+        if self.b_type == BoundaryType.ABSOLUTE:
+            return self.val + center
+
+        # relative boundary: scaling is only checked for a non-negative center
+        pyFinAssert(center >= 0., ValueError, "relative bounds only support positive back bone value")
+        return self.val * center
+
+
+class BoxBoundary(object):
+    """Pair of a lower and an upper boundary."""
+
+    def __init__(self,
+                 lower_bound: BoundaryImpl,
+                 upper_bound: BoundaryImpl):
+        self.lower, self.upper = lower_bound, upper_bound
+
+    def bounds(self, center):
+        """Evaluate both boundaries at ``center`` and return ``(lower, upper)``."""
+        low = self.lower(center)
+        high = self.upper(center)
+        pyFinAssert(low <= high, ValueError, "lower bound should be lower then upper bound")
+        return low, high
+
+
+def _per_name_values(value, names) -> list:
+    """Repeat a scalar once per name, or materialize an iterable into a list."""
+    if hasattr(value, '__iter__'):
+        # a list gives plain positional access for generators and for
+        # containers whose ``[]`` is label based
+        return list(value)
+    return [value] * len(names)
+
+
+def create_box_bounds(names: List[str],
+                      b_type: Union[Iterable[BoundaryType], BoundaryType],
+                      l_val: Union[Iterable[float], float],
+                      u_val: Union[Iterable[float], float]) -> Dict[str, BoxBoundary]:
+    """
+    helper function to quickly create a series of bounds
+
+    :param names: one key per box boundary to create
+    :param b_type: boundary type, shared by all names or one per name
+    :param l_val: lower boundary value, shared by all names or one per name
+    :param u_val: upper boundary value, shared by all names or one per name
+    :return: dict mapping each name to its ``BoxBoundary``
+
+    Per-name arguments are read by position, in the order of ``names``.
+    """
+    b_types = _per_name_values(b_type, names)
+    l_vals = _per_name_values(l_val, names)
+    u_vals = _per_name_values(u_val, names)
+
+    bounds = dict()
+    for i, name in enumerate(names):
+        lower = BoundaryImpl(BoundaryDirection.LOWER,
+                             b_types[i],
+                             l_vals[i])
+        upper = BoundaryImpl(BoundaryDirection.UPPER,
+                             b_types[i],
+                             u_vals[i])
+        bounds[name] = BoxBoundary(lower, upper)
+    return bounds
+
+
+class LinearConstraints(object):
+    """Box bounds on linear exposures, centered on a backbone vector.
+
+    :param bounds: box boundary per constraint name; names are used as column
+        labels of ``cons_mat``
+    :param cons_mat: constraint matrix, one column per constraint name
+    :param backbone: vector with one entry per row of ``cons_mat``
+    """
+
+    def __init__(self,
+                 bounds: Dict[str, BoxBoundary],
+                 cons_mat: pd.DataFrame,
+                 backbone: np.ndarray):
+        # the exception type was missing: the message was passed in its place,
+        # so a failed check could not raise the intended error
+        pyFinAssert(len(bounds) == cons_mat.shape[1],
+                    ValueError,
+                    "Number of bounds should be same as number of col of cons_mat")
+        pyFinAssert(cons_mat.shape[0] == len(backbone),
+                    ValueError,
+                    "length of back bond should be same as number of rows of cons_mat")
+        self.names = list(bounds.keys())
+        self.bounds = bounds
+        self.cons_mat = cons_mat
+        self.backbone = backbone
+
+    def risk_targets(self) -> Tuple[np.ndarray, np.ndarray]:
+        """Return ``(lower bounds, upper bounds)``, one entry per name in ``self.names`` order."""
+        lower_bounds = []
+        upper_bounds = []
+
+        for name in self.names:
+            # the center of each box is the backbone's exposure to that column
+            center = self.backbone @ self.cons_mat[name].values
+            l, u = self.bounds[name].bounds(center)
+            lower_bounds.append(l)
+            upper_bounds.append(u)
+        return np.array(lower_bounds), np.array(upper_bounds)
+
+    @property
+    def risk_exp(self) -> np.ndarray:
+        """Values of ``cons_mat`` with columns ordered like ``self.names``."""
+        return self.cons_mat[self.names].values
+
+
+@deprecated(reason="Constraints is deprecated in alpha-mind 0.1.1. Please use LinearConstraints instead.")
 class Constraints(object):

    def __init__(self,
-                 risk_exp: Optional[np.ndarray]=None,
-                 risk_names: Optional[np.ndarray]=None):
+                 risk_exp: Optional[np.ndarray] = None,
+                 risk_names: Optional[np.ndarray] = None):
        self.risk_exp = risk_exp

        if risk_names is not None:
@@ -68,4 +188,4 @@ if __name__ == '__main__':
    cons = Constraints(risk_exp, risk_names)

    cons.set_constraints('b', 0.0, 0.1)
-    print(cons.risk_targets())
\ No newline at end of file
+    print(cons.risk_targets())
--- a/alphamind/tests/portfolio/test_constraints.py
+++ b/alphamind/tests/portfolio/test_constraints.py
@@ -7,7 +7,14 @@ Created on 2017-7-20

 import unittest
 import numpy as np
+import pandas as pd
 from alphamind.portfolio.constraints import Constraints
+from alphamind.portfolio.constraints import BoxBoundary
+from alphamind.portfolio.constraints import BoundaryImpl
+from alphamind.portfolio.constraints import BoundaryDirection
+from alphamind.portfolio.constraints import BoundaryType
+from alphamind.portfolio.constraints import create_box_bounds
+from alphamind.portfolio.constraints import LinearConstraints


 class TestConstraints(unittest.TestCase):
@@ -43,6 +50,100 @@ class TestConstraints(unittest.TestCase):
        np.testing.assert_array_almost_equal(risk_targets[0], np.array([-0.1, -np.inf, -0.1]))
        np.testing.assert_array_almost_equal(risk_targets[1], np.array([0.1, np.inf, 0.1]))

+    def test_absolute_box_boundary(self):
+        # absolute boundaries are offsets added to the center
+        box = BoxBoundary(BoundaryImpl(BoundaryDirection.LOWER, BoundaryType.ABSOLUTE, -0.8),
+                          BoundaryImpl(BoundaryDirection.UPPER, BoundaryType.ABSOLUTE, 1.1))
+
+        low, high = box.bounds(2.2)
+        self.assertAlmostEqual(low, 1.4)
+        self.assertAlmostEqual(high, 3.3)
+
+    def test_relative_box_boundary(self):
+        # relative boundaries are factors multiplied with the center
+        box = BoxBoundary(BoundaryImpl(BoundaryDirection.LOWER, BoundaryType.RELATIVE, 0.8),
+                          BoundaryImpl(BoundaryDirection.UPPER, BoundaryType.RELATIVE, 1.1))
+
+        low, high = box.bounds(2.2)
+        self.assertAlmostEqual(low, 1.76)
+        self.assertAlmostEqual(high, 2.42)
+
+    def test_create_box_bounds_single_value(self):
+        # scalar arguments are shared by every name
+        b_type = BoundaryType.RELATIVE
+        l_val = 0.8
+        u_val = 1.1
+
+        bounds = create_box_bounds(['a', 'b', 'c'], b_type, l_val, u_val)
+
+        for box in bounds.values():
+            self.assertEqual(box.lower.b_type, b_type)
+            self.assertEqual(box.upper.b_type, b_type)
+            self.assertAlmostEqual(box.lower.val, l_val)
+            self.assertAlmostEqual(box.upper.val, u_val)
+
+    def test_create_box_bounds_multiple_values(self):
+        # list arguments are matched with the names position by position
+        names = ['a', 'b', 'c']
+        b_type = BoundaryType.RELATIVE
+        l_val = [0.9, 0.8, 1.1]
+        u_val = [1.1, 1.2, 1.3]
+
+        bounds = create_box_bounds(names, b_type, l_val, u_val)
+
+        for name, expected_lower, expected_upper in zip(names, l_val, u_val):
+            box = bounds[name]
+            self.assertEqual(box.lower.b_type, b_type)
+            self.assertEqual(box.upper.b_type, b_type)
+            self.assertAlmostEqual(box.lower.val, expected_lower)
+            self.assertAlmostEqual(box.upper.val, expected_upper)
+
+    def test_linear_constraints(self):
+        names = ['a', 'b', 'c']
+        cons_mat = pd.DataFrame(np.random.randn(100, 3), columns=names)
+        backbone = np.random.randn(100)
+
+        l_val = -0.8
+        u_val = 1.1
+        bounds = create_box_bounds(names, BoundaryType.ABSOLUTE, l_val, u_val)
+
+        constraints = LinearConstraints(bounds=bounds,
+                                        cons_mat=cons_mat,
+                                        backbone=backbone)
+
+        l_bounds, u_bounds = constraints.risk_targets()
+        risk_exp = constraints.risk_exp
+
+        # with absolute bounds every target is the backbone exposure plus the offset
+        for i in range(len(names)):
+            center = risk_exp[:, i] @ backbone
+            self.assertAlmostEqual(center + l_val, l_bounds[i])
+            self.assertAlmostEqual(center + u_val, u_bounds[i])
+

 if __name__ == '__main__':
    unittest.main()
--- a/notebooks/full factor strategy.ipynb
+++ b/notebooks/full factor strategy.ipynb
--- a/requirements.txt
+++ b/requirements.txt
 arrow >= 0.10.0
 cython >= 0.25.2
+deprecated >= 1.1.0
 numpy >= 1.12.1
 pandas >= 0.19.2
 scikit-learn >= 0.18.1

--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ from Cython.Build import cythonize
 from distutils.extension import Extension
 import numpy as np

-VERSION = "0.1.0"
+VERSION = "0.1.1"

 if platform.system() != "Windows":
    import multiprocessing

--- a/xgboost @ bf436718
+++ b/xgboost @ bf436718
-Subproject commit a187ed6c8f3aa40b47d5be80667cbbe6a6fd563d
+Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999