update factor_analysis implementation

9256a8be · Dr.李 · 108d2d78 · 9256a8be · 9256a8be · 9256a8be
Commit 9256a8be authored Aug 21, 2017 by Dr.李
Showing 3 changed files with 76 additions and 61 deletions

factoranalysis.py alphamind/analysis/factoranalysis.py +34 -24

simplesettle.py alphamind/settlement/simplesettle.py +35 -12

test_simplesettle.py alphamind/tests/settlement/test_simplesettle.py +7 -25

No files found.
--- a/alphamind/analysis/factoranalysis.py
+++ b/alphamind/analysis/factoranalysis.py
@@ -18,12 +18,12 @@ from alphamind.portfolio.percentbuilder import percent_build
 from alphamind.portfolio.linearbuilder import linear_build
 from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
 from alphamind.analysis.utilities import FDataPack
+from alphamind.settlement.simplesettle import simple_settle


 def build_portfolio(er: np.ndarray,
-                    builder: Optional[str]='long_short',
+                    builder: Optional[str] = 'long_short',
                    **kwargs) -> np.ndarray:
-
    builder = builder.lower()

    if builder == 'ls' or builder == 'long_short':
@@ -49,16 +49,15 @@ def build_portfolio(er: np.ndarray,
 def factor_analysis(factors: pd.DataFrame,
                    factor_weights: np.ndarray,
                    industry: np.ndarray,
-                    d1returns: np.ndarray=None,
+                    d1returns: np.ndarray = None,
                    detail_analysis=True,
-                    benchmark: Optional[np.ndarray]=None,
-                    risk_exp: Optional[np.ndarray]=None,
-                    is_tradable: Optional[np.ndarray]=None,
-                    constraints: Optional[Constraints]=None,
+                    benchmark: Optional[np.ndarray] = None,
+                    risk_exp: Optional[np.ndarray] = None,
+                    is_tradable: Optional[np.ndarray] = None,
+                    constraints: Optional[Constraints] = None,
                    method='risk_neutral',
                    do_neutralize=True,
                    **kwargs) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
-
    data_pack = FDataPack(raw_factors=factors.values,
                          groups=industry,
                          benchmark=benchmark,
@@ -77,10 +76,29 @@ def factor_analysis(factors: pd.DataFrame,
    else:
        post_process = [standardize]

-    er = data_pack.factor_processing(pre_process,  post_process, do_neutralize) @ factor_weights
-
+    er = data_pack.factor_processing(pre_process, post_process, do_neutralize) @ factor_weights
+
+    return er_analysis(er,
+                       industry,
+                       d1returns,
+                       constraints,
+                       detail_analysis,
+                       benchmark,
+                       is_tradable,
+                       method,
+                       **kwargs)
+
+
+def er_analysis(er: np.ndarray,
+                industry: np.ndarray,
+                dx_return: np.ndarray,
+                constraints: Constraints,
+                detail_analysis=True,
+                benchmark: Optional[np.ndarray] = None,
+                is_tradable: Optional[np.ndarray] = None,
+                method='risk_neutral',
+                **kwargs) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
    def create_constraints(benchmark, **kwargs):
-
        if 'lbound' in kwargs:
            lbound = kwargs['lbound']
            del kwargs['lbound']
@@ -92,21 +110,14 @@ def factor_analysis(factors: pd.DataFrame,
            del kwargs['ubound']
        else:
            ubound = 0.01 + benchmark
-
        if is_tradable is not None:
            ubound[~is_tradable] = np.minimum(lbound, ubound)[~is_tradable]

-        if constraints:
-            risk_lbound, risk_ubound = constraints.risk_targets()
-            cons_exp = constraints.risk_exp
-        else:
-            cons_exp = risk_exp
-            risk_lbound = data_pack.benchmark_constraints()
-            risk_ubound = data_pack.benchmark_constraints()
-
+        risk_lbound, risk_ubound = constraints.risk_targets()
+        cons_exp = constraints.risk_exp
        return lbound, ubound, cons_exp, risk_lbound, risk_ubound

-    if benchmark is not None and risk_exp is not None and method == 'risk_neutral':
+    if benchmark is not None and method == 'risk_neutral':
        lbound, ubound, cons_exp, risk_lbound, risk_ubound = create_constraints(benchmark, **kwargs)
        status, _, weights = linear_build(er,
                                          risk_constraints=cons_exp,
@@ -143,11 +154,10 @@ def factor_analysis(factors: pd.DataFrame,
        raise ValueError("Unknown building tpe ({0})".format(method))

    if detail_analysis:
-        analysis = data_pack.settle(weights, d1returns)
+        analysis = simple_settle(weights, dx_return, industry, benchmark)
    else:
        analysis = None
    return pd.DataFrame({'weight': weights,
                         'industry': industry,
-                         'er': er},
-                        index=factors.index),\
+                         'er': er}), \
           analysis
--- a/alphamind/settlement/simplesettle.py
+++ b/alphamind/settlement/simplesettle.py
@@ -6,21 +6,44 @@ Created on 2017-4-28
 """

 import numpy as np
-from alphamind.utilities import group_mapping
-from alphamind.utilities import aggregate
-from alphamind.utilities import simple_sum
+import pandas as pd


-def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+def simple_settle(weights: np.ndarray,
+                  dx_return: np.ndarray,
+                  groups: np.ndarray=None,
+                  benchmark: np.ndarray=None) -> pd.DataFrame:

-    if ret_series.ndim == 1:
-        ret_series = ret_series.reshape((-1, 1))
+    weights = weights.flatten()
+    dx_return = dx_return.flatten()
+
+    if benchmark is not None:
+        net_pos = weights - benchmark
+    else:
+        net_pos = weights
+
+    ret_arr = net_pos * dx_return

-    ret_mat = weights * ret_series
    if groups is not None:
-        groups = group_mapping(groups)
-        return aggregate(groups, ret_mat, 'sum')
+        ret_agg = pd.Series(ret_arr).groupby(groups).sum()
+        ret_agg.loc['total'] = ret_agg.sum()
    else:
-        if ret_mat.ndim == 1:
-            ret_mat = ret_mat.reshape((-1, 1))
-        return simple_sum(ret_mat, axis=0)
+        ret_agg = pd.Series(ret_arr.sum(), index=['total'])
+
+    ret_agg.index.name = 'industry'
+    ret_agg.name = 'er'
+
+    pos_table = pd.DataFrame(net_pos, columns=['weight'])
+    pos_table['ret'] = dx_return
+
+    if groups is not None:
+        ic_table = pos_table.groupby(groups).corr()['ret'].loc[(slice(None), 'weight')]
+        ic_table.loc['total'] = pos_table.corr().iloc[0, 1]
+    else:
+        ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total'])
+
+    return pd.DataFrame({'er': ret_agg.values,
+                         'ic': ic_table.values},
+                        index=ret_agg.index)
+
+
--- a/alphamind/tests/settlement/test_simplesettle.py
+++ b/alphamind/tests/settlement/test_simplesettle.py
@@ -16,10 +16,8 @@ class TestSimpleSettle(unittest.TestCase):

    def setUp(self):
        self.n_samples = 3000
-        self.n_portfolio = 3
        self.n_groups = 30
-        self.weights = np.random.randn(self.n_samples,
-                                       self.n_portfolio)
+        self.weights = np.random.randn(self.n_samples)
        self.ret_series = np.random.randn(self.n_samples)
        self.groups = np.random.randint(self.n_groups, size=self.n_samples)

@@ -27,34 +25,18 @@ class TestSimpleSettle(unittest.TestCase):
        calc_ret = simple_settle(self.weights, self.ret_series)

        ret_series = self.ret_series.reshape((-1, 1))
-        expected_ret = (self.weights * ret_series).sum(axis=0)
+        expected_ret = self.weights @ ret_series

-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
-
-        ret_series = np.random.randn(self.n_samples, 1)
-
-        calc_ret = simple_settle(self.weights, ret_series)
-
-        expected_ret = (self.weights * ret_series).sum(axis=0)
-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
+        self.assertAlmostEqual(calc_ret['er'][0], expected_ret[0])

    def test_simple_settle_with_group(self):
        calc_ret = simple_settle(self.weights, self.ret_series, self.groups)

-        ret_series = self.ret_series.reshape((-1, 1))
-        ret_mat = self.weights * ret_series
-        expected_ret = pd.DataFrame(ret_mat).groupby(self.groups).sum().values
-
-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
-
-        ret_series = np.random.randn(self.n_samples, 1)
-
-        calc_ret = simple_settle(self.weights, ret_series, self.groups)
-
-        ret_mat = self.weights * ret_series
-        expected_ret = pd.DataFrame(ret_mat).groupby(self.groups).sum().values
+        ret_series = self.weights * self.ret_series
+        expected_ret = pd.Series(ret_series).groupby(self.groups).sum().values

-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
+        np.testing.assert_array_almost_equal(calc_ret['er'].values[:-1], expected_ret)
+        self.assertAlmostEqual(calc_ret['er'].values[-1], expected_ret.sum())


 if __name__ == '__main__':