update code base for simplicity

54edf78f · Dr.李 · 9d3a00d0
Commit 54edf78f authored Aug 07, 2017 by Dr.李
4 changed files
--- a/alphamind/analysis/riskanalysis.py
+++ b/alphamind/analysis/riskanalysis.py
@@ -22,8 +22,7 @@ def risk_analysis(net_weight_series: pd.Series,
    idiosyncratic, other_stats = neutralize(risk_table.values,
                                            next_bar_return_series.values,
                                            group_idx,
-                                            output_exposure=True,
+                                            detail=True)
-                                            output_explained=True)
    systematic = other_stats['explained']
    exposure = other_stats['exposure']

--- a/alphamind/data/neutralize.py
+++ b/alphamind/data/neutralize.py
@@ -16,8 +16,7 @@ import alphamind.utilities as utils
 def neutralize(x: np.ndarray,
               y: np.ndarray,
               groups: np.ndarray=None,
-               output_explained: bool=False,
+               detail: bool=False,
-               output_exposure: bool=False,
               weights: np.ndarray=None) \
        -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
@@ -27,47 +26,37 @@ def neutralize(x: np.ndarray,
    if weights is None:
        weights = np.ones(len(y), dtype=float)
-    if groups is not None:
+    output_dict = {}
-        res = np.zeros(y.shape)
-        if y.ndim == 2:
+    if detail:
-            if output_explained:
+        exposure = np.zeros(x.shape + (y.shape[1],))
-                explained = np.zeros(x.shape + (y.shape[1],))
+        explained = np.zeros(x.shape + (y.shape[1],))
-            if output_exposure:
+        output_dict['exposure'] = exposure
-                exposure = np.zeros(x.shape + (y.shape[1],))
+        output_dict['explained'] = explained
-        else:
-            if output_explained:
-                explained = np.zeros(x.shape + (1,))
-            if output_exposure:
-                exposure = np.zeros(x.shape + (1,))
+    if groups is not None:
+        res = np.zeros(y.shape)
        index_diff, order = utils.groupby(groups)
        start = 0
-        for diff_loc in index_diff:
+        if detail:
-            curr_idx = order[start:diff_loc + 1]
+            for diff_loc in index_diff:
-            curr_x, b = _sub_step(x, y, weights, curr_idx, res)
+                curr_idx = order[start:diff_loc + 1]
-            if output_exposure:
+                curr_x, b = _sub_step(x, y, weights, curr_idx, res)
-                for i in range(exposure.shape[2]):
+                exposure[curr_idx, :, :] = b
-                    exposure[curr_idx, :, i] = b[:, i]
+                explained[curr_idx] = ls_explain(curr_x, b)
-            if output_explained:
+                start = diff_loc + 1
-                for i in range(explained.shape[2]):
+        else:
-                    explained[curr_idx] = ls_explain(curr_x, b)
+            for diff_loc in index_diff:
-            start = diff_loc + 1
+                curr_idx = order[start:diff_loc + 1]
+                _sub_step(x, y, weights, curr_idx, res)
+                start = diff_loc + 1
    else:
        b = ls_fit(x, y, weights)
        res = ls_res(x, y, b)
-        if output_explained:
+        if detail:
-            explained = ls_explain(x, b)
+            explained[:, :, :] = ls_explain(x, b)
-        if output_exposure:
+            exposure[:] = b
-            exposure = b
-    output_dict = {}
-    if output_explained:
-        output_dict['explained'] = explained
-    if output_exposure:
-        output_dict['exposure'] = exposure
    if output_dict:
        return res, output_dict
@@ -76,10 +65,8 @@ def neutralize(x: np.ndarray,
 @nb.njit(nogil=True, cache=True)
-def _sub_step(x, y, w, curr_idx, res):
+def _sub_step(x, y, w, curr_idx, res) -> Tuple[np.ndarray, np.ndarray]:
-    curr_x = x[curr_idx]
+    curr_x, curr_y, curr_w = x[curr_idx], y[curr_idx], w[curr_idx]
-    curr_y = y[curr_idx]
-    curr_w = w[curr_idx]
    b = ls_fit(curr_x, curr_y, curr_w)
    res[curr_idx] = ls_res(curr_x, curr_y, b)
    return curr_x, b
@@ -99,21 +86,18 @@ def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
 @nb.njit(nogil=True, cache=True)
 def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
-    n = b.shape[1]
+    m, n = b.shape
-    explained = np.zeros(x.shape + (n,))
+    return b.reshape((1, m, n)) * x.reshape((-1, m, 1))
-    for i in range(n):
-        explained[:, :, i] = b[:, i] * x
-    return explained
 if __name__ == '__main__':
-    x = np.random.randn(3000)
+    x = np.random.randn(50000, 10)
-    y = np.random.randn(3000)
+    y = np.random.randn(50000, 1)
-    w = np.ones(3000)
+    w = np.ones(50000)
    import datetime as dt
    start = dt.datetime.now()
-    for i in range(1000):
+    for _ in range(1000):
        ls_fit(x, y, w)
    print(dt.datetime.now() - start)
--- a/alphamind/tests/data/test_neutralize.py
+++ b/alphamind/tests/data/test_neutralize.py
@@ -43,7 +43,7 @@ class TestNeutralize(unittest.TestCase):
    def test_neutralize_explain_output(self):
        y = self.y[:, 0].flatten()
-        calc_res, other_stats = neutralize(self.x, y, output_explained=True)
+        calc_res, other_stats = neutralize(self.x, y, detail=True)
        model = LinearRegression(fit_intercept=False)
        model.fit(self.x, y)
@@ -54,7 +54,7 @@ class TestNeutralize(unittest.TestCase):
        np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1))
        np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained)
-        calc_res, other_stats = neutralize(self.x, self.y, output_explained=True)
+        calc_res, other_stats = neutralize(self.x, self.y, detail=True)
        model = LinearRegression(fit_intercept=False)
        model.fit(self.x, self.y)
@@ -69,7 +69,7 @@ class TestNeutralize(unittest.TestCase):
    def test_neutralize_explain_output_with_group(self):
        y = self.y[:, 0].flatten()
-        calc_res, other_stats = neutralize(self.x, y, self.groups, output_explained=True)
+        calc_res, other_stats = neutralize(self.x, y, self.groups, detail=True)
        model = LinearRegression(fit_intercept=False)
        for i in range(30):
@@ -81,7 +81,7 @@ class TestNeutralize(unittest.TestCase):
            np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1))
            np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0], exp_explained)
-        calc_res, other_stats = neutralize(self.x, self.y, self.groups, output_explained=True)
+        calc_res, other_stats = neutralize(self.x, self.y, self.groups, detail=True)
        model = LinearRegression(fit_intercept=False)
        for i in range(30):

--- a/alphamind/utilities.py
+++ b/alphamind/utilities.py
@@ -67,6 +67,8 @@ def simple_sum(x, axis=0):
        for i in range(length):
            for j in range(width):
                res[i] += x[i, j]
+    else:
+        raise ValueError("axis value is not supported")
    return res
@@ -86,6 +88,8 @@ def simple_abssum(x, axis=0):
        for i in range(length):
            for j in range(width):
                res[i] += abs(x[i, j])
+    else:
+        raise ValueError("axis value is not supported")
    return res
@@ -106,6 +110,8 @@ def simple_mean(x, axis=0):
            for j in range(width):
                res[i] += x[i, j]
            res[i] /= width
+    else:
+        raise ValueError("axis value is not supported")
    return res
@@ -129,6 +135,8 @@ def simple_std(x, axis=0, ddof=1):
                res[i] += x[i, j] * x[i, j]
                sum_mat[i] += x[i, j]
            res[i] = math.sqrt((res[i] - sum_mat[i] * sum_mat[i] / width) / (width - ddof))
+    else:
+        raise ValueError("axis value is not supported")
    return res