small enhancements

beb0dcd3 · Dr.李 · 104437fd · beb0dcd3 · beb0dcd3
Commit beb0dcd3 authored May 10, 2017 by Dr.李
Hide whitespace changes
Inline Side-by-side

Showing with 33 additions and 12 deletions

neutralize.py alphamind/data/neutralize.py +10 -4

linearmodel.py alphamind/model/linearmodel.py +23 -8

No files found.
--- a/alphamind/data/neutralize.py
+++ b/alphamind/data/neutralize.py
@@ -38,10 +38,7 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
        groups_ids = groupby(groups)

        for curr_idx in groups_ids.values():
-            curr_x = x[curr_idx]
-            curr_y = y[curr_idx]
-            b = ls_fit(curr_x, curr_y)
-            res[curr_idx] = ls_res(curr_x, curr_y, b)
+            curr_x, b = _sub_step(x, y, curr_idx, res)
            if output_exposure:
                for i in range(exposure.shape[2]):
                    exposure[curr_idx, :, i] = b[:, i]
@@ -69,6 +66,15 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
        return res


+@nb.njit(nogil=True, cache=True)
+def _sub_step(x, y, curr_idx, res):  # per-group step factored out of the loop in neutralize
+    curr_x = x[curr_idx]  # this group's selection from x
+    curr_y = y[curr_idx]  # matching selection from y
+    b = ls_fit(curr_x, curr_y)  # presumably least-squares coefficients; ls_fit's body is not fully visible here
+    res[curr_idx] = ls_res(curr_x, curr_y, b)  # written into the caller's res in place; ls_res is defined elsewhere
+    return curr_x, b  # the caller in neutralize unpacks both and copies b into exposure when requested
+
+
 @nb.njit(nogil=True, cache=True)
 def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    x_bar = x.T

--- a/alphamind/model/linearmodel.py
+++ b/alphamind/model/linearmodel.py
@@ -7,6 +7,7 @@ Created on 2017-5-10

 from typing import Union
 import numpy as np
+import numba as nb
 from alphamind.cyimpl import groupby
 from alphamind.data.neutralize import ls_fit

@@ -20,7 +21,7 @@ class LinearModel(object):
        self.model_parameter = _train(x, y, groups)

    def predict(self, x, groups=None):
-        if groups is not None and isinstance(self.model_parameter, dict):
+        if groups is not None and self.model_parameter.ndim == 2:
            names = np.unique(groups)
            return multiple_prediction(names, self.model_parameter, x, groups)
        elif self.model_parameter is None:
@@ -31,6 +32,7 @@ class LinearModel(object):
            raise ValueError("grouped x value can't be used for vanilla linear model")


+@nb.njit(nogil=True, cache=True)
 def multiple_prediction(names, model_parames, x, groups):
    pred_v = np.zeros(x.shape[0])
    for name in names:
@@ -40,22 +42,28 @@ def multiple_prediction(names, model_parames, x, groups):
    return pred_v


-def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> Union[np.ndarray, dict]:
+def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:  # grouped fits are now returned as one 2-D array instead of a dict
    if groups is None:
        return ls_fit(x, y)
    else:
        groups_ids = groupby(groups)
-        res_beta = {}
+        res_beta = np.zeros((max(groups_ids.keys())+1, x.shape[1]))  # one row per key in 0..max(key); rows for absent keys stay zero; assumes keys are non-negative ints - TODO confirm against groupby

        for k, curr_idx in groups_ids.items():
-            curr_x = x[curr_idx]
-            curr_y = y[curr_idx]
-            res_beta[k] = ls_fit(curr_x, curr_y)
+            _train_sub_group(x, y, k, curr_idx, res_beta)  # fills res_beta[k] in place; nothing is returned

        return res_beta


+@nb.njit(nogil=True, cache=True)
+def _train_sub_group(x, y, k, curr_idx, res):  # fit a single group and store the result at res[k]; res is mutated in place and nothing is returned
+    curr_x = x[curr_idx]  # this group's selection from x
+    curr_y = y[curr_idx]  # matching selection from y
+    res[k] = ls_fit(curr_x, curr_y)  # k is used directly as an index into res, so it must be a valid index for it
+
+
 if __name__ == '__main__':
+    import datetime as dt
    x = np.random.randn(3000, 10)
    y = np.random.randn(3000)
    groups = np.random.randint(30, size=3000)
@@ -65,5 +73,12 @@ if __name__ == '__main__':

    model = LinearModel()

-    model.calibrate(x, y, groups)
-    model.predict(to_x, to_groups)
\ No newline at end of file
+    start = dt.datetime.now()
+    for i in range(5000):
+        model.calibrate(x, y, groups)
+    print(dt.datetime.now() - start)
+
+    start = dt.datetime.now()
+    for i in range(50000):
+        model.predict(to_x, to_groups)
+    print(dt.datetime.now() - start)
\ No newline at end of file