Update benchmark and many numba functions

d94862d5 · Dr.李 · 91f70b4e · d94862d5 · d94862d5 · d94862d5
Commit d94862d5 authored May 03, 2017 by Dr.李
9 changed files
--- a/alphamind/aggregate.py
+++ b/alphamind/aggregate.py
@@ -10,7 +10,68 @@ import numpy as np
 import numba as nb


-@nb.njit
+@nb.njit(nogil=True, cache=True)
+def simple_sum(x, axis=0):
+    """Sum a 2-D array along one axis.
+
+    Parameters
+    ----------
+    x : 2-D array; its shape is unpacked as ``(length, width)``.
+    axis : 0 to sum down each column, 1 to sum across each row.
+
+    Returns
+    -------
+    1-D array allocated with ``np.zeros``: ``width`` entries for
+    ``axis=0``, ``length`` entries for ``axis=1``.
+
+    Raises
+    ------
+    ValueError if ``axis`` is neither 0 nor 1.
+    """
+    length, width = x.shape
+
+    if axis == 0:
+        res = np.zeros(width)
+        for i in range(length):
+            for j in range(width):
+                res[j] += x[i, j]
+
+    elif axis == 1:
+        res = np.zeros(length)
+        for i in range(length):
+            for j in range(width):
+                res[i] += x[i, j]
+
+    else:
+        # Without this branch ``res`` would be returned without ever
+        # having been assigned.
+        raise ValueError("axis must be 0 or 1")
+    return res
+
+
+@nb.njit(nogil=True, cache=True)
+def simple_mean(x, axis=0):
+    """Arithmetic mean of a 2-D array along one axis.
+
+    Parameters
+    ----------
+    x : 2-D array; its shape is unpacked as ``(length, width)``.
+    axis : 0 for the mean of each column (sum divided by ``length``),
+        1 for the mean of each row (sum divided by ``width``).
+
+    Returns
+    -------
+    1-D array allocated with ``np.zeros``: ``width`` entries for
+    ``axis=0``, ``length`` entries for ``axis=1``.
+
+    Raises
+    ------
+    ValueError if ``axis`` is neither 0 nor 1.
+    """
+    length, width = x.shape
+
+    if axis == 0:
+        res = np.zeros(width)
+        for j in range(width):
+            for i in range(length):
+                res[j] += x[i, j]
+            res[j] /= length
+
+    elif axis == 1:
+        res = np.zeros(length)
+        for i in range(length):
+            for j in range(width):
+                res[i] += x[i, j]
+            res[i] /= width
+
+    else:
+        # Without this branch ``res`` would be returned without ever
+        # having been assigned.
+        raise ValueError("axis must be 0 or 1")
+    return res
+
+
+@nb.njit(nogil=True, cache=True)
+def simple_std(x, axis=0, ddof=1):
+    """Standard deviation of a 2-D array along one axis.
+
+    Uses the single-pass formula
+    ``sqrt((sum(x**2) - sum(x)**2 / n) / (n - ddof))`` where ``n`` is the
+    number of elements reduced along ``axis``.
+
+    Parameters
+    ----------
+    x : 2-D array; its shape is unpacked as ``(length, width)``.
+    axis : 0 for one value per column, 1 for one value per row.
+    ddof : subtracted from ``n`` in the divisor (default 1).
+
+    Returns
+    -------
+    1-D array allocated with ``np.zeros``: ``width`` entries for
+    ``axis=0``, ``length`` entries for ``axis=1``.
+
+    Raises
+    ------
+    ValueError if ``axis`` is neither 0 nor 1.
+    """
+    # NOTE(review): ``math.sqrt`` needs ``import math`` at module level;
+    # that import is not visible in this hunk - confirm it exists.
+    length, width = x.shape
+
+    if axis == 0:
+        res = np.zeros(width)
+        sum_mat = np.zeros(width)
+        for j in range(width):
+            for i in range(length):
+                res[j] += x[i, j] * x[i, j]
+                sum_mat[j] += x[i, j]
+            res[j] = math.sqrt((res[j] - sum_mat[j] * sum_mat[j] / length) / (length - ddof))
+    elif axis == 1:
+        res = np.zeros(length)
+        # One running sum per row: the buffer is indexed by ``i`` below, so
+        # it must hold ``length`` entries (sizing it by ``width`` goes out
+        # of bounds whenever length > width).
+        sum_mat = np.zeros(length)
+        for i in range(length):
+            for j in range(width):
+                res[i] += x[i, j] * x[i, j]
+                sum_mat[i] += x[i, j]
+            res[i] = math.sqrt((res[i] - sum_mat[i] * sum_mat[i] / width) / (width - ddof))
+    else:
+        # Without this branch ``res`` would be returned without ever
+        # having been assigned.
+        raise ValueError("axis must be 0 or 1")
+    return res
+
+
+@nb.njit(nogil=True, cache=True)
 def agg_sum(groups, x):
    max_g = groups.max()
    length, width = x.shape
@@ -22,7 +83,7 @@ def agg_sum(groups, x):
    return res


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def agg_abssum(groups, x):
    max_g = groups.max()
    length, width = x.shape
@@ -34,7 +95,7 @@ def agg_abssum(groups, x):
    return res


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def agg_mean(groups, x):
    max_g = groups.max()
    length, width = x.shape
@@ -53,7 +114,7 @@ def agg_mean(groups, x):
    return res


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def agg_std(groups, x, ddof=1):
    max_g = groups.max()
    length, width = x.shape
@@ -74,7 +135,7 @@ def agg_std(groups, x, ddof=1):
    return res


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def copy_value(groups, source):
    length = groups.shape[0]
    width = source.shape[1]
@@ -86,12 +147,12 @@ def copy_value(groups, source):
    return destination


-def transform(groups, x, func):
+def transform(groups, x, func, ddof=1):

    if func == 'mean':
        value_data = agg_mean(groups, x)
    elif func == 'std':
-        value_data = agg_std(groups, x, ddof=1)
+        value_data = agg_std(groups, x, ddof=ddof)
    elif func == 'sum':
        value_data = agg_sum(groups, x)
    elif func =='abssum':
@@ -102,11 +163,11 @@ def transform(groups, x, func):
    return copy_value(groups, value_data)


-def aggregate(groups, x, func):
+def aggregate(groups, x, func, ddof=1):
    if func == 'mean':
        value_data = agg_mean(groups, x)
    elif func == 'std':
-        value_data = agg_std(groups, x, ddof=1)
+        value_data = agg_std(groups, x, ddof=ddof)
    elif func == 'sum':
        value_data = agg_sum(groups, x)
    elif func =='abssum':

--- a/alphamind/data/neutralize.py
+++ b/alphamind/data/neutralize.py
@@ -69,19 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
        return res


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """Fit least-squares coefficients from the normal equations.
+
+    Passes ``x.T @ x`` and ``x.T @ y`` to ``solve`` and returns its result.
+    NOTE(review): ``solve`` is presumably ``numpy.linalg.solve``; its
+    import is not visible in this hunk - confirm.
+    """
     x_bar = x.T
     b = solve(x_bar @ x, x_bar @ y)
     return b


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Return the residual ``y - x @ b`` of the linear fit with coefficients ``b``."""
     return y - x @ b


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
    explained = np.zeros(x.shape + (b.shape[1],))
    for i in range(b.shape[1]):

--- a/alphamind/data/standardize.py
+++ b/alphamind/data/standardize.py
@@ -8,18 +8,20 @@ Created on 2017-4-25
 import numpy as np
 from alphamind.groupby import group_mapping
 from alphamind.aggregate import transform
+from alphamind.aggregate import simple_mean
+from alphamind.aggregate import simple_std


-def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
+    """Z-score ``x``: subtract the mean and divide by the standard deviation.
+
+    When ``groups`` is given, it is first passed through ``group_mapping``
+    and the mean/std come from ``transform`` on those mapped groups;
+    otherwise they are taken over ``axis=0`` of the whole array.
+
+    ``ddof`` is forwarded to the standard-deviation computation in both
+    paths, so grouped and ungrouped results use the same divisor.
+    """

     if groups is not None:
         groups = group_mapping(groups)
         mean_values = transform(groups, x, 'mean')
-        std_values = transform(groups, x, 'std')
+        std_values = transform(groups, x, 'std', ddof)

         return (x - mean_values) / std_values
     else:
-        return (x - x.mean(axis=0)) / x.std(axis=0)
+        # Forward ddof here too; otherwise the caller's value is silently
+        # replaced by simple_std's own default in the ungrouped path.
+        return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)


 if __name__ == '__main__':

--- a/alphamind/data/winsorize.py
+++ b/alphamind/data/winsorize.py
@@ -9,9 +9,11 @@ import numpy as np
 import numba as nb
 from alphamind.groupby import group_mapping
 from alphamind.aggregate import transform
+from alphamind.aggregate import simple_mean
+from alphamind.aggregate import simple_std


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def mask_values_2d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
@@ -31,7 +33,7 @@ def mask_values_2d(x: np.ndarray,
    return res


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def mask_values_1d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
@@ -57,10 +59,9 @@ def winsorize_normal(x: np.ndarray, num_stds: int = 3, groups: np.ndarray = None
        std_values = transform(groups, x, 'std')
        res = mask_values_2d(x, mean_values, std_values, num_stds)
    else:
-        std_values = x.std(axis=0)
-        mean_values = x.mean(axis=0)
+        std_values = simple_std(x, axis=0)
+        mean_values = simple_mean(x, axis=0)
        res = mask_values_1d(x, mean_values, std_values, num_stds)
-
    return res



--- a/alphamind/groupby.pyx
+++ b/alphamind/groupby.pyx
@@ -45,9 +45,9 @@ cpdef groupby(long[:] groups):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.initializedcheck(False)
-cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups):
+cpdef np.ndarray[long, ndim=1] group_mapping(long[:] groups):
    cdef size_t length = groups.shape[0]
-    cdef np.ndarray[int, ndim=1] res= zeros(length, dtype=int)
+    cdef np.ndarray[long, ndim=1] res= zeros(length, dtype=long)
    cdef cpp_map[long, long] current_hold
    cdef long curr_tag
    cdef long running_tag = -1

--- a/alphamind/portfolio/rankbuilder.py
+++ b/alphamind/portfolio/rankbuilder.py
@@ -12,7 +12,7 @@ from numpy import zeros_like
 from alphamind.groupby import groupby


-@nb.njit
+@nb.njit(nogil=True, cache=True)
 def set_value(mat, used_level, to_fill):
    length, width = used_level.shape
    for i in range(length):

--- a/alphamind/settlement/simplesettle.py
+++ b/alphamind/settlement/simplesettle.py
@@ -8,19 +8,20 @@ Created on 2017-4-28
 import numpy as np
 from alphamind.groupby import group_mapping
 from alphamind.aggregate import aggregate
+from alphamind.aggregate import simple_sum


 def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    """Multiply weights by returns and sum the products.
+
+    A 1-D ``ret_series`` (and likewise a 1-D ``weights``) is reshaped to a
+    single column before the element-wise product, so the product is
+    always 2-D as ``simple_sum`` / ``aggregate`` require.
+
+    Returns the per-group sums from ``aggregate(..., 'sum')`` when
+    ``groups`` is given, otherwise the column sums from ``simple_sum``.
+    """

-    if ret_series.ndim > 1:
-        ret_series = ret_series.flatten()
+    if ret_series.ndim == 1:
+        ret_series = ret_series.reshape((-1, 1))
+    if weights.ndim == 1:
+        # A 1-D weights vector against the (n, 1) returns column would
+        # broadcast to an (n, n) outer product; make it a column as well.
+        weights = weights.reshape((-1, 1))

-    ret_mat = (ret_series * weights.T).T
+    ret_mat = weights * ret_series
     if groups is not None:
         groups = group_mapping(groups)
         return aggregate(groups, ret_mat, 'sum')
     else:
-        return ret_mat.sum(axis=0)
+        return simple_sum(ret_mat, axis=0)


 if __name__ == '__main__':

--- a/alphamind/tests/data/test_standardize.py
+++ b/alphamind/tests/data/test_standardize.py
@@ -19,7 +19,7 @@ class TestStandardize(unittest.TestCase):
        x = np.random.randn(3000, 10)

        calc_zscore = standardize(x)
-        exp_zscore = zscore(x)
+        exp_zscore = zscore(x, ddof=1)

        np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
        
@@ -28,7 +28,7 @@ class TestStandardize(unittest.TestCase):
        groups = np.random.randint(10, 30, size=3000)

        calc_zscore = standardize(x, groups)
-        exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
+        exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1))
        np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)



--- a/alphamind/tests/data/test_winsorize.py
+++ b/alphamind/tests/data/test_winsorize.py
@@ -20,7 +20,7 @@ class TestWinsorize(unittest.TestCase):

        calc_winsorized = winsorize_normal(x, num_stds)

-        std_values = x.std(axis=0)
+        std_values = x.std(axis=0, ddof=1)
        mean_value = x.mean(axis=0)

        lower_bound = mean_value - num_stds * std_values
@@ -42,7 +42,7 @@ class TestWinsorize(unittest.TestCase):
        cal_winsorized = winsorize_normal(x, num_stds, groups)

        def impl(x):
-            std_values = x.std(axis=0)
+            std_values = x.std(axis=0, ddof=1)
            mean_value = x.mean(axis=0)

            lower_bound = mean_value - num_stds * std_values