using numba to simplify some optimization

f786b960 · Dr.李 · 3321d3af · f786b960 · f786b960 · f786b960
Commit f786b960 authored May 03, 2017 by Dr.李
9 changed files
--- a/alphamind/aggregate.pyx
+++ b/alphamind/aggregate.pyx
@@ -46,7 +46,7 @@ cpdef groupby(long[:] groups):
        else:
            deref(it).second.push_back(i)
-    return group_ids.values()
+    return [np.array(v) for v in group_ids.values()]
 @cython.boundscheck(False)

--- a/alphamind/benchmarks/benchmarks.py
+++ b/alphamind/benchmarks/benchmarks.py
@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
 if __name__ == '__main__':
-    benchmark_neutralize(3000, 10, 1000)
+    # benchmark_neutralize(3000, 10, 1000)
-    benchmark_neutralize_with_groups(3000, 10, 1000, 30)
+    # benchmark_neutralize_with_groups(3000, 10, 1000, 30)
-    benchmark_neutralize(30, 3, 50000)
+    # benchmark_neutralize(30, 3, 50000)
-    benchmark_neutralize_with_groups(30, 3, 50000, 3)
+    # benchmark_neutralize_with_groups(30, 3, 50000, 3)
-    benchmark_neutralize(50000, 50, 20)
+    # benchmark_neutralize(50000, 50, 20)
-    benchmark_neutralize_with_groups(50000, 50, 20, 50)
+    # benchmark_neutralize_with_groups(50000, 50, 20, 50)
-    benchmark_standardize(3000, 10, 1000)
+    # benchmark_standardize(3000, 10, 1000)
-    benchmark_standardize_with_group(3000, 10, 1000, 30)
+    # benchmark_standardize_with_group(3000, 10, 1000, 30)
-    benchmark_standardize(30, 10, 50000)
+    # benchmark_standardize(30, 10, 50000)
-    benchmark_standardize_with_group(30, 10, 5000, 5)
+    # benchmark_standardize_with_group(30, 10, 5000, 5)
-    benchmark_standardize(50000, 50, 20)
+    # benchmark_standardize(50000, 50, 20)
-    benchmark_standardize_with_group(50000, 50, 20, 50)
+    # benchmark_standardize_with_group(50000, 50, 20, 50)
-    benchmark_winsorize_normal(3000, 10, 1000)
+    # benchmark_winsorize_normal(3000, 10, 1000)
-    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
+    # benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
-    benchmark_winsorize_normal(30, 10, 50000)
+    # benchmark_winsorize_normal(30, 10, 50000)
-    benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
+    # benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
-    benchmark_winsorize_normal(50000, 50, 20)
+    # benchmark_winsorize_normal(50000, 50, 20)
-    benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
+    # benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
    benchmark_build_rank(3000, 1000, 300)
    benchmark_build_rank_with_group(3000, 1000, 10, 30)
    benchmark_build_rank(30, 50000, 3)
    benchmark_build_rank_with_group(30, 50000, 1, 3)
    benchmark_build_rank(50000, 20, 3000)
    benchmark_build_rank_with_group(50000, 20, 10, 300)
-    benchmark_simple_settle(3000, 10, 1000)
+    # benchmark_simple_settle(3000, 10, 1000)
-    benchmark_simple_settle_with_group(3000, 10, 1000, 30)
+    # benchmark_simple_settle_with_group(3000, 10, 1000, 30)
-    benchmark_simple_settle(30, 10, 50000)
+    # benchmark_simple_settle(30, 10, 50000)
-    benchmark_simple_settle_with_group(30, 10, 50000, 5)
+    # benchmark_simple_settle_with_group(30, 10, 50000, 5)
-    benchmark_simple_settle(50000, 50, 20)
+    # benchmark_simple_settle(50000, 50, 20)
-    benchmark_simple_settle_with_group(50000, 50, 20, 50)
+    # benchmark_simple_settle_with_group(50000, 50, 20, 50)
--- a/alphamind/model/__init__.py
+++ b/alphamind/model/__init__.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-5-2
+@author: cheng.li
+"""
\ No newline at end of file
--- a/alphamind/portfolio/impl.pyx
+++ b/alphamind/portfolio/impl.pyx
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-29
-@author: cheng.li
-"""
-import numpy as np
-cimport numpy as np
-cimport cython
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef void set_value_bool(unsigned char[:, :] mat, list index, long long[:, :] used_level):
-    cdef size_t length = used_level.shape[0]
-    cdef size_t width = used_level.shape[1]
-    cdef size_t i
-    cdef size_t j
-    cdef unsigned char* mat_ptr = &mat[0, 0]
-    cdef long long* used_level_ptr = &used_level[0, 0]
-    cdef size_t k
-    cdef size_t l
-    for i in range(length):
-        k = i * width
-        for j in range(width):
-            l =  index[used_level_ptr[k + j]]
-            mat_ptr[l * width + j] = True
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef void set_value_double(double[:, :] mat, long long[:, :] index, double val):
-    cdef size_t length = index.shape[0]
-    cdef size_t width = index.shape[1]
-    cdef size_t i
-    cdef size_t j
-    cdef double* mat_ptr = &mat[0, 0]
-    cdef long long* index_ptr = &index[0, 0]
-    cdef size_t k
-    for i in range(length):
-        k = i * width
-        for j in range(width):
-            mat_ptr[index_ptr[k + j] * width + j] = val
--- a/alphamind/portfolio/percentailbuilder.py
+++ b/alphamind/portfolio/percentailbuilder.py
--- a/alphamind/portfolio/rankbuilder.py
+++ b/alphamind/portfolio/rankbuilder.py
@@ -6,10 +6,27 @@ Created on 2017-4-26
 """
 import numpy as np
+import numba as nb
 from numpy import zeros
 from alphamind.aggregate import groupby
-from alphamind.portfolio.impl import set_value_bool
-from alphamind.portfolio.impl import set_value_double
+@nb.njit
+def set_value_bool(mat, used_level, to_fill):
+    length, width = used_level.shape
+    for i in range(length):
+        for j in range(width):
+            k = used_level[i, j]
+            mat[k, j] = to_fill
+@nb.njit
+def set_value_double(mat, used_level, to_fill):
+    length, width = used_level.shape
+    for i in range(length):
+        for j in range(width):
+            k = used_level[i, j]
+            mat[k, j] = to_fill
 def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
@@ -25,7 +42,7 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
            for current_index in group_ids:
                current_ordering = neg_er[current_index].argsort()
                current_ordering.shape = -1, 1
-                set_value_bool(masks.view(dtype=np.uint8), current_index, current_ordering[:use_rank])
+                set_value_bool(masks, current_index[current_ordering[:use_rank]], True)
            weights[masks] = 1.
        else:
            ordering = neg_er.argsort()
@@ -42,7 +59,7 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
            masks = zeros((length, width), dtype=bool)
            for current_index in group_ids:
                current_ordering = neg_er[current_index].argsort(axis=0)
-                set_value_bool(masks.view(dtype=np.uint8), current_index, current_ordering[:use_rank])
+                set_value_bool(masks, current_index[current_ordering[:use_rank]], True)
            for j in range(width):
                weights[masks[:, j], j] = 1.
        else:

--- a/alphamind/tests/portfolio/test_rankbuild.py
+++ b/alphamind/tests/portfolio/test_rankbuild.py
@@ -18,7 +18,7 @@ class TestRankBuild(unittest.TestCase):
        n_samples = 3000
        n_included = 300
-        n_portfolios = range(10)
+        n_portfolios = range(1, 10)
        for n_portfolio in n_portfolios:
            x = np.random.randn(n_samples, n_portfolio)
@@ -39,7 +39,7 @@ class TestRankBuild(unittest.TestCase):
        n_include = 10
        n_groups = 30
-        n_portfolios = range(10)
+        n_portfolios = range(1, 10)
        for n_portfolio in n_portfolios:

--- a/notebooks/factor analysis.ipynb
+++ b/notebooks/factor analysis.ipynb
--- a/setup.py
+++ b/setup.py
@@ -25,8 +25,7 @@ else:
    line_trace = False
-ext_modules = ['alphamind/aggregate.pyx',
+ext_modules = ['alphamind/aggregate.pyx']
-               'alphamind/portfolio/impl.pyx']
 def generate_extensions(ext_modules, line_trace=False):