change for rank build implementation

5e586da0 · Dr.李 · ae3af0c2 · 5e586da0 · 5e586da0 · 5e586da0
Commit 5e586da0 authored Apr 29, 2017 by Dr.李
Showing with 98 additions and 36 deletions

rankbuild.py alphamind/benchmarks/portfolio/rankbuild.py +20 -9

impl.pyx alphamind/portfolio/impl.pyx +57 -17

rankbuilder.py alphamind/portfolio/rankbuilder.py +19 -10

setup.py setup.py +2 -0

No files found.
--- a/alphamind/benchmarks/portfolio/rankbuild.py
+++ b/alphamind/benchmarks/portfolio/rankbuild.py
@@ -16,21 +16,27 @@ def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
    print("Starting portfolio construction by rank benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))

-    x = np.random.randn(n_samples, 1)
+    n_portfolio = 10
+
+    x = np.random.randn(n_samples, n_portfolio)

    start = dt.datetime.now()
    for _ in range(n_loops):
-        _ = rank_build(x, n_included)
+        calc_weights = rank_build(x, n_included)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    for _ in range(n_loops):
-        expected_weights = np.zeros((len(x), 1))
-        expected_weights[(-x).argsort(axis=0).argsort(axis=0) < n_included] = 1. / n_included
+        exp_weights = np.zeros((len(x), n_portfolio))
+        choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
+        for j in range(n_portfolio):
+            exp_weights[choosed_index[:, j], j] = 1. / n_included
    benchmark_model_time = dt.datetime.now() - start

+    np.testing.assert_array_almost_equal(calc_weights, exp_weights)
+
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))


@@ -39,12 +45,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in
    print("Starting  portfolio construction by rank with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))

-    x = np.random.randn(n_samples, 1)
+    n_portfolio = 10
+
+    x = np.random.randn(n_samples, n_portfolio)
    groups = np.random.randint(n_groups, size=n_samples)

    start = dt.datetime.now()
    for _ in range(n_loops):
-        _ = rank_build(x, n_included, groups=groups)
+        calc_weights = rank_build(x, n_included, groups=groups)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
@@ -52,11 +60,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in
    start = dt.datetime.now()
    for _ in range(n_loops):
        grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
-        expected_weights = np.zeros((len(x), 1))
-        masks = grouped_ordering <= n_included
-        expected_weights[masks] = 1. / np.sum(masks)
+        exp_weights = np.zeros((len(x), n_portfolio))
+        masks = (grouped_ordering <= n_included).values
+        for j in range(n_portfolio):
+            exp_weights[masks[:, j], j] = 1. / np.sum(masks[:, j])
    benchmark_model_time = dt.datetime.now() - start

+    np.testing.assert_array_almost_equal(calc_weights, exp_weights)
+
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))



--- a/alphamind/portfolio/impl.pyx
+++ b/alphamind/portfolio/impl.pyx
@@ -6,33 +6,73 @@ Created on 2017-4-29
 """

 import numpy as np
+cimport numpy as np
 from numpy import array
-cimport numpy as cnp
 cimport cython
-import cytoolz
+from cpython.dict cimport PyDict_GetItem, PyDict_SetItem
+from cpython.ref cimport PyObject
+from cpython.list cimport PyList_Append
+
+
+cdef inline object _groupby_core(dict d, object key, object item):
+    cdef PyObject *obj = PyDict_GetItem(d, key)
+    if obj is NULL:
+        val = []
+        PyList_Append(val, item)
+        PyDict_SetItem(d, key, val)
+    else:
+        PyList_Append(<object>obj, item)


 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.initializedcheck(False)
-cdef inline long index(tuple x):
-    return x[0]
+cpdef list groupby(long[:] groups):
+
+    cdef size_t length = groups.shape[0]
+    cdef dict group_ids = {}
+    cdef size_t i
+    cdef long curr_tag
+
+    for i in range(length):
+        _groupby_core(group_ids, groups[i], i)
+
+    return [array(v, dtype=np.int64) for v in group_ids.values()]


 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.initializedcheck(False)
-cpdef list groupby(long[:] groups):
+cpdef void set_value_bool(unsigned char[:, :] mat, long long[:, :] index):
+
+    cdef size_t length = index.shape[0]
+    cdef size_t width = index.shape[1]
+    cdef size_t i
+    cdef size_t j
+    cdef unsigned char* mat_ptr = &mat[0, 0]
+    cdef long long* index_ptr = &index[0, 0]
+    cdef size_t k
+
+    for i in range(length):
+        k = i * width
+        for j in range(width):
+            mat_ptr[index_ptr[k + j] * width + j] = True
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+cpdef void set_value_double(double[:, :] mat, long long[:, :] index, double val):
+
+    cdef size_t length = index.shape[0]
+    cdef size_t width = index.shape[1]
+    cdef size_t i
+    cdef size_t j
+    cdef double* mat_ptr = &mat[0, 0]
+    cdef long long* index_ptr = &index[0, 0]
+    cdef size_t k

-    cdef int i
-    cdef long d
-    cdef list table
-    cdef tuple t
-    cdef list v
-    cdef dict group_dict
-    cdef list group_ids
-
-    table = [(d, i) for i, d in enumerate(groups)]
-    group_dict = cytoolz.groupby(index, table)
-    group_ids = [array([t[1] for t in v]) for v in group_dict.values()]
-    return group_ids
\ No newline at end of file
+    for i in range(length):
+        k = i * width
+        for j in range(width):
+            mat_ptr[index_ptr[k + j] * width + j] = val
--- a/alphamind/portfolio/rankbuilder.py
+++ b/alphamind/portfolio/rankbuilder.py
@@ -8,6 +8,8 @@ Created on 2017-4-26
 import numpy as np
 from numpy import zeros
 from alphamind.portfolio.impl import groupby
+from alphamind.portfolio.impl import set_value_bool
+from alphamind.portfolio.impl import set_value_double


 def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
@@ -39,25 +41,32 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
            masks = zeros((length, width), dtype=bool)
            for current_index in group_ids:
                current_ordering = neg_er[current_index].argsort(axis=0)
-                for j in range(width):
-                    masks[current_index[current_ordering[:use_rank, j]], j] = True
+                total_index = current_index[current_ordering[:use_rank]]
+                set_value_bool(masks.view(dtype=np.uint8), total_index)
            choosed = masks.sum(axis=0)

            for j in range(width):
                weights[masks[:, j], j] = 1. / choosed[j]
        else:
            ordering = neg_er.argsort(axis=0)
-            for j in range(width):
-                weights[ordering[:use_rank, j], j] = 1. / use_rank
+            set_value_double(weights, ordering[:use_rank], 1. / use_rank)
        return weights


 if __name__ == '__main__':
-    n_samples = 4
-    n_include = 1
-    n_groups = 2
+    # n_samples = 4000
+    # n_include = 100
+    # n_groups = 20
+    #
+    # x = np.random.randn(n_samples, 2)
+    # groups = np.random.randint(n_groups, size=n_samples)
+    #
+    # for i in range(10000):
+    #     rank_build(x, n_include, groups)

-    x = np.random.randn(n_samples, 2)
-    groups = np.random.randint(n_groups, size=n_samples)
+    from alphamind.portfolio.impl import set_value

-    calc_weights = rank_build(x, n_include, groups)
\ No newline at end of file
+    x = np.zeros((3, 2), dtype=np.bool)
+    index = np.array([[1, 0], [2, 1]])
+    set_value(x.view(dtype=np.uint8), index)
+    print(x)
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,9 @@ from setuptools import setup
 from setuptools import find_packages
 from distutils.extension import Extension
 import numpy as np
+import Cython
 from Cython.Build import cythonize
+Cython.Compiler.Options.annotate = True

 VERSION = "0.1.0"