restructure

414ed809 · Dr.李 · bbb01231 · 414ed809 · 414ed809 · 414ed809
Commit 414ed809 authored May 03, 2017 by Dr.李
8 changed files
--- a/alphamind/aggregate.py
+++ b/alphamind/aggregate.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-5-3
+@author: cheng.li
+"""
+import math
+import numpy as np
+import numba as nb
+@nb.njit
+def agg_sum(groups, x):
+    """Sum the rows of ``x`` per group label.
+
+    Returns a float64 array of shape ``(groups.max() + 1, x.shape[1])``
+    whose row ``g`` is the column-wise sum of every row ``i`` of ``x`` with
+    ``groups[i] == g``.  Labels below the maximum that never occur keep a
+    row of zeros.
+
+    NOTE(review): assumes ``groups`` holds non-negative integer labels, one
+    per row of ``x`` -- confirm against callers.
+    """
+    n_bins = groups.max() + 1
+    n_rows, n_cols = x.shape
+    totals = np.zeros((n_bins, n_cols), dtype=np.float64)
+    for row in range(n_rows):
+        # Look the label up once per row instead of once per element.
+        label = groups[row]
+        for col in range(n_cols):
+            totals[label, col] += x[row, col]
+    return totals
+@nb.njit
+def agg_abssum(groups, x):
+    """Sum the absolute values of the rows of ``x`` per group label.
+
+    Returns a float64 array of shape ``(groups.max() + 1, x.shape[1])``
+    whose row ``g`` is the column-wise sum of ``abs(x[i, :])`` over every
+    row ``i`` with ``groups[i] == g``.  Labels below the maximum that never
+    occur keep a row of zeros.
+
+    NOTE(review): assumes ``groups`` holds non-negative integer labels, one
+    per row of ``x`` -- confirm against callers.
+    """
+    n_bins = groups.max() + 1
+    n_rows, n_cols = x.shape
+    totals = np.zeros((n_bins, n_cols), dtype=np.float64)
+    for row in range(n_rows):
+        # Look the label up once per row instead of once per element.
+        label = groups[row]
+        for col in range(n_cols):
+            totals[label, col] += abs(x[row, col])
+    return totals
+@nb.njit
+def agg_mean(groups, x):
+    """Average the rows of ``x`` per group label.
+
+    Returns a float64 array of shape ``(groups.max() + 1, x.shape[1])``
+    whose row ``g`` is the column-wise mean of every row ``i`` of ``x`` with
+    ``groups[i] == g``.
+
+    A label in ``0..groups.max()`` that never occurs has no members, so its
+    mean is undefined; that row is filled with NaN instead of dividing by a
+    zero count.
+
+    NOTE(review): assumes ``groups`` holds non-negative integer labels, one
+    per row of ``x`` -- confirm against callers.
+    """
+    max_g = groups.max()
+    length, width = x.shape
+    res = np.zeros((max_g+1, width), dtype=np.float64)
+    bin_count = np.zeros(max_g+1, dtype=np.int32)
+    for i in range(length):
+        g = groups[i]
+        for j in range(width):
+            res[g, j] += x[i, j]
+        bin_count[g] += 1
+    for g in range(max_g+1):
+        curr = bin_count[g]
+        if curr == 0:
+            # Empty group: avoid the division by zero and mark as undefined.
+            for j in range(width):
+                res[g, j] = np.nan
+        else:
+            for j in range(width):
+                res[g, j] /= curr
+    return res
+@nb.njit
+def agg_std(groups, x, ddof=1):
+    """Standard deviation of the rows of ``x`` per group label.
+
+    For each label ``g`` and column ``j`` the result is
+    ``sqrt((sum(x**2) - sum(x)**2 / n) / (n - ddof))`` taken over the ``n``
+    rows ``i`` with ``groups[i] == g``.  The returned float64 array has
+    shape ``(groups.max() + 1, x.shape[1])``.
+
+    Groups with ``n - ddof <= 0`` (for the default ``ddof=1``: empty or
+    single-member groups) do not have enough degrees of freedom; their rows
+    are filled with NaN instead of dividing by zero.
+
+    NOTE(review): assumes ``groups`` holds non-negative integer labels, one
+    per row of ``x`` -- confirm against callers.
+    """
+    max_g = groups.max()
+    length, width = x.shape
+    res = np.zeros((max_g+1, width), dtype=np.float64)
+    sumsq = np.zeros((max_g + 1, width), dtype=np.float64)
+    bin_count = np.zeros(max_g+1, dtype=np.int32)
+    for i in range(length):
+        g = groups[i]
+        for j in range(width):
+            res[g, j] += x[i, j]
+            sumsq[g, j] += x[i, j] * x[i, j]
+        bin_count[g] += 1
+    for g in range(max_g+1):
+        curr = bin_count[g]
+        dof = curr - ddof
+        if dof > 0:
+            for j in range(width):
+                # ``res`` still holds the plain sum at this point.
+                centered = sumsq[g, j] - res[g, j] * res[g, j] / curr
+                # Rounding can push the numerator slightly below zero for
+                # (near-)constant groups; the true value is never negative.
+                if centered < 0.0:
+                    centered = 0.0
+                res[g, j] = math.sqrt(centered / dof)
+        else:
+            # Not enough observations: undefined rather than a zero division.
+            for j in range(width):
+                res[g, j] = np.nan
+    return res
+@nb.njit
+def set_value(groups, source, destinantion):
+    """Copy per-group rows of ``source`` onto the rows of ``destinantion``.
+
+    For every row ``i`` of ``destinantion`` the row ``source[groups[i], :]``
+    is written into ``destinantion[i, :]``.  The array is modified in place
+    and nothing is returned.
+    """
+    n_rows, n_cols = destinantion.shape
+    for row in range(n_rows):
+        src_row = groups[row]
+        for col in range(n_cols):
+            destinantion[row, col] = source[src_row, col]
+def transform(groups, x, func):
+    """Compute a per-group statistic and broadcast it back to every row.
+
+    ``func`` selects the statistic and is one of ``'mean'``, ``'std'``
+    (with ``ddof=1``), ``'sum'`` or ``'abssum'``.  The result has the same
+    shape as ``x``; row ``i`` holds the statistic of the group
+    ``groups[i]``.
+
+    The output uses the dtype of the aggregated values rather than the
+    dtype of ``x``, so integer input no longer truncates fractional means
+    or standard deviations.
+
+    Raises ``ValueError`` when ``func`` is not one of the names above.
+    """
+    # Reuse the single dispatch in ``aggregate`` instead of duplicating it.
+    value_data = aggregate(groups, x, func)
+    res = np.zeros(x.shape, dtype=value_data.dtype)
+    set_value(groups, value_data, res)
+    return res
+def aggregate(groups, x, func):
+    """Return one row of statistics per group label.
+
+    ``func`` selects the statistic: ``'mean'``, ``'std'`` (computed with
+    ``ddof=1``), ``'sum'`` or ``'abssum'``.  The work is delegated to the
+    matching ``agg_*`` routine and its result is returned unchanged.
+
+    Raises ``ValueError`` when ``func`` is not one of the names above.
+    """
+    if func == 'mean':
+        return agg_mean(groups, x)
+    if func == 'std':
+        return agg_std(groups, x, ddof=1)
+    if func == 'sum':
+        return agg_sum(groups, x)
+    if func == 'abssum':
+        return agg_abssum(groups, x)
+    raise ValueError('({0}) is not recognized as valid functor'.format(func))
+if __name__ == '__main__':
+    # Ad-hoc timing of aggregate(..., 'mean') on random data; prints the
+    # elapsed wall-clock time of two identical 1000-call passes.
+    n_samples = 6000
+    n_features = 10
+    n_groups = 30
+    groups = np.random.randint(n_groups, size=n_samples)
+    max_g = n_groups - 1
+    x = np.random.randn(n_samples, n_features)
+    import datetime as dt
+    # First pass (presumably includes the one-off numba compilation of
+    # agg_mean on its first call).
+    started = dt.datetime.now()
+    for _ in range(1000):
+        res = aggregate(groups, x, 'mean')
+    print(dt.datetime.now() - started)
+    # Second pass over the same call.  An earlier experiment wrapped
+    # ``transform`` in ``nb.jit`` at this point; it is no longer active.
+    started = dt.datetime.now()
+    for _ in range(1000):
+        res = aggregate(groups, x, 'mean')
+    print(dt.datetime.now() - started)
\ No newline at end of file
--- a/alphamind/data/neutralize.py
+++ b/alphamind/data/neutralize.py
@@ -11,7 +11,7 @@ from numpy.linalg import solve
 from typing import Tuple
 from typing import Union
 from typing import Dict
-from alphamind.aggregate import groupby
+from alphamind.groupby import groupby
 def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \

--- a/alphamind/data/standardize.py
+++ b/alphamind/data/standardize.py
@@ -6,8 +6,8 @@ Created on 2017-4-25
 """
 import numpy as np
-from alphamind.aggregate import group_mapping
+from alphamind.groupby import group_mapping
-from alphamind.impl import transform
+from alphamind.aggregate import transform
 def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:

--- a/alphamind/data/winsorize.py
+++ b/alphamind/data/winsorize.py
@@ -6,8 +6,8 @@ Created on 2017-4-25
 """
 import numpy as np
-from alphamind.aggregate import group_mapping
+from alphamind.groupby import group_mapping
-from alphamind.impl import transform
+from alphamind.aggregate import transform
 def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:

--- a/alphamind/aggregate.pyx
+++ b/alphamind/aggregate.pyx
 # -*- coding: utf-8 -*-
 # distutils: language = c++
 """
 Created on 2017-4-26
 @author: cheng.li
 """
 import numpy as np
 from numpy import zeros
 from numpy import max as nmax
 cimport numpy as np
 cimport cython
 from libc.math cimport sqrt
 from libc.math cimport fabs
 from libcpp.vector cimport vector as cpp_vector
 from libcpp.unordered_map cimport unordered_map as cpp_map
 from cython.operator cimport dereference as deref
 ctypedef long long int64_t
 # Collect, for every distinct value in `groups`, the positions at which it
 # occurs, and return one index array per distinct value.
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.initializedcheck(False)
 cpdef groupby(long[:] groups):
    cdef long long length = groups.shape[0]
    # Maps a group tag to the list of row indices carrying that tag.
    cdef cpp_map[long, cpp_vector[int64_t]] group_ids
    cdef long long i
    cdef long curr_tag
    cdef cpp_map[long, cpp_vector[int64_t]].iterator it
    # NOTE(review): declared but not used anywhere in the visible body.
    cdef np.ndarray[long long, ndim=1] npy_array
    for i in range(length):
        curr_tag = groups[i]
        it = group_ids.find(curr_tag)
        if it == group_ids.end():
            # First occurrence of this tag: start a new index list.
            # NOTE(review): presumably the Python list is coerced to a
            # one-element C++ vector here -- confirm.
            group_ids[curr_tag] = [i]
        else:
            deref(it).second.push_back(i)
    # NOTE(review): relies on the map being usable like a Python dict
    # (`.values()`); the order of the returned arrays follows the map's
    # iteration order -- confirm both against the Cython version in use.
    return [np.array(v) for v in group_ids.values()]
 # Relabel `groups` with consecutive integers starting at 0, numbered in
 # order of first appearance; equal input tags receive equal output labels.
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.initializedcheck(False)
 cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups):
    cdef size_t length = groups.shape[0]
    # NOTE(review): the buffer is typed as C `int` but allocated with
    # `dtype=int`; whether these widths agree is platform dependent -- confirm.
    cdef np.ndarray[int, ndim=1] res= zeros(length, dtype=int)
    # Maps an original tag to the compact label already assigned to it.
    cdef cpp_map[long, long] current_hold
    cdef long curr_tag
    # Last label handed out; -1 so that the first new tag becomes 0.
    cdef long running_tag = -1
    cdef size_t i = 0
    cdef cpp_map[long, long].iterator it
    for i in range(length):
        curr_tag = groups[i]
        it = current_hold.find(curr_tag)
        if it == current_hold.end():
            # Unseen tag: allocate the next label and remember it.
            running_tag += 1
            res[i] = running_tag
            current_hold[curr_tag] = running_tag
        else:
            # Known tag: reuse its label.
            res[i] = deref(it).second
    return res
--- a/alphamind/portfolio/rankbuilder.py
+++ b/alphamind/portfolio/rankbuilder.py
@@ -8,7 +8,7 @@ Created on 2017-4-26
 import numpy as np
 import numba as nb
 from numpy import zeros
-from alphamind.aggregate import groupby
+from alphamind.groupby import groupby
 @nb.njit

--- a/alphamind/settlement/simplesettle.py
+++ b/alphamind/settlement/simplesettle.py
@@ -6,8 +6,8 @@ Created on 2017-4-28
 """
 import numpy as np
-from alphamind.aggregate import group_mapping
+from alphamind.groupby import group_mapping
-from alphamind.impl import aggregate
+from alphamind.aggregate import aggregate
 def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:

--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@ else:
    line_trace = False
-ext_modules = ['alphamind/aggregate.pyx']
+ext_modules = ['alphamind/groupby.pyx']
 def generate_extensions(ext_modules, line_trace=False):