Commit cee2b19a authored by Dr.李's avatar Dr.李

small enhancement

parent b3713d44
...@@ -5,8 +5,9 @@ Created on 2017-4-26 ...@@ -5,8 +5,9 @@ Created on 2017-4-26
@author: cheng.li @author: cheng.li
""" """
import numpy as np
cimport numpy as np cimport numpy as np
from numpy import zeros
from numpy import asarray
cimport cython cimport cython
from libc.math cimport sqrt from libc.math cimport sqrt
...@@ -14,7 +15,7 @@ from libc.math cimport sqrt ...@@ -14,7 +15,7 @@ from libc.math cimport sqrt
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cdef int max_groups(long[:] groups, size_t length) nogil: cdef int max_groups(long* groups, size_t length) nogil:
cdef long curr_max = 0 cdef long curr_max = 0
cdef size_t i cdef size_t i
cdef long curr cdef long curr
...@@ -29,10 +30,12 @@ cdef int max_groups(long[:] groups, size_t length) nogil: ...@@ -29,10 +30,12 @@ cdef int max_groups(long[:] groups, size_t length) nogil:
@cython.wraparound(False) @cython.wraparound(False)
@cython.cdivision(True) @cython.cdivision(True)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t width): cdef double[:, :] agg_mean(long* groups, double* x, size_t length, size_t width):
cdef long max_g = max_groups(groups, length) cdef long max_g = max_groups(groups, length)
cdef double[:, :] res = np.zeros((max_g+1, width)) cdef double[:, :] res = zeros((max_g+1, width))
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int) cdef double* res_ptr = &res[0, 0]
cdef long[:] bin_count = zeros(max_g+1, dtype=int)
cdef long* bin_count_ptr = &bin_count[0]
cdef size_t i cdef size_t i
cdef size_t j cdef size_t j
cdef long curr cdef long curr
...@@ -40,14 +43,14 @@ cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t ...@@ -40,14 +43,14 @@ cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t
with nogil: with nogil:
for i in range(length): for i in range(length):
for j in range(width): for j in range(width):
res[groups[i], j] += x[i, j] res_ptr[groups[i]*width + j] += x[i*width + j]
bin_count[groups[i]] += 1 bin_count_ptr[groups[i]] += 1
for i in range(res.shape[0]): for i in range(max_g+1):
curr = bin_count[i] curr = bin_count_ptr[i]
if curr != 0: if curr != 0:
for j in range(width): for j in range(width):
res[i, j] /= curr res_ptr[i*width + j] /= curr
return res return res
...@@ -55,14 +58,18 @@ cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t ...@@ -55,14 +58,18 @@ cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t
@cython.wraparound(False) @cython.wraparound(False)
@cython.cdivision(True) @cython.cdivision(True)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cdef double[:, :] agg_std(long[:] groups, double[:, :] x, size_t length, size_t width, long ddof=1): cdef double[:, :] agg_std(long* groups, double* x, size_t length, size_t width, long ddof=1):
cdef long max_g = max_groups(groups, length) cdef long max_g = max_groups(groups, length)
cdef double[:, :] running_sum_square = np.zeros((max_g+1, width)) cdef double[:, :] running_sum_square = zeros((max_g+1, width))
cdef double[:, :] running_sum = np.zeros((max_g+1, width)) cdef double* running_sum_square_ptr = &running_sum_square[0, 0]
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int) cdef double[:, :] running_sum = zeros((max_g+1, width))
cdef double* running_sum_ptr = &running_sum[0, 0]
cdef long[:] bin_count = zeros(max_g+1, dtype=int)
cdef long* bin_count_ptr = &bin_count[0]
cdef size_t i cdef size_t i
cdef size_t j cdef size_t j
cdef long k cdef long k
cdef size_t indice
cdef long curr cdef long curr
cdef double raw_value cdef double raw_value
...@@ -70,16 +77,17 @@ cdef double[:, :] agg_std(long[:] groups, double[:, :] x, size_t length, size_t ...@@ -70,16 +77,17 @@ cdef double[:, :] agg_std(long[:] groups, double[:, :] x, size_t length, size_t
for i in range(length): for i in range(length):
k = groups[i] k = groups[i]
for j in range(width): for j in range(width):
raw_value = x[i, j] raw_value = x[i*width + j]
running_sum[k, j] += raw_value running_sum_ptr[k*width + j] += raw_value
running_sum_square[k, j] += raw_value * raw_value running_sum_square_ptr[k*width + j] += raw_value * raw_value
bin_count[k] += 1 bin_count_ptr[k] += 1
for i in range(running_sum_square.shape[0]): for i in range(max_g+1):
curr = bin_count[i] curr = bin_count_ptr[i]
if curr > ddof: if curr != 0:
for j in range(width): for j in range(width):
running_sum_square[i, j] = sqrt((running_sum_square[i, j] - running_sum[i, j] * running_sum[i, j] / curr) / (curr - ddof)) indice = i * width + j
running_sum_square_ptr[indice] = sqrt((running_sum_square_ptr[indice] - running_sum_ptr[indice] * running_sum_ptr[indice] / curr) / (curr - ddof))
return running_sum_square return running_sum_square
...@@ -90,21 +98,25 @@ cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str f ...@@ -90,21 +98,25 @@ cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str f
cdef size_t length = x.shape[0] cdef size_t length = x.shape[0]
cdef size_t width = x.shape[1] cdef size_t width = x.shape[1]
cdef double[:, :] res_data = np.zeros((length, width)) cdef double[:, :] res_data = zeros((length, width))
cdef double[:, :] value_data = np.zeros((length, width)) cdef double* res_data_ptr = &res_data[0, 0]
cdef double[:, :] value_data = zeros((length, width))
cdef double* value_data_ptr
cdef size_t i cdef size_t i
cdef size_t j cdef size_t j
cdef size_t k cdef size_t k
if func == 'mean': if func == 'mean':
value_data = agg_mean(groups, x, length, width) value_data = agg_mean(&groups[0], &x[0, 0], length, width)
elif func == 'std': elif func == 'std':
value_data = agg_std(groups, x, length, width, ddof=1) value_data = agg_std(&groups[0], &x[0, 0], length, width, ddof=1)
value_data_ptr = &value_data[0, 0]
with nogil: with nogil:
for i in range(length): for i in range(length):
k = groups[i] k = groups[i]
for j in range(width): for j in range(width):
res_data[i, j] = value_data[k, j] res_data_ptr[i*width + j] = value_data_ptr[k*width + j]
return np.asarray(res_data) return asarray(res_data)
\ No newline at end of file \ No newline at end of file
...@@ -18,4 +18,3 @@ def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray: ...@@ -18,4 +18,3 @@ def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
return (x - mean_values) / std_values return (x - mean_values) / std_values
else: else:
return (x - x.mean(axis=0)) / x.std(axis=0) return (x - x.mean(axis=0)) / x.std(axis=0)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment