Commit cee2b19a authored by Dr.李's avatar Dr.李

small enhancement

parent b3713d44
......@@ -5,8 +5,9 @@ Created on 2017-4-26
@author: cheng.li
"""
import numpy as np
cimport numpy as np
from numpy import zeros
from numpy import asarray
cimport cython
from libc.math cimport sqrt
......@@ -14,7 +15,7 @@ from libc.math cimport sqrt
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cdef int max_groups(long[:] groups, size_t length) nogil:
cdef int max_groups(long* groups, size_t length) nogil:
cdef long curr_max = 0
cdef size_t i
cdef long curr
......@@ -29,10 +30,12 @@ cdef int max_groups(long[:] groups, size_t length) nogil:
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t width):
cdef double[:, :] agg_mean(long* groups, double* x, size_t length, size_t width):
cdef long max_g = max_groups(groups, length)
cdef double[:, :] res = np.zeros((max_g+1, width))
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int)
cdef double[:, :] res = zeros((max_g+1, width))
cdef double* res_ptr = &res[0, 0]
cdef long[:] bin_count = zeros(max_g+1, dtype=int)
cdef long* bin_count_ptr = &bin_count[0]
cdef size_t i
cdef size_t j
cdef long curr
......@@ -40,14 +43,14 @@ cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t
with nogil:
for i in range(length):
for j in range(width):
res[groups[i], j] += x[i, j]
bin_count[groups[i]] += 1
res_ptr[groups[i]*width + j] += x[i*width + j]
bin_count_ptr[groups[i]] += 1
for i in range(res.shape[0]):
curr = bin_count[i]
for i in range(max_g+1):
curr = bin_count_ptr[i]
if curr != 0:
for j in range(width):
res[i, j] /= curr
res_ptr[i*width + j] /= curr
return res
......@@ -55,14 +58,18 @@ cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double[:, :] agg_std(long[:] groups, double[:, :] x, size_t length, size_t width, long ddof=1):
cdef double[:, :] agg_std(long* groups, double* x, size_t length, size_t width, long ddof=1):
cdef long max_g = max_groups(groups, length)
cdef double[:, :] running_sum_square = np.zeros((max_g+1, width))
cdef double[:, :] running_sum = np.zeros((max_g+1, width))
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int)
cdef double[:, :] running_sum_square = zeros((max_g+1, width))
cdef double* running_sum_square_ptr = &running_sum_square[0, 0]
cdef double[:, :] running_sum = zeros((max_g+1, width))
cdef double* running_sum_ptr = &running_sum[0, 0]
cdef long[:] bin_count = zeros(max_g+1, dtype=int)
cdef long* bin_count_ptr = &bin_count[0]
cdef size_t i
cdef size_t j
cdef long k
cdef size_t indice
cdef long curr
cdef double raw_value
......@@ -70,16 +77,17 @@ cdef double[:, :] agg_std(long[:] groups, double[:, :] x, size_t length, size_t
for i in range(length):
k = groups[i]
for j in range(width):
raw_value = x[i, j]
running_sum[k, j] += raw_value
running_sum_square[k, j] += raw_value * raw_value
bin_count[k] += 1
for i in range(running_sum_square.shape[0]):
curr = bin_count[i]
if curr > ddof:
raw_value = x[i*width + j]
running_sum_ptr[k*width + j] += raw_value
running_sum_square_ptr[k*width + j] += raw_value * raw_value
bin_count_ptr[k] += 1
for i in range(max_g+1):
curr = bin_count_ptr[i]
if curr != 0:
for j in range(width):
running_sum_square[i, j] = sqrt((running_sum_square[i, j] - running_sum[i, j] * running_sum[i, j] / curr) / (curr - ddof))
indice = i * width + j
running_sum_square_ptr[indice] = sqrt((running_sum_square_ptr[indice] - running_sum_ptr[indice] * running_sum_ptr[indice] / curr) / (curr - ddof))
return running_sum_square
......@@ -90,21 +98,25 @@ cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str f
cdef size_t length = x.shape[0]
cdef size_t width = x.shape[1]
cdef double[:, :] res_data = np.zeros((length, width))
cdef double[:, :] value_data = np.zeros((length, width))
cdef double[:, :] res_data = zeros((length, width))
cdef double* res_data_ptr = &res_data[0, 0]
cdef double[:, :] value_data = zeros((length, width))
cdef double* value_data_ptr
cdef size_t i
cdef size_t j
cdef size_t k
if func == 'mean':
value_data = agg_mean(groups, x, length, width)
value_data = agg_mean(&groups[0], &x[0, 0], length, width)
elif func == 'std':
value_data = agg_std(groups, x, length, width, ddof=1)
value_data = agg_std(&groups[0], &x[0, 0], length, width, ddof=1)
value_data_ptr = &value_data[0, 0]
with nogil:
for i in range(length):
k = groups[i]
for j in range(width):
res_data[i, j] = value_data[k, j]
res_data_ptr[i*width + j] = value_data_ptr[k*width + j]
return np.asarray(res_data)
\ No newline at end of file
return asarray(res_data)
\ No newline at end of file
......@@ -18,4 +18,3 @@ def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
return (x - mean_values) / std_values
else:
return (x - x.mean(axis=0)) / x.std(axis=0)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment