Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
9b79874b
Commit
9b79874b
authored
Apr 30, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed linux bug
parent
7b72cb5a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
110 additions
and
79 deletions
+110
-79
aggregate.pyx
alphamind/aggregate.pyx
+108
-78
setup.py
setup.py
+2
-1
No files found.
alphamind/aggregate.pyx
View file @
9b79874b
...
...
@@ -11,8 +11,8 @@ cimport numpy as np
cimport cython
from libc.math cimport sqrt
from libc.math cimport fabs
from
libc.stdlib cimport c
alloc
from
libc.stdlib cimport f
ree
from
cpython.mem cimport PyMem_M
alloc
from
cpython.mem cimport PyMem_F
ree
from numpy import array
from libcpp.vector cimport vector as cpp_vector
from libcpp.unordered_map cimport unordered_map as cpp_map
...
...
@@ -60,7 +60,7 @@ cpdef list groupby(long[:] groups):
@cython.wraparound(False)
@cython.initializedcheck(False)
cdef long* group_mapping(long* groups, size_t length, size_t* max_g):
cdef long *res_ptr = <long*>
calloc(length, sizeof(int
))
cdef long *res_ptr = <long*>
PyMem_Malloc(length*sizeof(long
))
cdef cpp_map[long, long] current_hold
cdef long curr_tag
cdef long running_tag = -1
...
...
@@ -85,14 +85,17 @@ cdef long* group_mapping(long* groups, size_t length, size_t* max_g):
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_sum(long* groups, size_t max_g, double* x, size_t length, size_t width)
nogil
:
cdef double* res_ptr = <double*>
calloc((max_g+1)*width,
sizeof(double))
cdef double* agg_sum(long* groups, size_t max_g, double* x, size_t length, size_t width):
cdef double* res_ptr = <double*>
PyMem_Malloc((max_g+1)*width*
sizeof(double))
cdef size_t i
cdef size_t j
cdef size_t loop_idx1
cdef size_t loop_idx2
cdef long curr
for i in range((max_g+1)*width):
res_ptr[i] = 0.
for i in range(length):
loop_idx1 = i*width
loop_idx2 = groups[i]*width
...
...
@@ -105,14 +108,17 @@ cdef double* agg_sum(long* groups, size_t max_g, double* x, size_t length, size_
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_abssum(long* groups, size_t max_g, double* x, size_t length, size_t width)
nogil
:
cdef double* res_ptr = <double*>
calloc((max_g+1)*width,
sizeof(double))
cdef double* agg_abssum(long* groups, size_t max_g, double* x, size_t length, size_t width):
cdef double* res_ptr = <double*>
PyMem_Malloc((max_g+1)*width*
sizeof(double))
cdef size_t i
cdef size_t j
cdef size_t loop_idx1
cdef size_t loop_idx2
cdef long curr
for i in range((max_g+1)*width):
res_ptr[i] = 0.
for i in range(length):
loop_idx1 = i*width
loop_idx2 = groups[i]*width
...
...
@@ -125,30 +131,37 @@ cdef double* agg_abssum(long* groups, size_t max_g, double* x, size_t length, si
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size_t width)
nogil
:
cdef double* res_ptr = <double*>
calloc((max_g+1)*width,
sizeof(double))
cdef long* bin_count_ptr = <long*>
calloc(max_g+1, sizeof(int
))
cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size_t width):
cdef double* res_ptr = <double*>
PyMem_Malloc((max_g+1)*width*
sizeof(double))
cdef long* bin_count_ptr = <long*>
PyMem_Malloc((max_g+1)*sizeof(long
))
cdef size_t i
cdef size_t j
cdef size_t loop_idx1
cdef size_t loop_idx2
cdef long curr
for i in range(length):
loop_idx1 = i*width
loop_idx2 = groups[i]*width
for j in range(width):
res_ptr[loop_idx2 + j] += x[loop_idx1 + j]
bin_count_ptr[groups[i]] += 1
try:
for i in range((max_g+1)*width):
res_ptr[i] = 0.
for i in range(max_g+1):
curr = bin_count_ptr[i]
if curr != 0:
for i in range(max_g+1):
bin_count_ptr[i] = 0
for i in range(length):
loop_idx1 = i*width
loop_idx2 = groups[i]*width
for j in range(width):
res_ptr[loop_idx1 + j] /= curr
free(bin_count_ptr)
res_ptr[loop_idx2 + j] += x[loop_idx1 + j]
bin_count_ptr[groups[i]] += 1
for i in range(max_g+1):
curr = bin_count_ptr[i]
if curr != 0:
loop_idx1 = i*width
for j in range(width):
res_ptr[loop_idx1 + j] /= curr
finally:
PyMem_Free(bin_count_ptr)
return res_ptr
...
...
@@ -156,10 +169,10 @@ cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_t width, long ddof=1)
nogil
:
cdef double* running_sum_square_ptr = <double*>
calloc((max_g+1)*width,
sizeof(double))
cdef double* running_sum_ptr = <double*>
calloc((max_g+1)*width,
sizeof(double))
cdef long* bin_count_ptr = <long*>
calloc(max_g+1, sizeof(int
))
cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_t width, long ddof=1):
cdef double* running_sum_square_ptr = <double*>
PyMem_Malloc((max_g+1)*width*
sizeof(double))
cdef double* running_sum_ptr = <double*>
PyMem_Malloc((max_g+1)*width*
sizeof(double))
cdef long* bin_count_ptr = <long*>
PyMem_Malloc((max_g+1)*sizeof(long
))
cdef size_t i
cdef size_t j
cdef size_t loop_idx1
...
...
@@ -167,26 +180,35 @@ cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_
cdef long curr
cdef double raw_value
for i in range(length):
loop_idx1 = i * width
loop_idx2 = groups[i] * width
try:
for i in range((max_g+1)*width):
running_sum_square_ptr[i] = 0.
running_sum_ptr[i] = 0.
for j in range(width):
raw_value = x[loop_idx1 + j]
running_sum_ptr[loop_idx2 + j] += raw_value
running_sum_square_ptr[loop_idx2 + j] += raw_value * raw_value
bin_count_ptr[groups[i]] += 1
for i in range(max_g+1):
curr = bin_count_ptr[i]
loop_idx1 = i * width
if curr != 0:
for j in range(width):
loop_idx2 = loop_idx1 + j
running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))
for i in range(max_g+1):
bin_count_ptr[i] = 0
free(running_sum_ptr)
free(bin_count_ptr)
for i in range(length):
loop_idx1 = i * width
loop_idx2 = groups[i] * width
for j in range(width):
raw_value = x[loop_idx1 + j]
running_sum_ptr[loop_idx2 + j] += raw_value
running_sum_square_ptr[loop_idx2 + j] += raw_value * raw_value
bin_count_ptr[groups[i]] += 1
for i in range(max_g+1):
curr = bin_count_ptr[i]
loop_idx1 = i * width
if curr != 0:
for j in range(width):
loop_idx2 = loop_idx1 + j
running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))
finally:
PyMem_Free(running_sum_ptr)
PyMem_Free(bin_count_ptr)
return running_sum_square_ptr
...
...
@@ -196,9 +218,9 @@ cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_
cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str func):
cdef size_t length = x.shape[0]
cdef size_t width = x.shape[1]
cdef size_t* max_g = <size_t*>
calloc(1,
sizeof(size_t))
cdef size_t* max_g = <size_t*>
PyMem_Malloc(
sizeof(size_t))
cdef long* mapped_groups = group_mapping(&groups[0], length, max_g)
cdef double* res_data_ptr = <double*>
calloc(length*width,
sizeof(double))
cdef double* res_data_ptr = <double*>
PyMem_Malloc(length*width*
sizeof(double))
cdef double* value_data_ptr
cdef np.ndarray[double, ndim=2] res
cdef size_t i
...
...
@@ -206,25 +228,28 @@ cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str f
cdef size_t loop_idx1
cdef size_t loop_idx2
if func == 'mean':
value_data_ptr = agg_mean(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func == 'std':
value_data_ptr = agg_std(mapped_groups, max_g[0], &x[0, 0], length, width, ddof=1)
elif func == 'sum':
value_data_ptr = agg_sum(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func =='abssum':
value_data_ptr = agg_abssum(mapped_groups, max_g[0], &x[0, 0], length, width)
with nogil:
for i in range(length):
loop_idx1 = i*width
loop_idx2 = mapped_groups[i] * width
for j in range(width):
res_data_ptr[loop_idx1 + j] = value_data_ptr[loop_idx2 + j]
free(value_data_ptr)
free(mapped_groups)
free(max_g)
try:
if func == 'mean':
value_data_ptr = agg_mean(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func == 'std':
value_data_ptr = agg_std(mapped_groups, max_g[0], &x[0, 0], length, width, ddof=1)
elif func == 'sum':
value_data_ptr = agg_sum(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func =='abssum':
value_data_ptr = agg_abssum(mapped_groups, max_g[0], &x[0, 0], length, width)
else:
raise ValueError('({0}) is not recognized as valid functor'.format(func))
with nogil:
for i in range(length):
loop_idx1 = i*width
loop_idx2 = mapped_groups[i] * width
for j in range(width):
res_data_ptr[loop_idx1 + j] = value_data_ptr[loop_idx2 + j]
finally:
PyMem_Free(value_data_ptr)
PyMem_Free(mapped_groups)
PyMem_Free(max_g)
res = np.PyArray_SimpleNewFromData(2, [length, width], np.NPY_FLOAT64, res_data_ptr)
PyArray_ENABLEFLAGS(res, np.NPY_OWNDATA)
return res
...
...
@@ -236,22 +261,27 @@ cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str f
cpdef np.ndarray[double, ndim=2] aggregate(long[:] groups, double[:, :] x, str func):
cdef size_t length = x.shape[0]
cdef size_t width = x.shape[1]
cdef size_t* max_g = <size_t*>
calloc(1,
sizeof(size_t))
cdef size_t* max_g = <size_t*>
PyMem_Malloc(
sizeof(size_t))
cdef long* mapped_groups = group_mapping(&groups[0], length, max_g)
cdef np.ndarray[double, ndim=2] res
cdef double* value_data_ptr
if func == 'mean':
value_data_ptr = agg_mean(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func == 'std':
value_data_ptr = agg_std(mapped_groups, max_g[0], &x[0, 0], length, width, ddof=1)
elif func == 'sum':
value_data_ptr = agg_sum(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func =='abssum':
value_data_ptr = agg_abssum(mapped_groups, max_g[0], &x[0, 0], length, width)
try:
if func == 'mean':
value_data_ptr = agg_mean(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func == 'std':
value_data_ptr = agg_std(mapped_groups, max_g[0], &x[0, 0], length, width, ddof=1)
elif func == 'sum':
value_data_ptr = agg_sum(mapped_groups, max_g[0], &x[0, 0], length, width)
elif func =='abssum':
value_data_ptr = agg_abssum(mapped_groups, max_g[0], &x[0, 0], length, width)
else:
raise ValueError('({0}) is not recognized as valid functor'.format(func))
res = np.PyArray_SimpleNewFromData(2, [max_g[0]+1, width], np.NPY_FLOAT64, value_data_ptr)
PyArray_ENABLEFLAGS(res, np.NPY_OWNDATA)
finally:
PyMem_Free(mapped_groups)
PyMem_Free(max_g)
res = np.PyArray_SimpleNewFromData(2, [max_g[0]+1, width], np.NPY_FLOAT64, value_data_ptr)
PyArray_ENABLEFLAGS(res, np.NPY_OWNDATA)
free(mapped_groups)
free(max_g)
return res
\ No newline at end of file
setup.py
View file @
9b79874b
...
...
@@ -42,7 +42,8 @@ def generate_extensions(ext_modules, line_trace=False):
for
pyxfile
in
ext_modules
:
ext
=
Extension
(
name
=
'.'
.
join
(
pyxfile
.
split
(
'/'
))[:
-
4
],
sources
=
[
pyxfile
],
define_macros
=
define_macros
)
define_macros
=
define_macros
,
extra_compile_args
=
[
'-std=c++11'
])
extensions
.
append
(
ext
)
return
extensions
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment