Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
c7150a2c
Commit
c7150a2c
authored
Apr 26, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update impl
parent
77b3d469
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
67 additions
and
56 deletions
+67
-56
benchmarks.py
alphamind/benchmarks/benchmarks.py
+4
-0
impl.pyx
alphamind/data/impl.pyx
+55
-33
standardize.py
alphamind/data/standardize.py
+3
-9
winsorize.py
alphamind/data/winsorize.py
+5
-14
No files found.
alphamind/benchmarks/benchmarks.py
View file @
c7150a2c
...
@@ -20,7 +20,11 @@ if __name__ == '__main__':
...
@@ -20,7 +20,11 @@ if __name__ == '__main__':
benchmark_standardize_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_standardize_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_standardize
(
30
,
10
,
50000
)
benchmark_standardize
(
30
,
10
,
50000
)
benchmark_standardize_with_group
(
30
,
10
,
5000
,
5
)
benchmark_standardize_with_group
(
30
,
10
,
5000
,
5
)
benchmark_standardize
(
50000
,
50
,
20
)
benchmark_standardize_with_group
(
50000
,
50
,
20
,
50
)
benchmark_winsorize_normal
(
3000
,
10
,
1000
)
benchmark_winsorize_normal
(
3000
,
10
,
1000
)
benchmark_winsorize_normal_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_winsorize_normal_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_winsorize_normal
(
30
,
10
,
50000
)
benchmark_winsorize_normal
(
30
,
10
,
50000
)
benchmark_winsorize_normal_with_group
(
30
,
10
,
5000
,
5
)
benchmark_winsorize_normal_with_group
(
30
,
10
,
5000
,
5
)
benchmark_winsorize_normal
(
50000
,
50
,
20
)
benchmark_winsorize_normal_with_group
(
50000
,
50
,
20
,
50
)
alphamind/data/impl.pyx
View file @
c7150a2c
...
@@ -13,9 +13,9 @@ from libc.math cimport sqrt
...
@@ -13,9 +13,9 @@ from libc.math cimport sqrt
@cython.boundscheck(False)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.wraparound(False)
cdef int max_groups(long[:] groups,
long
length) nogil:
cdef int max_groups(long[:] groups,
size_t
length) nogil:
cdef long curr_max = 0
cdef long curr_max = 0
cdef
long
i
cdef
size_t
i
cdef long curr
cdef long curr
for i in range(length):
for i in range(length):
...
@@ -27,54 +27,76 @@ cdef int max_groups(long[:] groups, long length) nogil:
...
@@ -27,54 +27,76 @@ cdef int max_groups(long[:] groups, long length) nogil:
@cython.boundscheck(False)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.cdivision(True)
cpdef np.ndarray[double, ndim=2] agg_mean(long[:] groups, double[:, :] x):
cdef double[:, :] agg_mean(long[:] groups, double[:, :] x, size_t length, size_t width):
cdef long length = groups.shape[0]
cdef long width = x.shape[1]
cdef long max_g = max_groups(groups, length)
cdef long max_g = max_groups(groups, length)
cdef double[:, :] res = np.zeros((max_g+1, width))
cdef double[:, :] res = np.zeros((max_g+1, width))
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int)
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int)
cdef
long
i
cdef
size_t
i
cdef
long
j
cdef
size_t
j
cdef long curr
cdef long curr
for i in range(length):
with nogil:
for j in range(width):
for i in range(length):
res[groups[i], j] += x[i, j]
for j in range(width):
bin_count[groups[i]] += 1
res[groups[i], j] += x[i, j]
bin_count[groups[i]] += 1
for i in range(res.shape[0]):
curr = bin_count[i]
if curr != 0:
for j in range(width):
res[i, j] /= curr
return res
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str func):
cdef size_t length = x.shape[0]
cdef size_t width = x.shape[1]
cdef double[:, :] res_data = np.zeros((length, width))
cdef double[:, :] value_data = np.zeros((length, width))
cdef size_t i
cdef size_t j
if func == 'mean':
value_data = agg_mean(groups, x, length, width)
elif func == 'std':
value_data = agg_std(groups, x, length, width, ddof=1)
for i in range(res.shape[0]):
with nogil:
curr = bin_count[i]
for i in range(length):
if curr != 0:
for j in range(width):
for j in range(width):
res[i, j] /= curr
res_data[i, j] = value_data[groups[i], j]
return np.asarray(res)
return np.asarray(res_data)
@cython.boundscheck(False)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.cdivision(True)
cpdef np.ndarray[double, ndim=2] agg_std(long[:] groups, double[:, :] x, long ddof=1):
cdef double[:, :] agg_std(long[:] groups, double[:, :] x, size_t length, size_t width, long ddof=1):
cdef long length = groups.shape[0]
cdef long width = x.shape[1]
cdef long max_g = max_groups(groups, length)
cdef long max_g = max_groups(groups, length)
cdef double[:, :] running_sum_square = np.zeros((max_g+1, width))
cdef double[:, :] running_sum_square = np.zeros((max_g+1, width))
cdef double[:, :] running_sum = np.zeros((max_g+1, width))
cdef double[:, :] running_sum = np.zeros((max_g+1, width))
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int)
cdef long[:] bin_count = np.zeros(max_g+1, dtype=int)
cdef
long
i
cdef
size_t
i
cdef
long
j
cdef
size_t
j
cdef long curr
cdef long curr
cdef double raw_value
cdef double raw_value
for i in range(length):
with nogil:
for j in range(width):
for i in range(length):
raw_value = x[i, j]
running_sum[groups[i], j] += raw_value
running_sum_square[groups[i], j] += raw_value * raw_value
bin_count[groups[i]] += 1
for i in range(running_sum_square.shape[0]):
curr = bin_count[i]
if curr > ddof:
for j in range(width):
for j in range(width):
running_sum_square[i, j] = sqrt((running_sum_square[i, j] - running_sum[i, j] * running_sum[i, j] / curr) / (curr - ddof))
raw_value = x[i, j]
return np.asarray(running_sum_square)
running_sum[groups[i], j] += raw_value
\ No newline at end of file
running_sum_square[groups[i], j] += raw_value * raw_value
bin_count[groups[i]] += 1
for i in range(running_sum_square.shape[0]):
curr = bin_count[i]
if curr > ddof:
for j in range(width):
running_sum_square[i, j] = sqrt((running_sum_square[i, j] - running_sum[i, j] * running_sum[i, j] / curr) / (curr - ddof))
return running_sum_square
\ No newline at end of file
alphamind/data/standardize.py
View file @
c7150a2c
...
@@ -6,20 +6,14 @@ Created on 2017-4-25
...
@@ -6,20 +6,14 @@ Created on 2017-4-25
"""
"""
import
numpy
as
np
import
numpy
as
np
from
alphamind.data.impl
import
agg_mean
from
alphamind.data.impl
import
transform
from
alphamind.data.impl
import
agg_std
def
standardize
(
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
def
standardize
(
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
if
groups
is
not
None
:
if
groups
is
not
None
:
mean_values
=
agg_mean
(
groups
,
x
)
mean_values
=
transform
(
groups
,
x
,
'mean'
)
std_values
=
agg_std
(
groups
,
x
,
ddof
=
1
)
std_values
=
transform
(
groups
,
x
,
'std'
)
value_index
=
np
.
searchsorted
(
range
(
len
(
mean_values
)),
groups
)
mean_values
=
mean_values
[
value_index
]
std_values
=
std_values
[
value_index
]
return
(
x
-
mean_values
)
/
std_values
return
(
x
-
mean_values
)
/
std_values
else
:
else
:
...
...
alphamind/data/winsorize.py
View file @
c7150a2c
...
@@ -6,29 +6,20 @@ Created on 2017-4-25
...
@@ -6,29 +6,20 @@ Created on 2017-4-25
"""
"""
import
numpy
as
np
import
numpy
as
np
from
alphamind.data.impl
import
agg_mean
from
alphamind.data.impl
import
transform
from
alphamind.data.impl
import
agg_std
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
if
groups
is
not
None
:
if
groups
is
not
None
:
mean_values
=
agg_mean
(
groups
,
x
)
mean_values
=
transform
(
groups
,
x
,
'mean'
)
std_values
=
agg_std
(
groups
,
x
,
ddof
=
1
)
std_values
=
transform
(
groups
,
x
,
'std'
)
value_index
=
np
.
searchsorted
(
range
(
len
(
mean_values
)),
groups
)
ubound
=
mean_values
+
num_stds
*
std_values
lbound
=
mean_values
-
num_stds
*
std_values
ubound
=
ubound
[
value_index
]
lbound
=
lbound
[
value_index
]
else
:
else
:
std_values
=
x
.
std
(
axis
=
0
)
std_values
=
x
.
std
(
axis
=
0
)
mean_values
=
x
.
mean
(
axis
=
0
)
mean_values
=
x
.
mean
(
axis
=
0
)
ubound
=
mean_values
+
num_stds
*
std_values
ubound
=
mean_values
+
num_stds
*
std_values
lbound
=
mean_values
-
num_stds
*
std_values
lbound
=
mean_values
-
num_stds
*
std_values
res
=
np
.
where
(
x
>
ubound
,
ubound
,
np
.
where
(
x
<
lbound
,
lbound
,
x
))
res
=
np
.
where
(
x
>
ubound
,
ubound
,
np
.
where
(
x
<
lbound
,
lbound
,
x
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment