Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
6eb178dc
Commit
6eb178dc
authored
Jul 03, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added scikit-learn compatible standardize
parent
25b755fe
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
139 additions
and
3 deletions
+139
-3
.gitignore
.gitignore
+3
-0
standardize.py
alphamind/data/standardize.py
+66
-1
test_standardize.py
alphamind/tests/data/test_standardize.py
+43
-2
utilities.py
alphamind/utilities.py
+27
-0
No files found.
.gitignore
View file @
6eb178dc
...
...
@@ -7,3 +7,6 @@ Alpha_Mind.egg-info/*
*.c
*.cpp
*.html
*.nbc
*.nbi
/notebooks/.ipynb_checkpoints
\ No newline at end of file
alphamind/data/standardize.py
View file @
6eb178dc
...
...
@@ -8,8 +8,13 @@ Created on 2017-4-25
import
numpy
as
np
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_std
from
alphamind.utilities
import
array_index
from
numba
import
jitclass
from
numba
import
int32
,
float64
def
standardize
(
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
,
ddof
=
1
)
->
np
.
ndarray
:
...
...
@@ -21,4 +26,64 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
return
(
x
-
mean_values
)
/
std_values
else
:
return
(
x
-
simple_mean
(
x
,
axis
=
0
))
/
simple_std
(
x
,
axis
=
0
)
return
(
x
-
simple_mean
(
x
,
axis
=
0
))
/
simple_std
(
x
,
axis
=
0
,
ddof
=
ddof
)
class Standardizer(object):
    """Column-wise z-score scaler with a scikit-learn style fit/transform API.

    ``fit`` stores ``simple_mean(x, axis=0)`` and
    ``simple_std(x, axis=0, ddof=ddof)``; ``transform`` then applies
    ``(x - mean_) / std_`` to any array that broadcasts against them.

    :param ddof: forwarded unchanged to ``simple_std`` (presumably the delta
        degrees of freedom of the standard deviation -- confirm against
        ``alphamind.utilities``).
    """

    def __init__(self, ddof=1):
        self.ddof_ = ddof
        # Both statistics stay None until fit() has been called.
        self.mean_ = None
        self.std_ = None

    def fit(self, x):
        """Estimate the statistics of ``x`` along axis 0.

        :param x: data to learn the statistics from.
        :return: the estimator itself, so that calls can be chained as in
            ``Standardizer().fit(x).transform(y)``.
        """
        self.mean_ = simple_mean(x, axis=0)
        self.std_ = simple_std(x, axis=0, ddof=self.ddof_)
        return self

    def transform(self, x):
        """Scale ``x`` with the statistics stored by the last ``fit`` call.

        :param x: data to scale; must broadcast against ``mean_`` / ``std_``.
        :return: ``(x - mean_) / std_``.
        """
        return (x - self.mean_) / self.std_
class GroupedStandardizer(object):
    """Per-group z-score scaler with a scikit-learn style fit/transform API.

    Both ``fit`` and ``transform`` take a single 2-D array whose first column
    holds the group label of every row (it is cast to ``int``) and whose
    remaining columns hold the values to scale.

    :param ddof: passed to ``aggregate`` as its ``ddof`` argument when the
        per-group standard deviation is computed.
    """

    def __init__(self, ddof=1):
        # labels_, mean_ and std_ stay None until fit() has been called.
        self.labels_ = None
        self.mean_ = None
        self.std_ = None
        self.ddof_ = ddof

    def fit(self, x):
        """Compute one mean and one standard deviation per group label.

        :param x: 2-D array, column 0 = group labels, columns 1.. = values.
        :return: the estimator itself, so that calls can be chained as in
            ``GroupedStandardizer().fit(x).transform(x)``.
        """
        raw_groups = x[:, 0].astype(int)
        groups = group_mapping(raw_groups)
        self.mean_ = aggregate(groups, x[:, 1:], 'mean')
        self.std_ = aggregate(groups, x[:, 1:], 'std', self.ddof_)
        # The labels are kept so that transform() can translate a raw label
        # into a row of mean_ / std_.
        # NOTE(review): this assumes group_mapping numbers the groups in the
        # same (sorted) order as np.unique -- confirm in alphamind.utilities.
        self.labels_ = np.unique(raw_groups)
        return self

    def transform(self, x):
        """Scale every row with the statistics of its own group.

        :param x: 2-D array laid out like the one given to ``fit``.
        :return: the value columns only (the label column is dropped), i.e.
            ``(x[:, 1:] - mean_[index]) / std_[index]`` where ``index`` is the
            position of each row's label inside ``labels_``.
        """
        groups = x[:, 0].astype(int)
        index = array_index(self.labels_, groups)
        return (x[:, 1:] - self.mean_[index]) / self.std_[index]
if __name__ == '__main__':
    # Ad-hoc benchmark: time 10000 runs of the functional standardize()
    # against 10000 fit/transform rounds of GroupedStandardizer on the same
    # random data, then check that both produce the same values.
    import datetime as dt

    features = np.random.randn(1000, 3)
    labels = np.random.randint(20, size=1000)
    # GroupedStandardizer expects the labels as the first column of its input.
    stacked = np.concatenate([labels.reshape((-1, 1)), features], axis=1)

    tic = dt.datetime.now()
    for _ in range(10000):
        functional_result = standardize(features, labels)
    print(dt.datetime.now() - tic)

    scaler = GroupedStandardizer(1)

    tic = dt.datetime.now()
    for _ in range(10000):
        scaler.fit(stacked)
        estimator_result = scaler.transform(stacked)
    print(dt.datetime.now() - tic)

    np.testing.assert_array_almost_equal(functional_result, estimator_result)
\ No newline at end of file
alphamind/tests/data/test_standardize.py
View file @
6eb178dc
...
...
@@ -10,6 +10,8 @@ import numpy as np
import
pandas
as
pd
from
scipy.stats
import
zscore
from
alphamind.data.standardize
import
standardize
from
alphamind.data.standardize
import
Standardizer
from
alphamind.data.standardize
import
GroupedStandardizer
class
TestStandardize
(
unittest
.
TestCase
):
...
...
@@ -23,7 +25,7 @@ class TestStandardize(unittest.TestCase):
exp_zscore
=
zscore
(
self
.
x
,
ddof
=
1
)
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
def
test_standardize_with_group
(
self
):
calc_zscore
=
standardize
(
self
.
x
,
self
.
groups
)
exp_zscore
=
pd
.
DataFrame
(
self
.
x
)
.
\
...
...
@@ -31,6 +33,45 @@ class TestStandardize(unittest.TestCase):
transform
(
lambda
s
:
(
s
-
s
.
mean
(
axis
=
0
))
/
s
.
std
(
axis
=
0
,
ddof
=
1
))
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
def test_standardizer(self):
    """Standardizer fitted and applied on self.x must agree with standardize(self.x)."""
    scaler = Standardizer()
    scaler.fit(self.x)

    np.testing.assert_array_almost_equal(scaler.transform(self.x),
                                         standardize(self.x))
def test_groupedstandardizer(self):
    """GroupedStandardizer must agree with standardize(self.x, self.groups)."""
    # The estimator reads the group labels from the first input column.
    label_column = self.groups.reshape((-1, 1))
    stacked = np.concatenate([label_column, self.x], axis=1)

    scaler = GroupedStandardizer()
    scaler.fit(stacked)

    np.testing.assert_array_almost_equal(scaler.transform(stacked),
                                         standardize(self.x, self.groups))
if __name__ == '__main__':
    # NOTE(review): unittest.main() terminates the interpreter by default once
    # the tests have run, so the benchmark below may never execute -- confirm
    # whether this call is meant to stay in front of it.
    unittest.main()

    # Ad-hoc benchmark: fit on x and transform y 10000 times, first with
    # scikit-learn's StandardScaler and then with Standardizer(ddof=0).
    import datetime as dt
    from sklearn.preprocessing import StandardScaler

    x = np.random.randn(1000, 2)
    y = np.random.randn(50, 2)

    start = dt.datetime.now()
    for i in range(10000):
        s1 = StandardScaler()
        s1.fit(x)
        x1 = s1.transform(y)
    print(dt.datetime.now() - start)

    start = dt.datetime.now()
    for i in range(10000):
        s2 = Standardizer(ddof=0)
        s2.fit(x)
        x2 = s2.transform(y)
    print(dt.datetime.now() - start)

    # Both scalers are expected to produce the same scaled values for y.
    np.testing.assert_array_almost_equal(x1, x2)
\ No newline at end of file
alphamind/utilities.py
View file @
6eb178dc
...
...
@@ -219,6 +219,21 @@ def scale_value(groups, source, x, scale):
return
destination
@nb.njit(nogil=True, cache=True)
def array_index(array, items):
    """Locate every element of ``items`` inside ``array``.

    :param array: 1-D array of candidate values to search in.
    :param items: 1-D array of values to look up.
    :return: int64 array with one entry per element of ``items`` holding the
        position of the first equal element of ``array``. An item with no
        equal element keeps the initial value 0, which cannot be told apart
        from a genuine match at position 0.
    """
    to_look_length = items.shape[0]
    arr_length = array.shape[0]

    # Positions are always stored as integers: allocating the result with
    # ``array.dtype`` would yield float "indices" for float labels and could
    # overflow for narrow integer label dtypes such as int8.
    res = np.zeros(to_look_length, dtype=np.int64)

    for i in range(to_look_length):
        for j in range(arr_length):
            if items[i] == array[j]:
                res[i] = j
                # Only the first match is recorded.
                break
    return res
def
transform
(
groups
:
np
.
ndarray
,
x
:
np
.
ndarray
,
func
:
str
,
...
...
@@ -255,3 +270,15 @@ def aggregate(groups, x, func, ddof=1):
raise
ValueError
(
'({0}) is not recognized as valid functor'
.
format
(
func
))
return
value_data
if __name__ == '__main__':
    # Smoke check for array_index: build a lookup table from one random
    # sample, then print the looked-up positions of a second random sample.
    fitted_values = np.random.randint(30, size=1000)
    lookup_table = np.unique(fitted_values)
    queries = np.random.randint(30, size=1000)
    print(array_index(lookup_table, queries))
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment