Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
6eb178dc
Commit
6eb178dc
authored
Jul 03, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added scikit-learn compatible standardize
parent
25b755fe
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
139 additions
and
3 deletions
+139
-3
.gitignore
.gitignore
+3
-0
standardize.py
alphamind/data/standardize.py
+66
-1
test_standardize.py
alphamind/tests/data/test_standardize.py
+43
-2
utilities.py
alphamind/utilities.py
+27
-0
No files found.
.gitignore
View file @
6eb178dc
...
@@ -7,3 +7,6 @@ Alpha_Mind.egg-info/*
...
@@ -7,3 +7,6 @@ Alpha_Mind.egg-info/*
*.c
*.c
*.cpp
*.cpp
*.html
*.html
*.nbc
*.nbi
/notebooks/.ipynb_checkpoints
\ No newline at end of file
alphamind/data/standardize.py
View file @
6eb178dc
...
@@ -8,8 +8,13 @@ Created on 2017-4-25
...
@@ -8,8 +8,13 @@ Created on 2017-4-25
import
numpy
as
np
import
numpy
as
np
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_std
from
alphamind.utilities
import
simple_std
from
alphamind.utilities
import
array_index
from
numba
import
jitclass
from
numba
import
int32
,
float64
def
standardize
(
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
,
ddof
=
1
)
->
np
.
ndarray
:
def
standardize
(
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
,
ddof
=
1
)
->
np
.
ndarray
:
...
@@ -21,4 +26,64 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
...
@@ -21,4 +26,64 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
return
(
x
-
mean_values
)
/
std_values
return
(
x
-
mean_values
)
/
std_values
else
:
else
:
return
(
x
-
simple_mean
(
x
,
axis
=
0
))
/
simple_std
(
x
,
axis
=
0
)
return
(
x
-
simple_mean
(
x
,
axis
=
0
))
/
simple_std
(
x
,
axis
=
0
,
ddof
=
ddof
)
class
Standardizer
(
object
):
def
__init__
(
self
,
ddof
=
1
):
self
.
ddof_
=
ddof
self
.
mean_
=
None
self
.
std_
=
None
def
fit
(
self
,
x
):
self
.
mean_
=
simple_mean
(
x
,
axis
=
0
)
self
.
std_
=
simple_std
(
x
,
axis
=
0
,
ddof
=
self
.
ddof_
)
def
transform
(
self
,
x
):
return
(
x
-
self
.
mean_
)
/
self
.
std_
class
GroupedStandardizer
(
object
):
def
__init__
(
self
,
ddof
=
1
):
self
.
labels_
=
None
self
.
mean_
=
None
self
.
std_
=
None
self
.
ddof_
=
ddof
def
fit
(
self
,
x
):
raw_groups
=
x
[:,
0
]
.
astype
(
int
)
groups
=
group_mapping
(
raw_groups
)
self
.
mean_
=
aggregate
(
groups
,
x
[:,
1
:],
'mean'
)
self
.
std_
=
aggregate
(
groups
,
x
[:,
1
:],
'std'
,
self
.
ddof_
)
self
.
labels_
=
np
.
unique
(
raw_groups
)
def
transform
(
self
,
x
):
groups
=
x
[:,
0
]
.
astype
(
int
)
index
=
array_index
(
self
.
labels_
,
groups
)
return
(
x
[:,
1
:]
-
self
.
mean_
[
index
])
/
self
.
std_
[
index
]
if
__name__
==
'__main__'
:
import
datetime
as
dt
x_value
=
np
.
random
.
randn
(
1000
,
3
)
groups
=
np
.
random
.
randint
(
20
,
size
=
1000
)
x
=
np
.
concatenate
([
groups
.
reshape
((
-
1
,
1
)),
x_value
],
axis
=
1
)
start
=
dt
.
datetime
.
now
()
for
i
in
range
(
10000
):
x1
=
standardize
(
x_value
,
groups
)
print
(
dt
.
datetime
.
now
()
-
start
)
s
=
GroupedStandardizer
(
1
)
start
=
dt
.
datetime
.
now
()
for
i
in
range
(
10000
):
s
.
fit
(
x
)
x2
=
s
.
transform
(
x
)
print
(
dt
.
datetime
.
now
()
-
start
)
np
.
testing
.
assert_array_almost_equal
(
x1
,
x2
)
\ No newline at end of file
alphamind/tests/data/test_standardize.py
View file @
6eb178dc
...
@@ -10,6 +10,8 @@ import numpy as np
...
@@ -10,6 +10,8 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
from
scipy.stats
import
zscore
from
scipy.stats
import
zscore
from
alphamind.data.standardize
import
standardize
from
alphamind.data.standardize
import
standardize
from
alphamind.data.standardize
import
Standardizer
from
alphamind.data.standardize
import
GroupedStandardizer
class
TestStandardize
(
unittest
.
TestCase
):
class
TestStandardize
(
unittest
.
TestCase
):
...
@@ -23,7 +25,7 @@ class TestStandardize(unittest.TestCase):
...
@@ -23,7 +25,7 @@ class TestStandardize(unittest.TestCase):
exp_zscore
=
zscore
(
self
.
x
,
ddof
=
1
)
exp_zscore
=
zscore
(
self
.
x
,
ddof
=
1
)
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
def
test_standardize_with_group
(
self
):
def
test_standardize_with_group
(
self
):
calc_zscore
=
standardize
(
self
.
x
,
self
.
groups
)
calc_zscore
=
standardize
(
self
.
x
,
self
.
groups
)
exp_zscore
=
pd
.
DataFrame
(
self
.
x
)
.
\
exp_zscore
=
pd
.
DataFrame
(
self
.
x
)
.
\
...
@@ -31,6 +33,45 @@ class TestStandardize(unittest.TestCase):
...
@@ -31,6 +33,45 @@ class TestStandardize(unittest.TestCase):
transform
(
lambda
s
:
(
s
-
s
.
mean
(
axis
=
0
))
/
s
.
std
(
axis
=
0
,
ddof
=
1
))
transform
(
lambda
s
:
(
s
-
s
.
mean
(
axis
=
0
))
/
s
.
std
(
axis
=
0
,
ddof
=
1
))
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
def
test_standardizer
(
self
):
s
=
Standardizer
()
s
.
fit
(
self
.
x
)
calc_zscore
=
s
.
transform
(
self
.
x
)
exp_zscore
=
standardize
(
self
.
x
)
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
def
test_groupedstandardizer
(
self
):
x
=
np
.
concatenate
([
self
.
groups
.
reshape
((
-
1
,
1
)),
self
.
x
],
axis
=
1
)
s
=
GroupedStandardizer
()
s
.
fit
(
x
)
calc_zscore
=
s
.
transform
(
x
)
exp_zscore
=
standardize
(
self
.
x
,
self
.
groups
)
np
.
testing
.
assert_array_almost_equal
(
calc_zscore
,
exp_zscore
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
import
datetime
as
dt
from
sklearn.preprocessing
import
StandardScaler
x
=
np
.
random
.
randn
(
1000
,
2
)
y
=
np
.
random
.
randn
(
50
,
2
)
start
=
dt
.
datetime
.
now
()
for
i
in
range
(
10000
):
s1
=
StandardScaler
()
s1
.
fit
(
x
)
x1
=
s1
.
transform
(
y
)
print
(
dt
.
datetime
.
now
()
-
start
)
start
=
dt
.
datetime
.
now
()
for
i
in
range
(
10000
):
s2
=
Standardizer
(
ddof
=
0
)
s2
.
fit
(
x
)
x2
=
s2
.
transform
(
y
)
print
(
dt
.
datetime
.
now
()
-
start
)
np
.
testing
.
assert_array_almost_equal
(
x1
,
x2
)
\ No newline at end of file
alphamind/utilities.py
View file @
6eb178dc
...
@@ -219,6 +219,21 @@ def scale_value(groups, source, x, scale):
...
@@ -219,6 +219,21 @@ def scale_value(groups, source, x, scale):
return
destination
return
destination
@
nb
.
njit
(
nogil
=
True
,
cache
=
True
)
def
array_index
(
array
,
items
):
to_look_length
=
items
.
shape
[
0
]
arr_length
=
array
.
shape
[
0
]
res
=
np
.
zeros
(
to_look_length
,
dtype
=
array
.
dtype
)
for
i
in
range
(
to_look_length
):
for
j
in
range
(
arr_length
):
if
items
[
i
]
==
array
[
j
]:
res
[
i
]
=
j
break
return
res
def
transform
(
groups
:
np
.
ndarray
,
def
transform
(
groups
:
np
.
ndarray
,
x
:
np
.
ndarray
,
x
:
np
.
ndarray
,
func
:
str
,
func
:
str
,
...
@@ -255,3 +270,15 @@ def aggregate(groups, x, func, ddof=1):
...
@@ -255,3 +270,15 @@ def aggregate(groups, x, func, ddof=1):
raise
ValueError
(
'({0}) is not recognized as valid functor'
.
format
(
func
))
raise
ValueError
(
'({0}) is not recognized as valid functor'
.
format
(
func
))
return
value_data
return
value_data
if
__name__
==
'__main__'
:
x1
=
np
.
random
.
randint
(
30
,
size
=
1000
)
array
=
np
.
unique
(
x1
)
x2
=
np
.
random
.
randint
(
30
,
size
=
1000
)
res
=
array_index
(
array
,
x2
)
print
(
res
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment