Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
1a6908af
Commit
1a6908af
authored
Jul 09, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update data utilities
parent
55b727e7
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
12 additions
and
39 deletions
+12
-39
neutralize.py
alphamind/data/neutralize.py
+2
-2
standardize.py
alphamind/data/standardize.py
+7
-34
winsorize.py
alphamind/data/winsorize.py
+2
-2
test_winsorize.py
alphamind/tests/data/test_winsorize.py
+1
-1
No files found.
alphamind/data/neutralize.py
View file @
1a6908af
...
...
@@ -87,8 +87,8 @@ def _sub_step(x, y, w, curr_idx, res):
@
nb
.
njit
(
nogil
=
True
,
cache
=
True
)
def
ls_fit
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
,
w
:
np
.
ndarray
)
->
np
.
ndarray
:
x_bar
=
x
.
T
b
=
np
.
linalg
.
solve
(
x_bar
*
w
@
x
,
x_bar
*
w
@
y
)
x_bar
=
x
.
T
*
w
b
=
np
.
linalg
.
solve
(
x_bar
@
x
,
x_bar
@
y
)
return
b
...
...
alphamind/data/standardize.py
View file @
1a6908af
...
...
@@ -9,12 +9,9 @@ import numpy as np
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
array_index
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_std
from
alphamind.utilities
import
array_index
from
numba
import
jitclass
from
numba
import
int32
,
float64
def
standardize
(
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
,
ddof
=
1
)
->
np
.
ndarray
:
...
...
@@ -31,59 +28,35 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
class
Standardizer
(
object
):
def
__init__
(
self
,
ddof
=
1
):
def
__init__
(
self
,
ddof
:
int
=
1
):
self
.
ddof_
=
ddof
self
.
mean_
=
None
self
.
std_
=
None
def
fit
(
self
,
x
):
def
fit
(
self
,
x
:
np
.
ndarray
):
self
.
mean_
=
simple_mean
(
x
,
axis
=
0
)
self
.
std_
=
simple_std
(
x
,
axis
=
0
,
ddof
=
self
.
ddof_
)
def
transform
(
self
,
x
)
:
def
transform
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
return
(
x
-
self
.
mean_
)
/
self
.
std_
class
GroupedStandardizer
(
object
):
def
__init__
(
self
,
ddof
=
1
):
def
__init__
(
self
,
ddof
:
int
=
1
):
self
.
labels_
=
None
self
.
mean_
=
None
self
.
std_
=
None
self
.
ddof_
=
ddof
def
fit
(
self
,
x
):
def
fit
(
self
,
x
:
np
.
ndarray
):
raw_groups
=
x
[:,
0
]
.
astype
(
int
)
groups
=
group_mapping
(
raw_groups
)
self
.
mean_
=
aggregate
(
groups
,
x
[:,
1
:],
'mean'
)
self
.
std_
=
aggregate
(
groups
,
x
[:,
1
:],
'std'
,
self
.
ddof_
)
self
.
labels_
=
np
.
unique
(
raw_groups
)
def
transform
(
self
,
x
)
:
def
transform
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
groups
=
x
[:,
0
]
.
astype
(
int
)
index
=
array_index
(
self
.
labels_
,
groups
)
return
(
x
[:,
1
:]
-
self
.
mean_
[
index
])
/
self
.
std_
[
index
]
if
__name__
==
'__main__'
:
import
datetime
as
dt
x_value
=
np
.
random
.
randn
(
1000
,
3
)
groups
=
np
.
random
.
randint
(
20
,
size
=
1000
)
x
=
np
.
concatenate
([
groups
.
reshape
((
-
1
,
1
)),
x_value
],
axis
=
1
)
start
=
dt
.
datetime
.
now
()
for
i
in
range
(
10000
):
x1
=
standardize
(
x_value
,
groups
)
print
(
dt
.
datetime
.
now
()
-
start
)
s
=
GroupedStandardizer
(
1
)
start
=
dt
.
datetime
.
now
()
for
i
in
range
(
10000
):
s
.
fit
(
x
)
x2
=
s
.
transform
(
x
)
print
(
dt
.
datetime
.
now
()
-
start
)
np
.
testing
.
assert_array_almost_equal
(
x1
,
x2
)
\ No newline at end of file
alphamind/data/winsorize.py
View file @
1a6908af
...
...
@@ -52,14 +52,14 @@ def mask_values_1d(x: np.ndarray,
return
res
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
ddof
=
1
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
if
groups
is
not
None
:
groups
=
group_mapping
(
groups
)
mean_values
=
transform
(
groups
,
x
,
'mean'
)
std_values
=
transform
(
groups
,
x
,
'std'
)
res
=
mask_values_2d
(
x
,
mean_values
,
std_values
,
num_stds
)
else
:
std_values
=
simple_std
(
x
,
axis
=
0
)
std_values
=
simple_std
(
x
,
axis
=
0
,
ddof
=
ddof
)
mean_values
=
simple_mean
(
x
,
axis
=
0
)
res
=
mask_values_1d
(
x
,
mean_values
,
std_values
,
num_stds
)
return
res
alphamind/tests/data/test_winsorize.py
View file @
1a6908af
...
...
@@ -36,7 +36,7 @@ class TestWinsorize(unittest.TestCase):
np
.
testing
.
assert_array_almost_equal
(
col_data
,
calculated_col
)
def
test_winsorize_normal_with_group
(
self
):
cal_winsorized
=
winsorize_normal
(
self
.
x
,
self
.
num_stds
,
self
.
groups
)
cal_winsorized
=
winsorize_normal
(
self
.
x
,
self
.
num_stds
,
groups
=
self
.
groups
)
def
impl
(
x
):
std_values
=
x
.
std
(
axis
=
0
,
ddof
=
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment