Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
1214d5ec
Commit
1214d5ec
authored
Jun 13, 2018
by
wegamekinglc
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
f3f3e92d
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
57 additions
and
32 deletions
+57
-32
standardize.py
alphamind/data/standardize.py
+24
-31
winsorize.py
alphamind/data/winsorize.py
+33
-1
No files found.
alphamind/data/standardize.py
View file @
1214d5ec
...
...
@@ -39,34 +39,27 @@ def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
class
Standardizer
(
object
):
def
__init__
(
self
,
ddof
:
int
=
1
):
self
.
ddof_
=
ddof
self
.
mean_
=
None
self
.
std_
=
None
self
.
ddof
=
ddof
self
.
mean
=
None
self
.
std
=
None
self
.
labels
=
None
def
fit
(
self
,
x
:
np
.
ndarray
):
self
.
mean_
=
simple_mean
(
x
,
axis
=
0
)
self
.
std_
=
simple_std
(
x
,
axis
=
0
,
ddof
=
self
.
ddof_
)
def
transform
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
return
(
x
-
self
.
mean_
)
/
np
.
maximum
(
self
.
std_
,
1e-8
)
class
GroupedStandardizer
(
object
):
def
__init__
(
self
,
ddof
:
int
=
1
):
self
.
labels_
=
None
self
.
mean_
=
None
self
.
std_
=
None
self
.
ddof_
=
ddof
def
fit
(
self
,
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
):
if
groups
is
not
None
:
group_index
=
group_mapping
(
groups
)
self
.
mean
=
aggregate
(
group_index
,
x
,
'mean'
)
self
.
std
=
aggregate
(
group_index
,
x
,
'std'
,
self
.
ddof
)
self
.
labels
=
np
.
unique
(
groups
)
else
:
self
.
mean
=
simple_mean
(
x
,
axis
=
0
)
self
.
std
=
simple_std
(
x
,
axis
=
0
,
ddof
=
self
.
ddof
)
def
fit
(
self
,
x
:
np
.
ndarray
)
:
raw_groups
=
x
[:,
0
]
.
astype
(
int
)
groups
=
group_mapping
(
raw_
groups
)
self
.
mean_
=
aggregate
(
groups
,
x
[:,
1
:],
'mean'
)
self
.
std_
=
aggregate
(
groups
,
x
[:,
1
:],
'std'
,
self
.
ddof_
)
self
.
labels_
=
np
.
unique
(
raw_groups
)
def
transform
(
self
,
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
if
groups
is
not
None
:
index
=
array_index
(
self
.
labels
,
groups
)
return
(
x
-
self
.
mean
[
index
])
/
np
.
maximum
(
self
.
std
[
index
],
1e-8
)
else
:
return
(
x
-
self
.
mean
)
/
np
.
maximum
(
self
.
std
,
1e-8
)
def
transform
(
self
,
x
:
np
.
ndarray
)
->
np
.
ndarray
:
groups
=
x
[:,
0
]
.
astype
(
int
)
index
=
array_index
(
self
.
labels_
,
groups
)
return
(
x
[:,
1
:]
-
self
.
mean_
[
index
])
/
np
.
maximum
(
self
.
std_
[
index
],
1e-8
)
def
__call__
(
self
,
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
return
standardize
(
x
,
groups
,
self
.
ddof
)
alphamind/data/winsorize.py
View file @
1214d5ec
...
...
@@ -8,7 +8,9 @@ Created on 2017-4-25
import
numpy
as
np
import
numba
as
nb
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
array_index
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_std
...
...
@@ -56,10 +58,40 @@ def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1, groups: np.ndarra
if
groups
is
not
None
:
groups
=
group_mapping
(
groups
)
mean_values
=
transform
(
groups
,
x
,
'mean'
)
std_values
=
transform
(
groups
,
x
,
'std'
)
std_values
=
transform
(
groups
,
x
,
'std'
,
ddof
)
res
=
mask_values_2d
(
x
,
mean_values
,
std_values
,
num_stds
)
else
:
std_values
=
simple_std
(
x
,
axis
=
0
,
ddof
=
ddof
)
mean_values
=
simple_mean
(
x
,
axis
=
0
)
res
=
mask_values_1d
(
x
,
mean_values
,
std_values
,
num_stds
)
return
res
class
NormalWinsorizer
(
object
):
def
__init__
(
self
,
num_stds
:
int
=
3
,
ddof
=
1
):
self
.
num_stds
=
num_stds
self
.
ddof
=
ddof
self
.
mean
=
None
self
.
std
=
None
self
.
labels
=
None
def
fit
(
self
,
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
):
if
groups
is
not
None
:
group_index
=
group_mapping
(
groups
)
self
.
mean
=
aggregate
(
group_index
,
x
,
'mean'
)
self
.
std
=
aggregate
(
group_index
,
x
,
'std'
,
self
.
ddof
)
self
.
labels
=
np
.
unique
(
groups
)
else
:
self
.
mean
=
simple_mean
(
x
,
axis
=
0
)
self
.
std
=
simple_std
(
x
,
axis
=
0
,
ddof
=
self
.
ddof
)
def
transform
(
self
,
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
if
groups
is
not
None
:
index
=
array_index
(
self
.
labels
,
groups
)
return
mask_values_2d
(
x
,
self
.
mean
[
index
],
self
.
std
[
index
],
self
.
num_stds
)
else
:
return
mask_values_1d
(
x
,
self
.
mean
,
self
.
std
,
self
.
num_stds
)
def
__call__
(
self
,
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
return
winsorize_normal
(
x
,
self
.
num_stds
,
self
.
ddof
,
groups
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment