Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
70baa5f1
Commit
70baa5f1
authored
Jun 25, 2018
by
wegamekinglc
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update xgboost stamp
parent
8a4af8cd
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
162 additions
and
162 deletions
+162
-162
standardize.py
alphamind/data/standardize.py
+65
-65
winsorize.py
alphamind/data/winsorize.py
+96
-96
xgboost
xgboost
+1
-1
No files found.
alphamind/data/standardize.py
View file @
70baa5f1
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
"""
Created on 2017-4-25
Created on 2017-4-25
@author: cheng.li
@author: cheng.li
"""
"""
import
numpy
as
np
import
numpy
as
np
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
array_index
from
alphamind.utilities
import
array_index
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_std
from
alphamind.utilities
import
simple_std
from
alphamind.utilities
import
simple_sqrsum
from
alphamind.utilities
import
simple_sqrsum
def standardize(x: np.ndarray, groups: np.ndarray = None, ddof=1) -> np.ndarray:
    """Centre ``x`` and divide it by its standard deviation.

    Without ``groups`` the mean and standard deviation are computed by
    ``simple_mean`` / ``simple_std`` along ``axis=0``.  With ``groups`` the
    labels are first passed through ``group_mapping`` and the statistics are
    obtained from ``transform`` with the ``'mean'`` and ``'std'`` modes.

    :param x: values to standardize
    :param groups: optional group labels; ``None`` selects the ungrouped path
    :param ddof: forwarded to the standard-deviation computation
    :return: ``(x - mean) / max(std, 1e-8)``
    """
    if groups is None:
        center = simple_mean(x, axis=0)
        scale = simple_std(x, axis=0, ddof=ddof)
    else:
        group_ids = group_mapping(groups)
        center = transform(group_ids, x, 'mean')
        scale = transform(group_ids, x, 'std', ddof)

    # The divisor is floored at 1e-8 so a zero deviation cannot divide by zero.
    return (x - center) / np.maximum(scale, 1e-8)
def projection(x: np.ndarray, groups: np.ndarray = None, axis=1) -> np.ndarray:
    """Scale ``x`` either per group or by ``simple_sqrsum`` along ``axis``.

    The grouped path is taken only when ``groups`` is given *and*
    ``axis == 0``; it returns ``transform(group_mapping(groups), x, 'project')``.
    In every other case ``x`` is divided by ``simple_sqrsum(x, axis=axis)``
    reshaped to a column vector.

    :param x: values to project
    :param groups: optional group labels (only honoured when ``axis == 0``)
    :param axis: axis handed to ``simple_sqrsum`` on the ungrouped path
    :return: the scaled array
    """
    grouped = groups is not None and axis == 0
    if not grouped:
        # NOTE(review): the fixed (-1, 1) reshape presumably expects one norm
        # per row (axis=1) -- confirm the intended behaviour for axis=0.
        norms = simple_sqrsum(x, axis=axis)
        return x / norms.reshape((-1, 1))

    group_ids = group_mapping(groups)
    return transform(group_ids, x, 'project')
class Standardizer(object):
    """Stateful counterpart of ``standardize``.

    ``fit`` stores the mean / standard deviation (and, on the grouped path,
    the unique group labels); ``transform`` applies the stored statistics to
    new data.  Calling the instance standardizes ``x`` directly via
    ``standardize`` and does not use the fitted state.
    """

    def __init__(self, ddof: int = 1):
        # ddof is forwarded to every standard-deviation computation.
        self.ddof = ddof
        # Populated by fit(); None until then.
        self.mean = None
        self.std = None
        self.labels = None

    def fit(self, x: np.ndarray, groups: np.ndarray = None):
        """Compute and store the statistics of ``x``.

        :param x: training values
        :param groups: optional group labels; when given, statistics come from
            ``aggregate`` and ``labels`` is set to ``np.unique(groups)``
        """
        if groups is None:
            # labels is intentionally left untouched on the ungrouped path.
            self.mean = simple_mean(x, axis=0)
            self.std = simple_std(x, axis=0, ddof=self.ddof)
            return

        group_index = group_mapping(groups)
        self.mean = aggregate(group_index, x, 'mean')
        self.std = aggregate(group_index, x, 'std', self.ddof)
        self.labels = np.unique(groups)

    def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        """Standardize ``x`` with the statistics stored by ``fit``.

        :param x: values to standardize
        :param groups: optional group labels; when given, ``array_index`` is
            used to look up the stored rows for each label
        :return: ``(x - mean) / max(std, 1e-8)``
        """
        center, scale = self.mean, self.std
        if groups is not None:
            rows = array_index(self.labels, groups)
            center, scale = center[rows], scale[rows]
        # Same 1e-8 floor as standardize() to avoid dividing by zero.
        return (x - center) / np.maximum(scale, 1e-8)

    def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        """Standardize ``x`` in one shot using this instance's ``ddof``."""
        return standardize(x, groups, self.ddof)
alphamind/data/winsorize.py
View file @
70baa5f1
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
"""
Created on 2017-4-25
Created on 2017-4-25
@author: cheng.li
@author: cheng.li
"""
"""
import
numpy
as
np
import
numpy
as
np
import
numba
as
nb
import
numba
as
nb
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
group_mapping
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
aggregate
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
transform
from
alphamind.utilities
import
array_index
from
alphamind.utilities
import
array_index
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_mean
from
alphamind.utilities
import
simple_std
from
alphamind.utilities
import
simple_std
@nb.njit(nogil=True, cache=True)
def mask_values_2d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
                   num_stds: int = 3) -> np.ndarray:
    """Clamp each element of ``x`` to ``mean +/- num_stds * std``.

    ``mean_values`` and ``std_values`` are indexed with the same ``[i, j]``
    position as ``x``, so every element has its own bounds.

    :param x: 2-d input; it is copied, not modified
    :param mean_values: per-element centre of the allowed band
    :param std_values: per-element standard deviation
    :param num_stds: half-width of the band in standard deviations
    :return: a copy of ``x`` with out-of-band values replaced by the bound
    """
    n_rows, n_cols = x.shape
    clipped = x.copy()

    for row in range(n_rows):
        for col in range(n_cols):
            value = x[row, col]
            center = mean_values[row, col]
            spread = num_stds * std_values[row, col]

            upper = center + spread
            if value > upper:
                clipped[row, col] = upper
                continue

            lower = center - spread
            if value < lower:
                clipped[row, col] = lower

    return clipped
@nb.njit(nogil=True, cache=True)
def mask_values_1d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
                   num_stds: int = 3) -> np.ndarray:
    """Clamp each column of ``x`` to ``mean +/- num_stds * std``.

    ``mean_values`` and ``std_values`` are indexed by column only, so all rows
    of a column share the same bounds.

    :param x: 2-d input; it is copied, not modified
    :param mean_values: per-column centre of the allowed band
    :param std_values: per-column standard deviation
    :param num_stds: half-width of the band in standard deviations
    :return: a copy of ``x`` with out-of-band values replaced by the bound
    """
    n_rows, n_cols = x.shape
    clipped = x.copy()

    for col in range(n_cols):
        # Bounds depend on the column only, so compute them once per column.
        center = mean_values[col]
        spread = num_stds * std_values[col]
        upper = center + spread
        lower = center - spread

        for row in range(n_rows):
            value = x[row, col]
            if value > upper:
                clipped[row, col] = upper
            elif value < lower:
                clipped[row, col] = lower

    return clipped
def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1, groups: np.ndarray = None) -> np.ndarray:
    """Clamp ``x`` to ``mean +/- num_stds * std``.

    Without ``groups`` the statistics are taken along ``axis=0`` and applied
    with ``mask_values_1d``.  With ``groups`` the labels go through
    ``group_mapping``, the statistics come from ``transform`` and the
    clamping is done by ``mask_values_2d``.

    :param x: values to winsorize
    :param num_stds: half-width of the allowed band in standard deviations
    :param ddof: forwarded to the standard-deviation computation
    :param groups: optional group labels; ``None`` selects the ungrouped path
    :return: the clamped copy produced by the masking helper
    """
    if groups is None:
        std_values = simple_std(x, axis=0, ddof=ddof)
        mean_values = simple_mean(x, axis=0)
        return mask_values_1d(x, mean_values, std_values, num_stds)

    group_ids = group_mapping(groups)
    mean_values = transform(group_ids, x, 'mean')
    std_values = transform(group_ids, x, 'std', ddof)
    return mask_values_2d(x, mean_values, std_values, num_stds)
class NormalWinsorizer(object):
    """Stateful counterpart of ``winsorize_normal``.

    ``fit`` stores the mean / standard deviation (and, on the grouped path,
    the unique group labels); ``transform`` clamps new data with the stored
    statistics.  Calling the instance winsorizes ``x`` directly via
    ``winsorize_normal`` and does not use the fitted state.
    """

    def __init__(self, num_stds: int = 3, ddof=1):
        # Half-width of the allowed band, in standard deviations.
        self.num_stds = num_stds
        # Forwarded to every standard-deviation computation.
        self.ddof = ddof
        # Populated by fit(); None until then.
        self.mean = None
        self.std = None
        self.labels = None

    def fit(self, x: np.ndarray, groups: np.ndarray = None):
        """Compute and store the statistics of ``x``.

        :param x: training values
        :param groups: optional group labels; when given, statistics come from
            ``aggregate`` and ``labels`` is set to ``np.unique(groups)``
        """
        if groups is None:
            # labels is intentionally left untouched on the ungrouped path.
            self.mean = simple_mean(x, axis=0)
            self.std = simple_std(x, axis=0, ddof=self.ddof)
            return

        group_index = group_mapping(groups)
        self.mean = aggregate(group_index, x, 'mean')
        self.std = aggregate(group_index, x, 'std', self.ddof)
        self.labels = np.unique(groups)

    def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        """Clamp ``x`` using the statistics stored by ``fit``.

        :param x: values to winsorize
        :param groups: optional group labels; when given, ``array_index`` is
            used to look up the stored rows for each label
        :return: the clamped copy produced by the masking helper
        """
        if groups is None:
            return mask_values_1d(x, self.mean, self.std, self.num_stds)

        rows = array_index(self.labels, groups)
        return mask_values_2d(x, self.mean[rows], self.std[rows], self.num_stds)

    def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        """Winsorize ``x`` in one shot using this instance's settings."""
        return winsorize_normal(x, self.num_stds, self.ddof, groups)
\ No newline at end of file
xgboost
@
5cd851cc
Subproject commit
a96039141a8dd2d053ae8f415c8fc8e52223216e
Subproject commit
5cd851ccef8cf0a0a71094d0d0e33a9d102f1f55
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment