Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
8ad5bdb1
Commit
8ad5bdb1
authored
Apr 25, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modified winsorized benchmark and tests
parent
3dbd0208
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
186 additions
and
3 deletions
+186
-3
benchmarks.py
alphamind/benchmarks/benchmarks.py
+21
-0
winsorize.py
alphamind/benchmarks/winsorize.py
+87
-0
winsorize.py
alphamind/data/winsorize.py
+18
-3
test_winsorize.py
alphamind/tests/test_winsorize.py
+60
-0
No files found.
alphamind/benchmarks/benchmarks.py
0 → 100644
View file @
8ad5bdb1
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
from
alphamind.benchmarks.neutralize
import
benchmark_neutralize
from
alphamind.benchmarks.standardize
import
benchmark_standardize
from
alphamind.benchmarks.standardize
import
benchmark_standardize_with_group
from
alphamind.benchmarks.winsorize
import
benchmark_winsorize_normal
from
alphamind.benchmarks.winsorize
import
benchmark_winsorize_normal_with_group
if __name__ == '__main__':
    # Table of (benchmark, positional arguments), executed top to bottom.
    # For the winsorize benchmarks the arguments are
    # (n_samples, n_features, n_loops[, n_groups]); the neutralize and
    # standardize benchmarks presumably follow the same layout -- confirm
    # against their definitions.
    benchmark_suite = (
        (benchmark_neutralize, (3000, 10, 1000)),
        (benchmark_standardize, (3000, 10, 1000)),
        (benchmark_standardize_with_group, (3000, 10, 100, 30)),
        (benchmark_winsorize_normal, (3000, 10, 1000)),
        (benchmark_winsorize_normal_with_group, (3000, 10, 100, 30)),
    )
    for run_benchmark, arguments in benchmark_suite:
        run_benchmark(*arguments)
alphamind/benchmarks/winsorize.py
0 → 100644
View file @
8ad5bdb1
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import
datetime
as
dt
import
numpy
as
np
import
pandas
as
pd
from
alphamind.data.winsorize
import
winsorize_normal
def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None:
    """Time ``winsorize_normal`` against an inline NumPy reference.

    A random ``(n_samples, n_features)`` matrix is drawn once; each
    implementation is then called ``n_loops`` times on it and the elapsed
    wall-clock time of each run is printed.

    :param n_samples: number of rows of the random input matrix
    :param n_features: number of columns of the random input matrix
    :param n_loops: how many times each implementation is invoked
    """
    print("-" * 60)
    print("Starting winsorize normal benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))

    num_stds = 2
    x = np.random.randn(n_samples, n_features)

    def report(label, elapsed):
        # One aligned result line per timed implementation.
        print('{0:20s}: {1}'.format(label, elapsed))

    def reference(data):
        # Column-wise clamp to mean +/- num_stds * std, written out by hand.
        spread = data.std(axis=0)
        centre = data.mean(axis=0)
        floor = centre - num_stds * spread
        ceiling = centre + num_stds * spread
        capped = np.where(data > ceiling, ceiling, data)
        return np.where(capped < floor, floor, capped)

    # Library implementation under test.
    tick = dt.datetime.now()
    for _ in range(n_loops):
        winsorize_normal(x, num_stds)
    report('Implemented model', dt.datetime.now() - tick)

    # Hand-written reference.
    tick = dt.datetime.now()
    for _ in range(n_loops):
        reference(x)
    report('Benchmark model', dt.datetime.now() - tick)
def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
    """Time grouped ``winsorize_normal`` against a pandas group-by reference.

    A random ``(n_samples, n_features)`` matrix and a random integer group
    label per row (drawn from ``[0, n_groups)``) are generated once; each
    implementation is then run ``n_loops`` times and its elapsed wall-clock
    time is printed.

    :param n_samples: number of rows of the random input matrix
    :param n_features: number of columns of the random input matrix
    :param n_loops: how many times each implementation is invoked
    :param n_groups: exclusive upper bound of the random group labels
    """
    print("-" * 60)
    print("Starting winsorize normal with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
                                                                                       n_features,
                                                                                       n_loops,
                                                                                       n_groups))

    num_stds = 2
    # Keep this draw order (values first, labels second) so a seeded run
    # produces the same inputs as before.
    x = np.random.randn(n_samples, n_features)
    groups = np.random.randint(n_groups, size=n_samples)

    def report(label, elapsed):
        # One aligned result line per timed implementation.
        print('{0:20s}: {1}'.format(label, elapsed))

    def reference(block):
        # Clamp one group's data to mean +/- num_stds * std.
        spread = block.std(axis=0)
        centre = block.mean(axis=0)
        floor = centre - num_stds * spread
        ceiling = centre + num_stds * spread
        capped = np.where(block > ceiling, ceiling, block)
        return np.where(capped < floor, floor, capped)

    # Library implementation under test.
    tick = dt.datetime.now()
    for _ in range(n_loops):
        winsorize_normal(x, num_stds, groups=groups)
    report('Implemented model', dt.datetime.now() - tick)

    # pandas reference; building the DataFrame is part of the timed work.
    tick = dt.datetime.now()
    for _ in range(n_loops):
        pd.DataFrame(x).groupby(groups).transform(reference)
    report('Benchmark model', dt.datetime.now() - tick)
if __name__ == '__main__':
    # Script entry point: run both winsorize benchmarks back to back.
    # Arguments are (n_samples, n_features, n_loops[, n_groups]).
    for run_benchmark, arguments in (
            (benchmark_winsorize_normal, (3000, 10, 1000)),
            (benchmark_winsorize_normal_with_group, (3000, 10, 1000, 30)),
    ):
        run_benchmark(*arguments)
alphamind/data/winsorize.py
View file @
8ad5bdb1
...
...
@@ -5,10 +5,25 @@ Created on 2017-4-25
@author: cheng.li
"""
import
pandas
as
pd
import
numpy
as
np
def
winsorize
(
x
,
num_stds
=
3
,
groups
=
None
):
def winsorize_normal(x: np.ndarray, num_stds: int = 3, groups: np.ndarray = None) -> np.ndarray:
    """Clamp every column of ``x`` to ``mean +/- num_stds * std``.

    Values above the upper bound are replaced by the upper bound and values
    below the lower bound by the lower bound; everything else is returned
    unchanged. The input array is not modified.

    :param x: 2-D array of values, winsorized column by column
    :param num_stds: half-width of the accepted band in standard deviations
    :param groups: optional label per row of ``x``; when given, mean and
        standard deviation are computed within each group of rows instead
        of over the whole column
    :return: array with the same shape as ``x`` holding the clamped values

    Note: the grouped path uses pandas' group-wise ``std`` (sample standard
    deviation, ``ddof=1``), while the ungrouped path uses ``ndarray.std``
    (population standard deviation, ``ddof=0``).
    """
    if groups is not None:
        grouped = pd.DataFrame(x).groupby(groups)
        # Use the string aliases rather than np.mean / np.std: pandas has
        # historically remapped those callables to its own reducers and is
        # phasing that remapping out, which would silently switch the std
        # from ddof=1 to ddof=0. The aliases pin the established behaviour.
        mean_values = grouped.transform('mean').values
        std_values = grouped.transform('std').values
    else:
        std_values = x.std(axis=0)
        mean_values = x.mean(axis=0)

    ubound = mean_values + num_stds * std_values
    lbound = mean_values - num_stds * std_values

    # Two np.where passes (not np.clip) so a NaN bound leaves the value
    # untouched, e.g. the std of a single-row group.
    res = np.where(x > ubound, ubound, x)
    res = np.where(res < lbound, lbound, res)

    return res
alphamind/tests/test_winsorize.py
0 → 100644
View file @
8ad5bdb1
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import
unittest
import
numpy
as
np
import
pandas
as
pd
from
alphamind.data.winsorize
import
winsorize_normal
class TestWinsorize(unittest.TestCase):
    """Checks ``winsorize_normal`` against independently computed results."""

    def test_winsorize_normal(self):
        """Ungrouped case: each column is clamped to mean +/- 2 std."""
        num_stds = 2
        x = np.random.randn(3000, 10)

        calc_winsorized = winsorize_normal(x, num_stds)

        std_values = x.std(axis=0)
        mean_value = x.mean(axis=0)

        lower_bound = mean_value - num_stds * std_values
        upper_bound = mean_value + num_stds * std_values

        # The result must line up with the input before comparing columns.
        self.assertEqual(calc_winsorized.shape, x.shape)

        for i in range(x.shape[1]):
            # x[:, i] is a view; copy it so that building the expected
            # column does not clamp the input array in place.
            col_data = x[:, i].copy()
            col_data[col_data > upper_bound[i]] = upper_bound[i]
            col_data[col_data < lower_bound[i]] = lower_bound[i]

            calculated_col = calc_winsorized[:, i]
            np.testing.assert_array_almost_equal(col_data, calculated_col)

    def test_winsorize_normal_with_group(self):
        """Grouped case: compared with a pandas group-by/transform reference."""
        num_stds = 2
        x = np.random.randn(3000, 10)
        groups = np.random.randint(30, size=3000)

        cal_winsorized = winsorize_normal(x, num_stds, groups)

        def impl(x):
            # Clamp one group's data to its own mean +/- num_stds * std.
            std_values = x.std(axis=0)
            mean_value = x.mean(axis=0)

            lower_bound = mean_value - num_stds * std_values
            upper_bound = mean_value + num_stds * std_values

            res = np.where(x > upper_bound, upper_bound, x)
            res = np.where(res < lower_bound, lower_bound, res)
            return res

        exp_winsorized = pd.DataFrame(x).groupby(groups).transform(impl).values
        np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized)
if __name__ == "__main__":
    # Run the test cases above when this module is executed as a script.
    unittest.main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment