Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
2990e8c3
Commit
2990e8c3
authored
May 03, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
improve the performance of winsorize
parent
5ea39a77
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
76 additions
and
32 deletions
+76
-32
benchmarks.py
alphamind/benchmarks/benchmarks.py
+24
-24
neutralize.py
alphamind/data/neutralize.py
+4
-0
winsorize.py
alphamind/data/winsorize.py
+48
-8
No files found.
alphamind/benchmarks/benchmarks.py
View file @
2990e8c3
...
...
@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
if
__name__
==
'__main__'
:
benchmark_neutralize
(
3000
,
10
,
1000
)
benchmark_neutralize_with_groups
(
3000
,
10
,
1000
,
30
)
benchmark_neutralize
(
30
,
3
,
50000
)
benchmark_neutralize_with_groups
(
30
,
3
,
50000
,
3
)
benchmark_neutralize
(
50000
,
50
,
20
)
benchmark_neutralize_with_groups
(
50000
,
50
,
20
,
50
)
benchmark_standardize
(
3000
,
10
,
1000
)
benchmark_standardize_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_standardize
(
30
,
10
,
50000
)
benchmark_standardize_with_group
(
30
,
10
,
5000
,
5
)
benchmark_standardize
(
50000
,
50
,
20
)
benchmark_standardize_with_group
(
50000
,
50
,
20
,
50
)
#
benchmark_neutralize(3000, 10, 1000)
#
benchmark_neutralize_with_groups(3000, 10, 1000, 30)
#
benchmark_neutralize(30, 3, 50000)
#
benchmark_neutralize_with_groups(30, 3, 50000, 3)
#
benchmark_neutralize(50000, 50, 20)
#
benchmark_neutralize_with_groups(50000, 50, 20, 50)
#
benchmark_standardize(3000, 10, 1000)
#
benchmark_standardize_with_group(3000, 10, 1000, 30)
#
benchmark_standardize(30, 10, 50000)
#
benchmark_standardize_with_group(30, 10, 5000, 5)
#
benchmark_standardize(50000, 50, 20)
#
benchmark_standardize_with_group(50000, 50, 20, 50)
benchmark_winsorize_normal
(
3000
,
10
,
1000
)
benchmark_winsorize_normal_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_winsorize_normal
(
30
,
10
,
50000
)
benchmark_winsorize_normal_with_group
(
30
,
10
,
5000
,
5
)
benchmark_winsorize_normal
(
50000
,
50
,
20
)
benchmark_winsorize_normal_with_group
(
50000
,
50
,
20
,
50
)
benchmark_build_rank
(
3000
,
1000
,
300
)
benchmark_build_rank_with_group
(
3000
,
1000
,
10
,
30
)
benchmark_build_rank
(
30
,
50000
,
3
)
benchmark_build_rank_with_group
(
30
,
50000
,
1
,
3
)
benchmark_build_rank
(
50000
,
20
,
3000
)
benchmark_build_rank_with_group
(
50000
,
20
,
10
,
300
)
benchmark_simple_settle
(
3000
,
10
,
1000
)
benchmark_simple_settle_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_simple_settle
(
30
,
10
,
50000
)
benchmark_simple_settle_with_group
(
30
,
10
,
50000
,
5
)
benchmark_simple_settle
(
50000
,
50
,
20
)
benchmark_simple_settle_with_group
(
50000
,
50
,
20
,
50
)
#
benchmark_build_rank(3000, 1000, 300)
#
benchmark_build_rank_with_group(3000, 1000, 10, 30)
#
benchmark_build_rank(30, 50000, 3)
#
benchmark_build_rank_with_group(30, 50000, 1, 3)
#
benchmark_build_rank(50000, 20, 3000)
#
benchmark_build_rank_with_group(50000, 20, 10, 300)
#
benchmark_simple_settle(3000, 10, 1000)
#
benchmark_simple_settle_with_group(3000, 10, 1000, 30)
#
benchmark_simple_settle(30, 10, 50000)
#
benchmark_simple_settle_with_group(30, 10, 50000, 5)
#
benchmark_simple_settle(50000, 50, 20)
#
benchmark_simple_settle_with_group(50000, 50, 20, 50)
alphamind/data/neutralize.py
View file @
2990e8c3
...
...
@@ -6,6 +6,7 @@ Created on 2017-4-25
"""
import
numpy
as
np
import
numba
as
nb
from
numpy
import
zeros
from
numpy.linalg
import
solve
from
typing
import
Tuple
...
...
@@ -68,16 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return
res
@
nb
.
njit
def
ls_fit
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
)
->
np
.
ndarray
:
x_bar
=
x
.
T
b
=
solve
(
x_bar
@
x
,
x_bar
@
y
)
return
b
@
nb
.
njit
def
ls_res
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
,
b
:
np
.
ndarray
)
->
np
.
ndarray
:
return
y
-
x
@
b
@
nb
.
njit
def
ls_explain
(
x
:
np
.
ndarray
,
b
:
np
.
ndarray
)
->
np
.
ndarray
:
explained
=
np
.
zeros
(
x
.
shape
+
(
b
.
shape
[
1
],))
for
i
in
range
(
b
.
shape
[
1
]):
...
...
alphamind/data/winsorize.py
View file @
2990e8c3
...
...
@@ -6,24 +6,60 @@ Created on 2017-4-25
"""
import
numpy
as
np
import
numba
as
nb
from
alphamind.groupby
import
group_mapping
from
alphamind.aggregate
import
transform
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
@
nb
.
njit
def
mask_values_2d
(
x
:
np
.
ndarray
,
mean_values
:
np
.
ndarray
,
std_values
:
np
.
ndarray
,
num_stds
:
int
=
3
)
->
np
.
ndarray
:
res
=
x
.
copy
()
length
,
width
=
x
.
shape
for
i
in
range
(
length
):
for
j
in
range
(
width
):
ubound
=
mean_values
[
i
,
j
]
+
num_stds
*
std_values
[
i
,
j
]
lbound
=
mean_values
[
i
,
j
]
-
num_stds
*
std_values
[
i
,
j
]
if
x
[
i
,
j
]
>
ubound
:
res
[
i
,
j
]
=
ubound
elif
x
[
i
,
j
]
<
lbound
:
res
[
i
,
j
]
=
lbound
return
res
@
nb
.
njit
def
mask_values_1d
(
x
:
np
.
ndarray
,
mean_values
:
np
.
ndarray
,
std_values
:
np
.
ndarray
,
num_stds
:
int
=
3
)
->
np
.
ndarray
:
res
=
x
.
copy
()
length
,
width
=
x
.
shape
for
j
in
range
(
width
):
ubound
=
mean_values
[
j
]
+
num_stds
*
std_values
[
j
]
lbound
=
mean_values
[
j
]
-
num_stds
*
std_values
[
j
]
for
i
in
range
(
length
):
if
x
[
i
,
j
]
>
ubound
:
res
[
i
,
j
]
=
ubound
elif
x
[
i
,
j
]
<
lbound
:
res
[
i
,
j
]
=
lbound
return
res
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
if
groups
is
not
None
:
groups
=
group_mapping
(
groups
)
mean_values
=
transform
(
groups
,
x
,
'mean'
)
std_values
=
transform
(
groups
,
x
,
'std'
)
res
=
mask_values_2d
(
x
,
mean_values
,
std_values
,
num_stds
)
else
:
std_values
=
x
.
std
(
axis
=
0
)
mean_values
=
x
.
mean
(
axis
=
0
)
ubound
=
mean_values
+
num_stds
*
std_values
lbound
=
mean_values
-
num_stds
*
std_values
res
=
np
.
where
(
x
>
ubound
,
ubound
,
np
.
where
(
x
<
lbound
,
lbound
,
x
))
res
=
mask_values_1d
(
x
,
mean_values
,
std_values
,
num_stds
)
return
res
...
...
@@ -32,5 +68,9 @@ if __name__ == '__main__':
x
=
np
.
random
.
randn
(
3000
,
10
)
groups
=
np
.
random
.
randint
(
0
,
20
,
size
=
3000
)
for
_
in
range
(
1000
):
winsorize_normal
(
x
,
2
,
groups
)
\ No newline at end of file
import
datetime
as
dt
start
=
dt
.
datetime
.
now
()
for
_
in
range
(
3000
):
winsorize_normal
(
x
,
2
,
groups
)
print
(
dt
.
datetime
.
now
()
-
start
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment