Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
2990e8c3
Commit
2990e8c3
authored
May 03, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
improve the performance of winsorize
parent
5ea39a77
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
76 additions
and
32 deletions
+76
-32
benchmarks.py
alphamind/benchmarks/benchmarks.py
+24
-24
neutralize.py
alphamind/data/neutralize.py
+4
-0
winsorize.py
alphamind/data/winsorize.py
+48
-8
No files found.
alphamind/benchmarks/benchmarks.py
View file @
2990e8c3
...
@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
...
@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
benchmark_neutralize
(
3000
,
10
,
1000
)
#
benchmark_neutralize(3000, 10, 1000)
benchmark_neutralize_with_groups
(
3000
,
10
,
1000
,
30
)
#
benchmark_neutralize_with_groups(3000, 10, 1000, 30)
benchmark_neutralize
(
30
,
3
,
50000
)
#
benchmark_neutralize(30, 3, 50000)
benchmark_neutralize_with_groups
(
30
,
3
,
50000
,
3
)
#
benchmark_neutralize_with_groups(30, 3, 50000, 3)
benchmark_neutralize
(
50000
,
50
,
20
)
#
benchmark_neutralize(50000, 50, 20)
benchmark_neutralize_with_groups
(
50000
,
50
,
20
,
50
)
#
benchmark_neutralize_with_groups(50000, 50, 20, 50)
benchmark_standardize
(
3000
,
10
,
1000
)
#
benchmark_standardize(3000, 10, 1000)
benchmark_standardize_with_group
(
3000
,
10
,
1000
,
30
)
#
benchmark_standardize_with_group(3000, 10, 1000, 30)
benchmark_standardize
(
30
,
10
,
50000
)
#
benchmark_standardize(30, 10, 50000)
benchmark_standardize_with_group
(
30
,
10
,
5000
,
5
)
#
benchmark_standardize_with_group(30, 10, 5000, 5)
benchmark_standardize
(
50000
,
50
,
20
)
#
benchmark_standardize(50000, 50, 20)
benchmark_standardize_with_group
(
50000
,
50
,
20
,
50
)
#
benchmark_standardize_with_group(50000, 50, 20, 50)
benchmark_winsorize_normal
(
3000
,
10
,
1000
)
benchmark_winsorize_normal
(
3000
,
10
,
1000
)
benchmark_winsorize_normal_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_winsorize_normal_with_group
(
3000
,
10
,
1000
,
30
)
benchmark_winsorize_normal
(
30
,
10
,
50000
)
benchmark_winsorize_normal
(
30
,
10
,
50000
)
benchmark_winsorize_normal_with_group
(
30
,
10
,
5000
,
5
)
benchmark_winsorize_normal_with_group
(
30
,
10
,
5000
,
5
)
benchmark_winsorize_normal
(
50000
,
50
,
20
)
benchmark_winsorize_normal
(
50000
,
50
,
20
)
benchmark_winsorize_normal_with_group
(
50000
,
50
,
20
,
50
)
benchmark_winsorize_normal_with_group
(
50000
,
50
,
20
,
50
)
benchmark_build_rank
(
3000
,
1000
,
300
)
#
benchmark_build_rank(3000, 1000, 300)
benchmark_build_rank_with_group
(
3000
,
1000
,
10
,
30
)
#
benchmark_build_rank_with_group(3000, 1000, 10, 30)
benchmark_build_rank
(
30
,
50000
,
3
)
#
benchmark_build_rank(30, 50000, 3)
benchmark_build_rank_with_group
(
30
,
50000
,
1
,
3
)
#
benchmark_build_rank_with_group(30, 50000, 1, 3)
benchmark_build_rank
(
50000
,
20
,
3000
)
#
benchmark_build_rank(50000, 20, 3000)
benchmark_build_rank_with_group
(
50000
,
20
,
10
,
300
)
#
benchmark_build_rank_with_group(50000, 20, 10, 300)
benchmark_simple_settle
(
3000
,
10
,
1000
)
#
benchmark_simple_settle(3000, 10, 1000)
benchmark_simple_settle_with_group
(
3000
,
10
,
1000
,
30
)
#
benchmark_simple_settle_with_group(3000, 10, 1000, 30)
benchmark_simple_settle
(
30
,
10
,
50000
)
#
benchmark_simple_settle(30, 10, 50000)
benchmark_simple_settle_with_group
(
30
,
10
,
50000
,
5
)
#
benchmark_simple_settle_with_group(30, 10, 50000, 5)
benchmark_simple_settle
(
50000
,
50
,
20
)
#
benchmark_simple_settle(50000, 50, 20)
benchmark_simple_settle_with_group
(
50000
,
50
,
20
,
50
)
#
benchmark_simple_settle_with_group(50000, 50, 20, 50)
alphamind/data/neutralize.py
View file @
2990e8c3
...
@@ -6,6 +6,7 @@ Created on 2017-4-25
...
@@ -6,6 +6,7 @@ Created on 2017-4-25
"""
"""
import
numpy
as
np
import
numpy
as
np
import
numba
as
nb
from
numpy
import
zeros
from
numpy
import
zeros
from
numpy.linalg
import
solve
from
numpy.linalg
import
solve
from
typing
import
Tuple
from
typing
import
Tuple
...
@@ -68,16 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
...
@@ -68,16 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return
res
return
res
@
nb
.
njit
def
ls_fit
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
)
->
np
.
ndarray
:
def
ls_fit
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
)
->
np
.
ndarray
:
x_bar
=
x
.
T
x_bar
=
x
.
T
b
=
solve
(
x_bar
@
x
,
x_bar
@
y
)
b
=
solve
(
x_bar
@
x
,
x_bar
@
y
)
return
b
return
b
@
nb
.
njit
def
ls_res
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
,
b
:
np
.
ndarray
)
->
np
.
ndarray
:
def
ls_res
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
,
b
:
np
.
ndarray
)
->
np
.
ndarray
:
return
y
-
x
@
b
return
y
-
x
@
b
@
nb
.
njit
def
ls_explain
(
x
:
np
.
ndarray
,
b
:
np
.
ndarray
)
->
np
.
ndarray
:
def
ls_explain
(
x
:
np
.
ndarray
,
b
:
np
.
ndarray
)
->
np
.
ndarray
:
explained
=
np
.
zeros
(
x
.
shape
+
(
b
.
shape
[
1
],))
explained
=
np
.
zeros
(
x
.
shape
+
(
b
.
shape
[
1
],))
for
i
in
range
(
b
.
shape
[
1
]):
for
i
in
range
(
b
.
shape
[
1
]):
...
...
alphamind/data/winsorize.py
View file @
2990e8c3
...
@@ -6,24 +6,60 @@ Created on 2017-4-25
...
@@ -6,24 +6,60 @@ Created on 2017-4-25
"""
"""
import
numpy
as
np
import
numpy
as
np
import
numba
as
nb
from
alphamind.groupby
import
group_mapping
from
alphamind.groupby
import
group_mapping
from
alphamind.aggregate
import
transform
from
alphamind.aggregate
import
transform
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
@
nb
.
njit
def
mask_values_2d
(
x
:
np
.
ndarray
,
mean_values
:
np
.
ndarray
,
std_values
:
np
.
ndarray
,
num_stds
:
int
=
3
)
->
np
.
ndarray
:
res
=
x
.
copy
()
length
,
width
=
x
.
shape
for
i
in
range
(
length
):
for
j
in
range
(
width
):
ubound
=
mean_values
[
i
,
j
]
+
num_stds
*
std_values
[
i
,
j
]
lbound
=
mean_values
[
i
,
j
]
-
num_stds
*
std_values
[
i
,
j
]
if
x
[
i
,
j
]
>
ubound
:
res
[
i
,
j
]
=
ubound
elif
x
[
i
,
j
]
<
lbound
:
res
[
i
,
j
]
=
lbound
return
res
@
nb
.
njit
def
mask_values_1d
(
x
:
np
.
ndarray
,
mean_values
:
np
.
ndarray
,
std_values
:
np
.
ndarray
,
num_stds
:
int
=
3
)
->
np
.
ndarray
:
res
=
x
.
copy
()
length
,
width
=
x
.
shape
for
j
in
range
(
width
):
ubound
=
mean_values
[
j
]
+
num_stds
*
std_values
[
j
]
lbound
=
mean_values
[
j
]
-
num_stds
*
std_values
[
j
]
for
i
in
range
(
length
):
if
x
[
i
,
j
]
>
ubound
:
res
[
i
,
j
]
=
ubound
elif
x
[
i
,
j
]
<
lbound
:
res
[
i
,
j
]
=
lbound
return
res
def
winsorize_normal
(
x
:
np
.
ndarray
,
num_stds
:
int
=
3
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
if
groups
is
not
None
:
if
groups
is
not
None
:
groups
=
group_mapping
(
groups
)
groups
=
group_mapping
(
groups
)
mean_values
=
transform
(
groups
,
x
,
'mean'
)
mean_values
=
transform
(
groups
,
x
,
'mean'
)
std_values
=
transform
(
groups
,
x
,
'std'
)
std_values
=
transform
(
groups
,
x
,
'std'
)
res
=
mask_values_2d
(
x
,
mean_values
,
std_values
,
num_stds
)
else
:
else
:
std_values
=
x
.
std
(
axis
=
0
)
std_values
=
x
.
std
(
axis
=
0
)
mean_values
=
x
.
mean
(
axis
=
0
)
mean_values
=
x
.
mean
(
axis
=
0
)
res
=
mask_values_1d
(
x
,
mean_values
,
std_values
,
num_stds
)
ubound
=
mean_values
+
num_stds
*
std_values
lbound
=
mean_values
-
num_stds
*
std_values
res
=
np
.
where
(
x
>
ubound
,
ubound
,
np
.
where
(
x
<
lbound
,
lbound
,
x
))
return
res
return
res
...
@@ -32,5 +68,9 @@ if __name__ == '__main__':
...
@@ -32,5 +68,9 @@ if __name__ == '__main__':
x
=
np
.
random
.
randn
(
3000
,
10
)
x
=
np
.
random
.
randn
(
3000
,
10
)
groups
=
np
.
random
.
randint
(
0
,
20
,
size
=
3000
)
groups
=
np
.
random
.
randint
(
0
,
20
,
size
=
3000
)
for
_
in
range
(
1000
):
import
datetime
as
dt
start
=
dt
.
datetime
.
now
()
for
_
in
range
(
3000
):
winsorize_normal
(
x
,
2
,
groups
)
winsorize_normal
(
x
,
2
,
groups
)
print
(
dt
.
datetime
.
now
()
-
start
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment