Commit 2990e8c3 authored by Dr.李

improve the performance of winsorize

parent 5ea39a77
...@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle ...@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
if __name__ == '__main__':
    # Benchmark driver. Only the winsorize benchmarks are enabled; the other
    # suites are kept commented out so they can be switched back on when
    # needed.

    # benchmark_neutralize(3000, 10, 1000)
    # benchmark_neutralize_with_groups(3000, 10, 1000, 30)
    # benchmark_neutralize(30, 3, 50000)
    # benchmark_neutralize_with_groups(30, 3, 50000, 3)
    # benchmark_neutralize(50000, 50, 20)
    # benchmark_neutralize_with_groups(50000, 50, 20, 50)
    # benchmark_standardize(3000, 10, 1000)
    # benchmark_standardize_with_group(3000, 10, 1000, 30)
    # benchmark_standardize(30, 10, 50000)
    # benchmark_standardize_with_group(30, 10, 5000, 5)
    # benchmark_standardize(50000, 50, 20)
    # benchmark_standardize_with_group(50000, 50, 20, 50)
    benchmark_winsorize_normal(3000, 10, 1000)
    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
    benchmark_winsorize_normal(30, 10, 50000)
    benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
    benchmark_winsorize_normal(50000, 50, 20)
    benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
    # benchmark_build_rank(3000, 1000, 300)
    # benchmark_build_rank_with_group(3000, 1000, 10, 30)
    # benchmark_build_rank(30, 50000, 3)
    # benchmark_build_rank_with_group(30, 50000, 1, 3)
    # benchmark_build_rank(50000, 20, 3000)
    # benchmark_build_rank_with_group(50000, 20, 10, 300)
    # benchmark_simple_settle(3000, 10, 1000)
    # benchmark_simple_settle_with_group(3000, 10, 1000, 30)
    # benchmark_simple_settle(30, 10, 50000)
    # benchmark_simple_settle_with_group(30, 10, 50000, 5)
    # benchmark_simple_settle(50000, 50, 20)
    # benchmark_simple_settle_with_group(50000, 50, 20, 50)
...@@ -6,6 +6,7 @@ Created on 2017-4-25 ...@@ -6,6 +6,7 @@ Created on 2017-4-25
""" """
import numpy as np import numpy as np
import numba as nb
from numpy import zeros from numpy import zeros
from numpy.linalg import solve from numpy.linalg import solve
from typing import Tuple from typing import Tuple
...@@ -68,16 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp ...@@ -68,16 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return res return res
@nb.njit
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Return the least-squares coefficients of ``y`` regressed on ``x``.

    The coefficients are obtained by solving the normal equations
    ``(x.T @ x) b = x.T @ y`` with ``numpy.linalg.solve``.

    :param x: regressor matrix, one row per observation.
    :param y: dependent values with the same number of rows as ``x``.
    :return: the solution ``b`` of the normal equations.

    The solve step fails when ``x.T @ x`` is singular.
    """
    x_bar = x.T
    b = solve(x_bar @ x, x_bar @ y)
    return b
@nb.njit
def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the residual ``y - x @ b`` of a linear fit.

    :param x: regressor matrix.
    :param y: dependent values.
    :param b: coefficients applied to ``x``.
    :return: the part of ``y`` not reproduced by ``x @ b``.
    """
    return y - x @ b
@nb.njit
def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray: def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
explained = np.zeros(x.shape + (b.shape[1],)) explained = np.zeros(x.shape + (b.shape[1],))
for i in range(b.shape[1]): for i in range(b.shape[1]):
......
...@@ -6,24 +6,60 @@ Created on 2017-4-25 ...@@ -6,24 +6,60 @@ Created on 2017-4-25
""" """
import numpy as np import numpy as np
import numba as nb
from alphamind.groupby import group_mapping from alphamind.groupby import group_mapping
from alphamind.aggregate import transform from alphamind.aggregate import transform
def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray: @nb.njit
def mask_values_2d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
                   num_stds: int = 3) -> np.ndarray:
    """Clip every element of ``x`` to its own mean +/- ``num_stds`` * std band.

    :param x: 2-D array of values to clip; it is not modified.
    :param mean_values: per-element centres, indexed as ``[i, j]`` like ``x``.
    :param std_values: per-element spreads, indexed as ``[i, j]`` like ``x``.
    :param num_stds: half-width of the allowed band in units of ``std_values``.
    :return: a copy of ``x`` in which values above the upper bound are set to
        that bound and values below the lower bound are set to that bound.
    """
    res = x.copy()
    length, width = x.shape

    for i in range(length):
        for j in range(width):
            # Both bounds share the same half-width, so compute it once.
            spread = num_stds * std_values[i, j]
            ubound = mean_values[i, j] + spread
            lbound = mean_values[i, j] - spread
            if x[i, j] > ubound:
                res[i, j] = ubound
            elif x[i, j] < lbound:
                res[i, j] = lbound

    return res
@nb.njit
def mask_values_1d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
                   num_stds: int = 3) -> np.ndarray:
    """Clip each column of ``x`` to that column's mean +/- ``num_stds`` * std.

    :param x: 2-D array of values to clip; it is not modified.
    :param mean_values: one centre per column of ``x``, indexed as ``[j]``.
    :param std_values: one spread per column of ``x``, indexed as ``[j]``.
    :param num_stds: half-width of the allowed band in units of ``std_values``.
    :return: a copy of ``x`` in which values above a column's upper bound are
        set to that bound and values below its lower bound are set to that
        bound.
    """
    res = x.copy()
    length, width = x.shape

    for j in range(width):
        # The bounds depend only on the column, so they are computed once per
        # column rather than once per element.
        spread = num_stds * std_values[j]
        ubound = mean_values[j] + spread
        lbound = mean_values[j] - spread
        for i in range(length):
            if x[i, j] > ubound:
                res[i, j] = ubound
            elif x[i, j] < lbound:
                res[i, j] = lbound

    return res
def winsorize_normal(x: np.ndarray, num_stds: int = 3, groups: np.ndarray = None) -> np.ndarray:
    """Clip ``x`` to mean +/- ``num_stds`` standard deviations.

    :param x: 2-D array of values to winsorize.
    :param num_stds: half-width of the allowed band in standard deviations.
    :param groups: optional group labels. When given, the mean and standard
        deviation come from ``transform`` on the mapped groups; otherwise the
        column-wise mean and standard deviation of ``x`` are used.
    :return: the clipped values as produced by ``mask_values_2d`` (grouped
        case) or ``mask_values_1d`` (ungrouped case).
    """
    if groups is not None:
        groups = group_mapping(groups)
        # NOTE(review): transform is presumably returning per-row group
        # statistics with the same shape as x, which is what mask_values_2d
        # indexes -- confirm against alphamind.aggregate.
        mean_values = transform(groups, x, 'mean')
        std_values = transform(groups, x, 'std')
        res = mask_values_2d(x, mean_values, std_values, num_stds)
    else:
        std_values = x.std(axis=0)
        mean_values = x.mean(axis=0)
        res = mask_values_1d(x, mean_values, std_values, num_stds)

    return res
...@@ -32,5 +68,9 @@ if __name__ == '__main__': ...@@ -32,5 +68,9 @@ if __name__ == '__main__':
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
groups = np.random.randint(0, 20, size=3000) groups = np.random.randint(0, 20, size=3000)
for _ in range(1000): import datetime as dt
start = dt.datetime.now()
for _ in range(3000):
winsorize_normal(x, 2, groups) winsorize_normal(x, 2, groups)
print(dt.datetime.now() - start)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment