Commit 2990e8c3 authored by Dr.李

improve the performance of winsorize

parent 5ea39a77
......@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
if __name__ == '__main__':
benchmark_neutralize(3000, 10, 1000)
benchmark_neutralize_with_groups(3000, 10, 1000, 30)
benchmark_neutralize(30, 3, 50000)
benchmark_neutralize_with_groups(30, 3, 50000, 3)
benchmark_neutralize(50000, 50, 20)
benchmark_neutralize_with_groups(50000, 50, 20, 50)
benchmark_standardize(3000, 10, 1000)
benchmark_standardize_with_group(3000, 10, 1000, 30)
benchmark_standardize(30, 10, 50000)
benchmark_standardize_with_group(30, 10, 5000, 5)
benchmark_standardize(50000, 50, 20)
benchmark_standardize_with_group(50000, 50, 20, 50)
# benchmark_neutralize(3000, 10, 1000)
# benchmark_neutralize_with_groups(3000, 10, 1000, 30)
# benchmark_neutralize(30, 3, 50000)
# benchmark_neutralize_with_groups(30, 3, 50000, 3)
# benchmark_neutralize(50000, 50, 20)
# benchmark_neutralize_with_groups(50000, 50, 20, 50)
# benchmark_standardize(3000, 10, 1000)
# benchmark_standardize_with_group(3000, 10, 1000, 30)
# benchmark_standardize(30, 10, 50000)
# benchmark_standardize_with_group(30, 10, 5000, 5)
# benchmark_standardize(50000, 50, 20)
# benchmark_standardize_with_group(50000, 50, 20, 50)
benchmark_winsorize_normal(3000, 10, 1000)
benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
benchmark_winsorize_normal(30, 10, 50000)
benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
benchmark_winsorize_normal(50000, 50, 20)
benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
benchmark_build_rank(3000, 1000, 300)
benchmark_build_rank_with_group(3000, 1000, 10, 30)
benchmark_build_rank(30, 50000, 3)
benchmark_build_rank_with_group(30, 50000, 1, 3)
benchmark_build_rank(50000, 20, 3000)
benchmark_build_rank_with_group(50000, 20, 10, 300)
benchmark_simple_settle(3000, 10, 1000)
benchmark_simple_settle_with_group(3000, 10, 1000, 30)
benchmark_simple_settle(30, 10, 50000)
benchmark_simple_settle_with_group(30, 10, 50000, 5)
benchmark_simple_settle(50000, 50, 20)
benchmark_simple_settle_with_group(50000, 50, 20, 50)
# benchmark_build_rank(3000, 1000, 300)
# benchmark_build_rank_with_group(3000, 1000, 10, 30)
# benchmark_build_rank(30, 50000, 3)
# benchmark_build_rank_with_group(30, 50000, 1, 3)
# benchmark_build_rank(50000, 20, 3000)
# benchmark_build_rank_with_group(50000, 20, 10, 300)
# benchmark_simple_settle(3000, 10, 1000)
# benchmark_simple_settle_with_group(3000, 10, 1000, 30)
# benchmark_simple_settle(30, 10, 50000)
# benchmark_simple_settle_with_group(30, 10, 50000, 5)
# benchmark_simple_settle(50000, 50, 20)
# benchmark_simple_settle_with_group(50000, 50, 20, 50)
......@@ -6,6 +6,7 @@ Created on 2017-4-25
"""
import numpy as np
import numba as nb
from numpy import zeros
from numpy.linalg import solve
from typing import Tuple
......@@ -68,16 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return res
@nb.njit
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Return the least-squares coefficients of ``y`` regressed on ``x``.

    The coefficients ``b`` are obtained by solving the normal equations
    ``(x.T @ x) @ b = x.T @ y`` with ``numpy.linalg.solve``.
    """
    xt = x.T
    gram = xt @ x
    moment = xt @ y
    return solve(gram, moment)
@nb.njit
def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the residual ``y - x @ b`` for the given coefficients ``b``."""
    fitted = x @ b
    return y - fitted
@nb.njit
def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
explained = np.zeros(x.shape + (b.shape[1],))
for i in range(b.shape[1]):
......
......@@ -6,24 +6,60 @@ Created on 2017-4-25
"""
import numpy as np
import numba as nb
from alphamind.groupby import group_mapping
from alphamind.aggregate import transform
def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:
@nb.njit
def mask_values_2d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
                   num_stds: int = 3) -> np.ndarray:
    """Clip every element of ``x`` to its own mean +/- ``num_stds`` * std band.

    ``mean_values`` and ``std_values`` are read with the same ``[i, j]`` index
    as ``x``, so each element has an individual upper and lower bound.
    The clipping is applied to a copy; ``x`` itself is left untouched.
    """
    n_rows, n_cols = x.shape
    clipped = x.copy()
    for row in range(n_rows):
        for col in range(n_cols):
            center = mean_values[row, col]
            spread = num_stds * std_values[row, col]
            upper = center + spread
            lower = center - spread
            value = x[row, col]
            # Explicit comparisons (rather than min/max) keep values that
            # compare false on both sides unchanged.
            if value > upper:
                clipped[row, col] = upper
            elif value < lower:
                clipped[row, col] = lower
    return clipped
@nb.njit
def mask_values_1d(x: np.ndarray,
                   mean_values: np.ndarray,
                   std_values: np.ndarray,
                   num_stds: int = 3) -> np.ndarray:
    """Clip each column of ``x`` to that column's mean +/- ``num_stds`` * std.

    ``mean_values`` and ``std_values`` are indexed by column only, so one pair
    of bounds is shared by every row of a column. The clipping is applied to
    a copy; ``x`` itself is left untouched.
    """
    n_rows, n_cols = x.shape
    clipped = x.copy()
    for col in range(n_cols):
        # Bounds depend only on the column, so compute them once per column.
        center = mean_values[col]
        spread = num_stds * std_values[col]
        upper = center + spread
        lower = center - spread
        for row in range(n_rows):
            value = x[row, col]
            if value > upper:
                clipped[row, col] = upper
            elif value < lower:
                clipped[row, col] = lower
    return clipped
def winsorize_normal(x: np.ndarray, num_stds: int = 3, groups: np.ndarray = None) -> np.ndarray:
    """Winsorize ``x`` by clipping values outside mean +/- ``num_stds`` * std.

    :param x: 2-D array of values; statistics are taken along axis 0 when no
        groups are given.
    :param num_stds: half-width of the accepted band, in standard deviations.
    :param groups: optional group labels. When given, the mean and std come
        from ``transform(groups, x, ...)`` and are applied element-wise.
    :return: a clipped copy of ``x``.
    """
    if groups is not None:
        groups = group_mapping(groups)
        # NOTE(review): transform() presumably broadcasts each group's
        # statistic back to the shape of x (mask_values_2d indexes the
        # results with [i, j]) -- confirm against alphamind.aggregate.
        mean_values = transform(groups, x, 'mean')
        std_values = transform(groups, x, 'std')
        return mask_values_2d(x, mean_values, std_values, num_stds)

    std_values = x.std(axis=0)
    mean_values = x.mean(axis=0)
    # The bounds and the clipping are computed inside mask_values_1d, so no
    # separate vectorised np.where pass is needed here.
    return mask_values_1d(x, mean_values, std_values, num_stds)
......@@ -32,5 +68,9 @@ if __name__ == '__main__':
x = np.random.randn(3000, 10)
groups = np.random.randint(0, 20, size=3000)
for _ in range(1000):
winsorize_normal(x, 2, groups)
\ No newline at end of file
import datetime as dt
start = dt.datetime.now()
for _ in range(3000):
winsorize_normal(x, 2, groups)
print(dt.datetime.now() - start)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment