Commit d94862d5 authored by Dr.李's avatar Dr.李

Update benchmark and many numba functions

parent 91f70b4e
...@@ -10,7 +10,68 @@ import numpy as np ...@@ -10,7 +10,68 @@ import numpy as np
import numba as nb import numba as nb
@nb.njit @nb.njit(nogil=True, cache=True)
def simple_sum(x, axis=0):
    """Sum a 2-D array along one axis.

    Parameters
    ----------
    x : 2-D array, indexed as ``x[row, column]``.
    axis : 0 to total each column (one result entry per column),
        1 to total each row (one result entry per row).

    Returns
    -------
    1-D array allocated with ``np.zeros`` that holds the totals.

    Raises
    ------
    ValueError
        If ``axis`` is neither 0 nor 1.
    """
    length, width = x.shape
    # Explicit element loops are kept on purpose: the function is compiled
    # with numba's ``njit`` decorator, where plain loops are the fast path.
    if axis == 0:
        res = np.zeros(width)
        for i in range(length):
            for j in range(width):
                res[j] += x[i, j]
    elif axis == 1:
        res = np.zeros(length)
        for i in range(length):
            for j in range(width):
                res[i] += x[i, j]
    else:
        # Without this branch an unsupported axis reached ``return res``
        # with ``res`` never assigned.
        raise ValueError("axis must be 0 or 1")
    return res
@nb.njit(nogil=True, cache=True)
def simple_mean(x, axis=0):
    """Average a 2-D array along one axis.

    Parameters
    ----------
    x : 2-D array, indexed as ``x[row, column]``.
    axis : 0 to average each column over the rows,
        1 to average each row over the columns.

    Returns
    -------
    1-D array allocated with ``np.zeros`` that holds the means.

    Raises
    ------
    ValueError
        If ``axis`` is neither 0 nor 1.
    """
    length, width = x.shape
    # Explicit element loops are kept on purpose: the function is compiled
    # with numba's ``njit`` decorator, where plain loops are the fast path.
    if axis == 0:
        res = np.zeros(width)
        for j in range(width):
            for i in range(length):
                res[j] += x[i, j]
            # Column total is complete here; turn it into the mean.
            res[j] /= length
    elif axis == 1:
        res = np.zeros(length)
        for i in range(length):
            for j in range(width):
                res[i] += x[i, j]
            # Row total is complete here; turn it into the mean.
            res[i] /= width
    else:
        # Without this branch an unsupported axis reached ``return res``
        # with ``res`` never assigned.
        raise ValueError("axis must be 0 or 1")
    return res
@nb.njit(nogil=True, cache=True)
def simple_std(x, axis=0, ddof=1):
    """Standard deviation of a 2-D array along one axis.

    Uses the single-pass identity ``sum(v**2) - sum(v)**2 / n`` for the sum
    of squared deviations, divides it by ``n - ddof`` and takes the square
    root.

    Parameters
    ----------
    x : 2-D array, indexed as ``x[row, column]``.
    axis : 0 for one value per column (``n`` is the number of rows),
        1 for one value per row (``n`` is the number of columns).
    ddof : value subtracted from ``n`` in the divisor; defaults to 1.

    Returns
    -------
    1-D array allocated with ``np.zeros`` that holds the deviations.

    Raises
    ------
    ValueError
        If ``axis`` is neither 0 nor 1.
    """
    length, width = x.shape
    if axis == 0:
        res = np.zeros(width)
        sum_mat = np.zeros(width)
        for j in range(width):
            for i in range(length):
                res[j] += x[i, j] * x[i, j]
                sum_mat[j] += x[i, j]
            sq_dev = res[j] - sum_mat[j] * sum_mat[j] / length
            # The true quantity is never negative; rounding can push it a
            # hair below zero for (near-)constant data, which would make
            # sqrt fail or return nan.
            if sq_dev < 0.0:
                sq_dev = 0.0
            res[j] = math.sqrt(sq_dev / (length - ddof))
    elif axis == 1:
        res = np.zeros(length)
        # One running total per ROW.  This buffer used to be sized by
        # ``width`` and was overrun whenever rows outnumbered columns.
        sum_mat = np.zeros(length)
        for i in range(length):
            for j in range(width):
                res[i] += x[i, j] * x[i, j]
                sum_mat[i] += x[i, j]
            sq_dev = res[i] - sum_mat[i] * sum_mat[i] / width
            if sq_dev < 0.0:
                sq_dev = 0.0
            res[i] = math.sqrt(sq_dev / (width - ddof))
    else:
        # Without this branch an unsupported axis reached ``return res``
        # with ``res`` never assigned.
        raise ValueError("axis must be 0 or 1")
    return res
@nb.njit(nogil=True, cache=True)
def agg_sum(groups, x): def agg_sum(groups, x):
max_g = groups.max() max_g = groups.max()
length, width = x.shape length, width = x.shape
...@@ -22,7 +83,7 @@ def agg_sum(groups, x): ...@@ -22,7 +83,7 @@ def agg_sum(groups, x):
return res return res
@nb.njit @nb.njit(nogil=True, cache=True)
def agg_abssum(groups, x): def agg_abssum(groups, x):
max_g = groups.max() max_g = groups.max()
length, width = x.shape length, width = x.shape
...@@ -34,7 +95,7 @@ def agg_abssum(groups, x): ...@@ -34,7 +95,7 @@ def agg_abssum(groups, x):
return res return res
@nb.njit @nb.njit(nogil=True, cache=True)
def agg_mean(groups, x): def agg_mean(groups, x):
max_g = groups.max() max_g = groups.max()
length, width = x.shape length, width = x.shape
...@@ -53,7 +114,7 @@ def agg_mean(groups, x): ...@@ -53,7 +114,7 @@ def agg_mean(groups, x):
return res return res
@nb.njit @nb.njit(nogil=True, cache=True)
def agg_std(groups, x, ddof=1): def agg_std(groups, x, ddof=1):
max_g = groups.max() max_g = groups.max()
length, width = x.shape length, width = x.shape
...@@ -74,7 +135,7 @@ def agg_std(groups, x, ddof=1): ...@@ -74,7 +135,7 @@ def agg_std(groups, x, ddof=1):
return res return res
@nb.njit @nb.njit(nogil=True, cache=True)
def copy_value(groups, source): def copy_value(groups, source):
length = groups.shape[0] length = groups.shape[0]
width = source.shape[1] width = source.shape[1]
...@@ -86,12 +147,12 @@ def copy_value(groups, source): ...@@ -86,12 +147,12 @@ def copy_value(groups, source):
return destination return destination
def transform(groups, x, func): def transform(groups, x, func, ddof=1):
if func == 'mean': if func == 'mean':
value_data = agg_mean(groups, x) value_data = agg_mean(groups, x)
elif func == 'std': elif func == 'std':
value_data = agg_std(groups, x, ddof=1) value_data = agg_std(groups, x, ddof=ddof)
elif func == 'sum': elif func == 'sum':
value_data = agg_sum(groups, x) value_data = agg_sum(groups, x)
elif func =='abssum': elif func =='abssum':
...@@ -102,11 +163,11 @@ def transform(groups, x, func): ...@@ -102,11 +163,11 @@ def transform(groups, x, func):
return copy_value(groups, value_data) return copy_value(groups, value_data)
def aggregate(groups, x, func): def aggregate(groups, x, func, ddof=1):
if func == 'mean': if func == 'mean':
value_data = agg_mean(groups, x) value_data = agg_mean(groups, x)
elif func == 'std': elif func == 'std':
value_data = agg_std(groups, x, ddof=1) value_data = agg_std(groups, x, ddof=ddof)
elif func == 'sum': elif func == 'sum':
value_data = agg_sum(groups, x) value_data = agg_sum(groups, x)
elif func =='abssum': elif func =='abssum':
......
...@@ -69,19 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp ...@@ -69,19 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return res return res
@nb.njit @nb.njit(nogil=True, cache=True)
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Fit coefficients by handing the normal equations to ``solve``.

    Builds ``x.T @ x`` and ``x.T @ y`` and returns whatever ``solve``
    produces for that pair.
    NOTE(review): ``solve`` is imported outside this view; presumably a
    linear-system solver such as ``numpy.linalg.solve`` -- confirm.
    """
    x_t = x.T
    gram = x_t @ x
    moment = x_t @ y
    return solve(gram, moment)
@nb.njit @nb.njit(nogil=True, cache=True)
def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the residual ``y - x @ b`` of a fitted linear model."""
    fitted = x @ b
    return y - fitted
@nb.njit @nb.njit(nogil=True, cache=True)
def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray: def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
explained = np.zeros(x.shape + (b.shape[1],)) explained = np.zeros(x.shape + (b.shape[1],))
for i in range(b.shape[1]): for i in range(b.shape[1]):
......
...@@ -8,18 +8,20 @@ Created on 2017-4-25 ...@@ -8,18 +8,20 @@ Created on 2017-4-25
import numpy as np import numpy as np
from alphamind.groupby import group_mapping from alphamind.groupby import group_mapping
from alphamind.aggregate import transform from alphamind.aggregate import transform
from alphamind.aggregate import simple_mean
from alphamind.aggregate import simple_std
def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
    """Z-score ``x``: subtract a mean and divide by a standard deviation.

    Parameters
    ----------
    x : 2-D array of values; statistics are taken along axis 0.
    groups : optional labels.  When given, they are passed through
        ``group_mapping`` and the mean/std come from ``transform`` with the
        ``'mean'`` and ``'std'`` functions.
        NOTE(review): presumably one label per row of ``x``, with the
        per-group statistic broadcast back to each row -- confirm against
        ``transform``.
    ddof : delta degrees of freedom for the standard deviation; used by
        both the grouped and the ungrouped branch.

    Returns
    -------
    ``(x - mean) / std`` computed element-wise.
    """
    if groups is not None:
        groups = group_mapping(groups)
        mean_values = transform(groups, x, 'mean')
        std_values = transform(groups, x, 'std', ddof)
        return (x - mean_values) / std_values
    else:
        # Forward ddof here as well; this branch used to ignore the
        # argument and always fall back to simple_std's default.
        return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -9,9 +9,11 @@ import numpy as np ...@@ -9,9 +9,11 @@ import numpy as np
import numba as nb import numba as nb
from alphamind.groupby import group_mapping from alphamind.groupby import group_mapping
from alphamind.aggregate import transform from alphamind.aggregate import transform
from alphamind.aggregate import simple_mean
from alphamind.aggregate import simple_std
@nb.njit @nb.njit(nogil=True, cache=True)
def mask_values_2d(x: np.ndarray, def mask_values_2d(x: np.ndarray,
mean_values: np.ndarray, mean_values: np.ndarray,
std_values: np.ndarray, std_values: np.ndarray,
...@@ -31,7 +33,7 @@ def mask_values_2d(x: np.ndarray, ...@@ -31,7 +33,7 @@ def mask_values_2d(x: np.ndarray,
return res return res
@nb.njit @nb.njit(nogil=True, cache=True)
def mask_values_1d(x: np.ndarray, def mask_values_1d(x: np.ndarray,
mean_values: np.ndarray, mean_values: np.ndarray,
std_values: np.ndarray, std_values: np.ndarray,
...@@ -57,10 +59,9 @@ def winsorize_normal(x: np.ndarray, num_stds: int = 3, groups: np.ndarray = None ...@@ -57,10 +59,9 @@ def winsorize_normal(x: np.ndarray, num_stds: int = 3, groups: np.ndarray = None
std_values = transform(groups, x, 'std') std_values = transform(groups, x, 'std')
res = mask_values_2d(x, mean_values, std_values, num_stds) res = mask_values_2d(x, mean_values, std_values, num_stds)
else: else:
std_values = x.std(axis=0) std_values = simple_std(x, axis=0)
mean_values = x.mean(axis=0) mean_values = simple_mean(x, axis=0)
res = mask_values_1d(x, mean_values, std_values, num_stds) res = mask_values_1d(x, mean_values, std_values, num_stds)
return res return res
......
...@@ -45,9 +45,9 @@ cpdef groupby(long[:] groups): ...@@ -45,9 +45,9 @@ cpdef groupby(long[:] groups):
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups): cpdef np.ndarray[long, ndim=1] group_mapping(long[:] groups):
cdef size_t length = groups.shape[0] cdef size_t length = groups.shape[0]
cdef np.ndarray[int, ndim=1] res= zeros(length, dtype=int) cdef np.ndarray[long, ndim=1] res= zeros(length, dtype=long)
cdef cpp_map[long, long] current_hold cdef cpp_map[long, long] current_hold
cdef long curr_tag cdef long curr_tag
cdef long running_tag = -1 cdef long running_tag = -1
......
...@@ -12,7 +12,7 @@ from numpy import zeros_like ...@@ -12,7 +12,7 @@ from numpy import zeros_like
from alphamind.groupby import groupby from alphamind.groupby import groupby
@nb.njit @nb.njit(nogil=True, cache=True)
def set_value(mat, used_level, to_fill): def set_value(mat, used_level, to_fill):
length, width = used_level.shape length, width = used_level.shape
for i in range(length): for i in range(length):
......
...@@ -8,19 +8,20 @@ Created on 2017-4-28 ...@@ -8,19 +8,20 @@ Created on 2017-4-28
import numpy as np import numpy as np
from alphamind.groupby import group_mapping from alphamind.groupby import group_mapping
from alphamind.aggregate import aggregate from alphamind.aggregate import aggregate
from alphamind.aggregate import simple_sum
def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
    """Multiply weights by returns and total the products.

    A 1-D ``ret_series`` is first reshaped into a single column.  The
    element-wise product ``weights * ret_series`` is then summed along
    axis 0 with ``simple_sum``, or, when ``groups`` is given, reduced with
    ``aggregate(..., 'sum')`` after the labels go through ``group_mapping``.
    NOTE(review): assumes ``weights`` is 2-D with one row per entry of
    ``ret_series`` -- confirm against callers.
    """
    returns = ret_series.reshape((-1, 1)) if ret_series.ndim == 1 else ret_series
    contributions = weights * returns
    if groups is None:
        return simple_sum(contributions, axis=0)
    return aggregate(group_mapping(groups), contributions, 'sum')
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -19,7 +19,7 @@ class TestStandardize(unittest.TestCase): ...@@ -19,7 +19,7 @@ class TestStandardize(unittest.TestCase):
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
calc_zscore = standardize(x) calc_zscore = standardize(x)
exp_zscore = zscore(x) exp_zscore = zscore(x, ddof=1)
np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
...@@ -28,7 +28,7 @@ class TestStandardize(unittest.TestCase): ...@@ -28,7 +28,7 @@ class TestStandardize(unittest.TestCase):
groups = np.random.randint(10, 30, size=3000) groups = np.random.randint(10, 30, size=3000)
calc_zscore = standardize(x, groups) calc_zscore = standardize(x, groups)
exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0)) exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1))
np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
......
...@@ -20,7 +20,7 @@ class TestWinsorize(unittest.TestCase): ...@@ -20,7 +20,7 @@ class TestWinsorize(unittest.TestCase):
calc_winsorized = winsorize_normal(x, num_stds) calc_winsorized = winsorize_normal(x, num_stds)
std_values = x.std(axis=0) std_values = x.std(axis=0, ddof=1)
mean_value = x.mean(axis=0) mean_value = x.mean(axis=0)
lower_bound = mean_value - num_stds * std_values lower_bound = mean_value - num_stds * std_values
...@@ -42,7 +42,7 @@ class TestWinsorize(unittest.TestCase): ...@@ -42,7 +42,7 @@ class TestWinsorize(unittest.TestCase):
cal_winsorized = winsorize_normal(x, num_stds, groups) cal_winsorized = winsorize_normal(x, num_stds, groups)
def impl(x): def impl(x):
std_values = x.std(axis=0) std_values = x.std(axis=0, ddof=1)
mean_value = x.mean(axis=0) mean_value = x.mean(axis=0)
lower_bound = mean_value - num_stds * std_values lower_bound = mean_value - num_stds * std_values
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment