Commit d94862d5 authored by Dr.李

update benchmark and many numba function

parent 91f70b4e
......@@ -10,7 +10,68 @@ import numpy as np
import numba as nb
@nb.njit
@nb.njit(nogil=True, cache=True)
def simple_sum(x, axis=0):
    """Sum a 2-D array along one axis using explicit loops.

    :param x: 2-D array (its shape is unpacked into two dimensions).
    :param axis: 0 to collapse the rows (one total per column),
        1 to collapse the columns (one total per row).
    :return: 1-D array created with ``np.zeros`` holding the totals.

    NOTE(review): only ``axis`` values 0 and 1 assign ``res``; any other
    value reaches ``return res`` without a prior assignment.
    """
    n_rows, n_cols = x.shape
    if axis == 0:
        res = np.zeros(n_cols)
        # Walk the matrix row by row, adding each cell to its column total.
        for row in range(n_rows):
            for col in range(n_cols):
                res[col] += x[row, col]
    elif axis == 1:
        res = np.zeros(n_rows)
        for row in range(n_rows):
            # Accumulate the row total locally, then store it once.
            total = 0.0
            for col in range(n_cols):
                total += x[row, col]
            res[row] = total
    return res
@nb.njit(nogil=True, cache=True)
def simple_mean(x, axis=0):
    """Average a 2-D array along one axis using explicit loops.

    :param x: 2-D array (its shape is unpacked into two dimensions).
    :param axis: 0 for one mean per column (divides by the row count),
        1 for one mean per row (divides by the column count).
    :return: 1-D array created with ``np.zeros`` holding the means.

    NOTE(review): only ``axis`` values 0 and 1 assign ``res``; any other
    value reaches ``return res`` without a prior assignment.
    """
    n_rows, n_cols = x.shape
    if axis == 0:
        res = np.zeros(n_cols)
        for col in range(n_cols):
            # Column total first, then a single division by the row count.
            total = 0.0
            for row in range(n_rows):
                total += x[row, col]
            res[col] = total / n_rows
    elif axis == 1:
        res = np.zeros(n_rows)
        for row in range(n_rows):
            # Row total first, then a single division by the column count.
            total = 0.0
            for col in range(n_cols):
                total += x[row, col]
            res[row] = total / n_cols
    return res
@nb.njit(nogil=True, cache=True)
def simple_std(x, axis=0, ddof=1):
    """Standard deviation of a 2-D array along one axis, in a single pass.

    For every column (``axis=0``) or row (``axis=1``) the sum of squares
    and the plain sum are accumulated together, and the result is
    ``sqrt((sum_sq - sum * sum / n) / (n - ddof))`` where ``n`` is the
    number of elements along the reduced axis.

    :param x: 2-D array (its shape is unpacked into two dimensions).
    :param axis: 0 for one value per column, 1 for one value per row.
    :param ddof: value subtracted from ``n`` in the divisor (default 1).
    :return: 1-D array created with ``np.zeros`` holding the deviations.

    NOTE(review): only ``axis`` values 0 and 1 assign ``res``; any other
    value reaches ``return res`` without a prior assignment.
    """
    length, width = x.shape
    if axis == 0:
        res = np.zeros(width)
        sum_mat = np.zeros(width)
        for j in range(width):
            for i in range(length):
                res[j] += x[i, j] * x[i, j]
                sum_mat[j] += x[i, j]
            res[j] = math.sqrt((res[j] - sum_mat[j] * sum_mat[j] / length) / (length - ddof))
    elif axis == 1:
        res = np.zeros(length)
        # One running sum per ROW: the buffer must have `length` entries.
        # It was previously sized by `width`, so `sum_mat[i]` went out of
        # bounds whenever the matrix had more rows than columns.
        sum_mat = np.zeros(length)
        for i in range(length):
            for j in range(width):
                res[i] += x[i, j] * x[i, j]
                sum_mat[i] += x[i, j]
            res[i] = math.sqrt((res[i] - sum_mat[i] * sum_mat[i] / width) / (width - ddof))
    return res
@nb.njit(nogil=True, cache=True)
def agg_sum(groups, x):
max_g = groups.max()
length, width = x.shape
......@@ -22,7 +83,7 @@ def agg_sum(groups, x):
return res
@nb.njit
@nb.njit(nogil=True, cache=True)
def agg_abssum(groups, x):
max_g = groups.max()
length, width = x.shape
......@@ -34,7 +95,7 @@ def agg_abssum(groups, x):
return res
@nb.njit
@nb.njit(nogil=True, cache=True)
def agg_mean(groups, x):
max_g = groups.max()
length, width = x.shape
......@@ -53,7 +114,7 @@ def agg_mean(groups, x):
return res
@nb.njit
@nb.njit(nogil=True, cache=True)
def agg_std(groups, x, ddof=1):
max_g = groups.max()
length, width = x.shape
......@@ -74,7 +135,7 @@ def agg_std(groups, x, ddof=1):
return res
@nb.njit
@nb.njit(nogil=True, cache=True)
def copy_value(groups, source):
length = groups.shape[0]
width = source.shape[1]
......@@ -86,12 +147,12 @@ def copy_value(groups, source):
return destination
def transform(groups, x, func):
def transform(groups, x, func, ddof=1):
if func == 'mean':
value_data = agg_mean(groups, x)
elif func == 'std':
value_data = agg_std(groups, x, ddof=1)
value_data = agg_std(groups, x, ddof=ddof)
elif func == 'sum':
value_data = agg_sum(groups, x)
elif func =='abssum':
......@@ -102,11 +163,11 @@ def transform(groups, x, func):
return copy_value(groups, value_data)
def aggregate(groups, x, func):
def aggregate(groups, x, func, ddof=1):
if func == 'mean':
value_data = agg_mean(groups, x)
elif func == 'std':
value_data = agg_std(groups, x, ddof=1)
value_data = agg_std(groups, x, ddof=ddof)
elif func == 'sum':
value_data = agg_sum(groups, x)
elif func =='abssum':
......
......@@ -69,19 +69,19 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return res
@nb.njit(nogil=True, cache=True)
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Fit least-squares coefficients through the normal equations.

    Builds ``x.T @ x`` and ``x.T @ y`` and hands both to ``solve``.

    :param x: regressor matrix.
    :param y: target values.
    :return: whatever ``solve`` returns for the system above.

    NOTE(review): ``solve`` is defined outside this block — presumably a
    linear-system solver such as ``numpy.linalg.solve``; confirm.
    """
    x_t = x.T
    gram = x_t @ x
    moment = x_t @ y
    return solve(gram, moment)
@nb.njit(nogil=True, cache=True)
def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the residual ``y - x @ b`` of a linear fit.

    :param x: regressor matrix.
    :param y: target values.
    :param b: coefficients multiplied by ``x``.
    :return: ``y`` minus the fitted values ``x @ b``.
    """
    fitted = x @ b
    return y - fitted
@nb.njit
@nb.njit(nogil=True, cache=True)
def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
explained = np.zeros(x.shape + (b.shape[1],))
for i in range(b.shape[1]):
......
......@@ -8,18 +8,20 @@ Created on 2017-4-25
import numpy as np
from alphamind.groupby import group_mapping
from alphamind.aggregate import transform
from alphamind.aggregate import simple_mean
from alphamind.aggregate import simple_std
def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
    """Z-score ``x``: subtract the mean and divide by the standard deviation.

    :param x: data to standardize; reduced along axis 0 in the ungrouped
        case.
    :param groups: optional group labels. When given, they are passed
        through ``group_mapping`` and the mean / standard deviation come
        from ``transform(groups, x, ...)`` instead of the whole array.
    :param ddof: forwarded to the standard-deviation computation in BOTH
        branches (default 1).
    :return: ``(x - mean) / std``.
    """
    if groups is not None:
        groups = group_mapping(groups)
        mean_values = transform(groups, x, 'mean')
        std_values = transform(groups, x, 'std', ddof)
        return (x - mean_values) / std_values
    else:
        # Forward ddof here as well; it was previously dropped, so the
        # ungrouped path always used simple_std's own default.
        return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
if __name__ == '__main__':
......
......@@ -9,9 +9,11 @@ import numpy as np
import numba as nb
from alphamind.groupby import group_mapping
from alphamind.aggregate import transform
from alphamind.aggregate import simple_mean
from alphamind.aggregate import simple_std
@nb.njit
@nb.njit(nogil=True, cache=True)
def mask_values_2d(x: np.ndarray,
mean_values: np.ndarray,
std_values: np.ndarray,
......@@ -31,7 +33,7 @@ def mask_values_2d(x: np.ndarray,
return res
@nb.njit
@nb.njit(nogil=True, cache=True)
def mask_values_1d(x: np.ndarray,
mean_values: np.ndarray,
std_values: np.ndarray,
......@@ -57,10 +59,9 @@ def winsorize_normal(x: np.ndarray, num_stds: int = 3, groups: np.ndarray = None
std_values = transform(groups, x, 'std')
res = mask_values_2d(x, mean_values, std_values, num_stds)
else:
std_values = x.std(axis=0)
mean_values = x.mean(axis=0)
std_values = simple_std(x, axis=0)
mean_values = simple_mean(x, axis=0)
res = mask_values_1d(x, mean_values, std_values, num_stds)
return res
......
......@@ -45,9 +45,9 @@ cpdef groupby(long[:] groups):
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups):
cpdef np.ndarray[long, ndim=1] group_mapping(long[:] groups):
cdef size_t length = groups.shape[0]
cdef np.ndarray[int, ndim=1] res= zeros(length, dtype=int)
cdef np.ndarray[long, ndim=1] res= zeros(length, dtype=long)
cdef cpp_map[long, long] current_hold
cdef long curr_tag
cdef long running_tag = -1
......
......@@ -12,7 +12,7 @@ from numpy import zeros_like
from alphamind.groupby import groupby
@nb.njit
@nb.njit(nogil=True, cache=True)
def set_value(mat, used_level, to_fill):
length, width = used_level.shape
for i in range(length):
......
......@@ -8,19 +8,20 @@ Created on 2017-4-28
import numpy as np
from alphamind.groupby import group_mapping
from alphamind.aggregate import aggregate
from alphamind.aggregate import simple_sum
def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
    """Multiply weights by returns and sum the products.

    :param weights: weights, one row per entry of ``ret_series``. A 1-D
        array is treated as a single column.
    :param ret_series: returns; a 1-D array is reshaped to a column so it
        broadcasts across every weight column.
    :param groups: optional group labels. When given, they are passed
        through ``group_mapping`` and the products are summed with
        ``aggregate(groups, ret_mat, 'sum')``; otherwise they are summed
        along axis 0 with ``simple_sum``.
    :return: the summed products (from ``aggregate`` or ``simple_sum``).
    """
    if ret_series.ndim == 1:
        ret_series = ret_series.reshape((-1, 1))
    # A 1-D weights vector times a column of returns would broadcast to an
    # (n, n) outer product; make it a column so the product is elementwise.
    if weights.ndim == 1:
        weights = weights.reshape((-1, 1))

    ret_mat = weights * ret_series
    if groups is not None:
        groups = group_mapping(groups)
        return aggregate(groups, ret_mat, 'sum')
    else:
        return simple_sum(ret_mat, axis=0)
if __name__ == '__main__':
......
......@@ -19,7 +19,7 @@ class TestStandardize(unittest.TestCase):
x = np.random.randn(3000, 10)
calc_zscore = standardize(x)
exp_zscore = zscore(x)
exp_zscore = zscore(x, ddof=1)
np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
......@@ -28,7 +28,7 @@ class TestStandardize(unittest.TestCase):
groups = np.random.randint(10, 30, size=3000)
calc_zscore = standardize(x, groups)
exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1))
np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
......
......@@ -20,7 +20,7 @@ class TestWinsorize(unittest.TestCase):
calc_winsorized = winsorize_normal(x, num_stds)
std_values = x.std(axis=0)
std_values = x.std(axis=0, ddof=1)
mean_value = x.mean(axis=0)
lower_bound = mean_value - num_stds * std_values
......@@ -42,7 +42,7 @@ class TestWinsorize(unittest.TestCase):
cal_winsorized = winsorize_normal(x, num_stds, groups)
def impl(x):
std_values = x.std(axis=0)
std_values = x.std(axis=0, ddof=1)
mean_value = x.mean(axis=0)
lower_bound = mean_value - num_stds * std_values
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment