Commit 91f70b4e authored by Dr.李

improve the performance of winsorize

parent 2990e8c3
...@@ -75,16 +75,18 @@ def agg_std(groups, x, ddof=1): ...@@ -75,16 +75,18 @@ def agg_std(groups, x, ddof=1):
@nb.njit @nb.njit
def set_value(groups, source, destinantion): def copy_value(groups, source):
length, width = destinantion.shape length = groups.shape[0]
width = source.shape[1]
destination = np.zeros((length, width))
for i in range(length): for i in range(length):
k = groups[i] k = groups[i]
for j in range(width): for j in range(width):
destinantion[i, j] = source[k, j] destination[i, j] = source[k, j]
return destination
def transform(groups, x, func): def transform(groups, x, func):
res = np.zeros_like(x)
if func == 'mean': if func == 'mean':
value_data = agg_mean(groups, x) value_data = agg_mean(groups, x)
...@@ -97,8 +99,7 @@ def transform(groups, x, func): ...@@ -97,8 +99,7 @@ def transform(groups, x, func):
else: else:
raise ValueError('({0}) is not recognized as valid functor'.format(func)) raise ValueError('({0}) is not recognized as valid functor'.format(func))
set_value(groups, value_data, res) return copy_value(groups, value_data)
return res
def aggregate(groups, x, func): def aggregate(groups, x, func):
......
...@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle ...@@ -19,33 +19,33 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
if __name__ == '__main__': if __name__ == '__main__':
# benchmark_neutralize(3000, 10, 1000) benchmark_neutralize(3000, 10, 1000)
# benchmark_neutralize_with_groups(3000, 10, 1000, 30) benchmark_neutralize_with_groups(3000, 10, 1000, 30)
# benchmark_neutralize(30, 3, 50000) benchmark_neutralize(30, 3, 50000)
# benchmark_neutralize_with_groups(30, 3, 50000, 3) benchmark_neutralize_with_groups(30, 3, 50000, 3)
# benchmark_neutralize(50000, 50, 20) benchmark_neutralize(50000, 50, 20)
# benchmark_neutralize_with_groups(50000, 50, 20, 50) benchmark_neutralize_with_groups(50000, 50, 20, 50)
# benchmark_standardize(3000, 10, 1000) benchmark_standardize(3000, 10, 1000)
# benchmark_standardize_with_group(3000, 10, 1000, 30) benchmark_standardize_with_group(3000, 10, 1000, 30)
# benchmark_standardize(30, 10, 50000) benchmark_standardize(100, 10, 50000)
# benchmark_standardize_with_group(30, 10, 5000, 5) benchmark_standardize_with_group(100, 10, 5000, 4)
# benchmark_standardize(50000, 50, 20) benchmark_standardize(50000, 50, 20)
# benchmark_standardize_with_group(50000, 50, 20, 50) benchmark_standardize_with_group(50000, 50, 20, 50)
benchmark_winsorize_normal(3000, 10, 1000) benchmark_winsorize_normal(3000, 10, 1000)
benchmark_winsorize_normal_with_group(3000, 10, 1000, 30) benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
benchmark_winsorize_normal(30, 10, 50000) benchmark_winsorize_normal(30, 10, 50000)
benchmark_winsorize_normal_with_group(30, 10, 5000, 5) benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
benchmark_winsorize_normal(50000, 50, 20) benchmark_winsorize_normal(50000, 50, 20)
benchmark_winsorize_normal_with_group(50000, 50, 20, 50) benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
# benchmark_build_rank(3000, 1000, 300) benchmark_build_rank(3000, 1000, 300)
# benchmark_build_rank_with_group(3000, 1000, 10, 30) benchmark_build_rank_with_group(3000, 1000, 10, 30)
# benchmark_build_rank(30, 50000, 3) benchmark_build_rank(30, 50000, 3)
# benchmark_build_rank_with_group(30, 50000, 1, 3) benchmark_build_rank_with_group(30, 50000, 1, 3)
# benchmark_build_rank(50000, 20, 3000) benchmark_build_rank(50000, 20, 3000)
# benchmark_build_rank_with_group(50000, 20, 10, 300) benchmark_build_rank_with_group(50000, 20, 10, 300)
# benchmark_simple_settle(3000, 10, 1000) benchmark_simple_settle(3000, 10, 1000)
# benchmark_simple_settle_with_group(3000, 10, 1000, 30) benchmark_simple_settle_with_group(3000, 10, 1000, 30)
# benchmark_simple_settle(30, 10, 50000) benchmark_simple_settle(30, 10, 50000)
# benchmark_simple_settle_with_group(30, 10, 50000, 5) benchmark_simple_settle_with_group(30, 10, 50000, 5)
# benchmark_simple_settle(50000, 50, 20) benchmark_simple_settle(50000, 50, 20)
# benchmark_simple_settle_with_group(50000, 50, 20, 50) benchmark_simple_settle_with_group(50000, 50, 20, 50)
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment