Added more benchmarks for neutralize

83b11bc0 · Dr.李 · 5d4b11f8 · 83b11bc0 · 83b11bc0
Commit 83b11bc0 authored May 01, 2017 by Dr.李
Hide whitespace changes
Inline Side-by-side

Showing with 42 additions and 3 deletions

benchmarks.py alphamind/benchmarks/benchmarks.py +5 -1

neutralize.py alphamind/benchmarks/data/neutralize.py +37 -2

No files found.
--- a/alphamind/benchmarks/benchmarks.py
+++ b/alphamind/benchmarks/benchmarks.py
@@ -6,6 +6,7 @@ Created on 2017-4-25
 """

 from alphamind.benchmarks.data.neutralize import benchmark_neutralize
+from alphamind.benchmarks.data.neutralize import benchmark_neutralize_with_groups
 from alphamind.benchmarks.data.standardize import benchmark_standardize
 from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
 from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
@@ -19,8 +20,11 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
 if __name__ == '__main__':

    benchmark_neutralize(3000, 10, 1000)
-    benchmark_neutralize(30, 10, 50000)
+    benchmark_neutralize_with_groups(3000, 10, 1000, 30)
+    benchmark_neutralize(30, 3, 50000)
+    benchmark_neutralize_with_groups(30, 3, 50000, 3)
    benchmark_neutralize(50000, 50, 20)
+    benchmark_neutralize_with_groups(50000, 50, 20, 50)
    benchmark_standardize(3000, 10, 1000)
    benchmark_standardize_with_group(3000, 10, 1000, 30)
    benchmark_standardize(30, 10, 50000)

--- a/alphamind/benchmarks/data/neutralize.py
+++ b/alphamind/benchmarks/data/neutralize.py
@@ -23,7 +23,7 @@ def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:

    start = dt.datetime.now()
    for _ in range(n_loops):
-        _ = neutralize(x, y)
+        calc_res = neutralize(x, y)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
@@ -32,10 +32,45 @@ def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
    for _ in range(n_loops):
        benchmark_model = LinearRegression(fit_intercept=False)
        benchmark_model.fit(x, y)
-        _ = y - x @ benchmark_model.coef_.T
+        exp_res = y - x @ benchmark_model.coef_.T
+    benchmark_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+    np.testing.assert_array_almost_equal(calc_res, exp_res)
+
+
+def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
+    print("-" * 60)
+    print("Starting least square fitting with group benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
+                                                                                            n_features,
+                                                                                            n_loops,
+                                                                                            n_groups))
+    y = np.random.randn(n_samples, 5)
+    x = np.random.randn(n_samples, n_features)
+    groups = np.random.randint(n_groups, size=n_samples)
+
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = neutralize(x, y, groups)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    start = dt.datetime.now()
+
+    model = LinearRegression(fit_intercept=False)
+    for _ in range(n_loops):
+        for i in range(n_groups):
+            curr_x = x[groups == i]
+            curr_y = y[groups == i]
+            model.fit(curr_x, curr_y)
+            _ = curr_y - curr_x @ model.coef_.T
    benchmark_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))

 if __name__ == '__main__':
    benchmark_neutralize(3000, 10, 1000)
+    benchmark_neutralize_with_groups(3000, 10, 1000, 30)