first commit

3dbd0208 · Dr.李 · 3dbd0208 · 3dbd0208 · 3dbd0208 · 3dbd0208
Commit 3dbd0208 authored Apr 25, 2017 by Dr.李
15 changed files
--- a/alphamind/__init__.py
+++ b/alphamind/__init__.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
--- a/alphamind/benchmarks/__init__.py
+++ b/alphamind/benchmarks/__init__.py
--- a/alphamind/benchmarks/neutralize.py
+++ b/alphamind/benchmarks/neutralize.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import datetime as dt
+
+import numpy as np
+from sklearn.linear_model import LinearRegression
+
+from alphamind.data.neutralize import ls_fit
+
+
+def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
+    """Time ``ls_fit`` against an intercept-free ``LinearRegression``.
+
+    Both solvers are run ``n_loops`` times on the same random problem and the
+    elapsed wall-clock time of each loop is printed.
+
+    :param n_samples: number of rows of the random regressors and target.
+    :param n_features: number of columns of the random regressors.
+    :param n_loops: how many times each solver is invoked.
+    """
+    report_line = '{0:20s}: {1}'
+
+    print("Starting least square fitting benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(
+        n_samples, n_features, n_loops))
+
+    # The target is drawn before the regressors.
+    y = np.random.randn(n_samples)
+    x = np.random.randn(n_samples, n_features)
+
+    tic = dt.datetime.now()
+    for _ in range(n_loops):
+        ls_fit(x, y)
+    impl_elapsed = dt.datetime.now() - tic
+    print(report_line.format('Implemented model', impl_elapsed))
+
+    tic = dt.datetime.now()
+    for _ in range(n_loops):
+        # A fresh estimator is built on every pass, so construction is timed too.
+        LinearRegression(fit_intercept=False).fit(x, y)
+    reference_elapsed = dt.datetime.now() - tic
+    print(report_line.format('Benchmark model', reference_elapsed))
+
+if __name__ == '__main__':
+    # Default scenario when the module is executed as a script.
+    benchmark_neutralize(n_samples=3000, n_features=10, n_loops=1000)
--- a/alphamind/benchmarks/standardize.py
+++ b/alphamind/benchmarks/standardize.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import datetime as dt
+import numpy as np
+import pandas as pd
+from scipy.stats import zscore
+from alphamind.data.standardize import standardize
+
+
+def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
+    """Time ``standardize`` against ``scipy.stats.zscore`` on random data.
+
+    Each implementation is called ``n_loops`` times on the same matrix and
+    the elapsed wall-clock time of each loop is printed.
+
+    :param n_samples: number of rows of the random input matrix.
+    :param n_features: number of columns of the random input matrix.
+    :param n_loops: how many times each implementation is invoked.
+    """
+    report_line = '{0:20s}: {1}'
+
+    print("-" * 60)
+    print("Starting standardizing benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(
+        n_samples, n_features, n_loops))
+
+    x = np.random.randn(n_samples, n_features)
+
+    tic = dt.datetime.now()
+    for _ in range(n_loops):
+        standardize(x)
+    impl_elapsed = dt.datetime.now() - tic
+    print(report_line.format('Implemented model', impl_elapsed))
+
+    tic = dt.datetime.now()
+    for _ in range(n_loops):
+        zscore(x)
+    reference_elapsed = dt.datetime.now() - tic
+    print(report_line.format('Benchmark model', reference_elapsed))
+
+
+def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
+    """Time grouped ``standardize`` against a pandas group-by z-score.
+
+    Rows receive random integer labels in ``[0, n_groups)``; each
+    implementation is called ``n_loops`` times and its elapsed wall-clock
+    time is printed.
+
+    :param n_samples: number of rows of the random input matrix.
+    :param n_features: number of columns of the random input matrix.
+    :param n_loops: how many times each implementation is invoked.
+    :param n_groups: exclusive upper bound of the random group labels.
+    """
+    report_line = '{0:20s}: {1}'
+
+    def group_zscore(s):
+        # Reference z-score applied by pandas within every group.
+        return (s - s.mean(axis=0)) / s.std(axis=0)
+
+    print("-" * 60)
+    print("Starting standardizing with group-by values benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(
+        n_samples, n_features, n_loops, n_groups))
+
+    # The data matrix is drawn before the labels.
+    x = np.random.randn(n_samples, n_features)
+    groups = np.random.randint(n_groups, size=n_samples)
+
+    tic = dt.datetime.now()
+    for _ in range(n_loops):
+        standardize(x, groups=groups)
+    impl_elapsed = dt.datetime.now() - tic
+    print(report_line.format('Implemented model', impl_elapsed))
+
+    tic = dt.datetime.now()
+    for _ in range(n_loops):
+        # The DataFrame is rebuilt on every pass, so that cost is timed too.
+        pd.DataFrame(x).groupby(groups).transform(group_zscore)
+    reference_elapsed = dt.datetime.now() - tic
+    print(report_line.format('Benchmark model', reference_elapsed))
+
+
+if __name__ == '__main__':
+    # Default scenarios when the module is executed as a script.
+    benchmark_standardize(n_samples=3000, n_features=10, n_loops=1000)
+    benchmark_standardize_with_group(n_samples=3000, n_features=10, n_loops=1000, n_groups=30)
--- a/alphamind/data/__init__.py
+++ b/alphamind/data/__init__.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
--- a/alphamind/data/neutralize.py
+++ b/alphamind/data/neutralize.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import numpy as np
+from numpy.linalg import solve
+
+
+def neutralize(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """Return what is left of ``y`` after regressing it on ``x``.
+
+    The coefficients come from ``ls_fit`` and the residuals from ``ls_res``;
+    no intercept column is added here.
+
+    :param x: regressors.
+    :param y: values to neutralize.
+    :return: ``y`` minus its fitted values.
+    """
+    return ls_res(x, y, ls_fit(x, y))
+
+
+def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """Solve the normal equations ``(x'x) b = x'y`` for ``b``.
+
+    :param x: regressors.
+    :param y: target values.
+    :return: the coefficients ``b``.
+    """
+    x_t = np.transpose(x)
+    gram = x_t @ x
+    moment = x_t @ y
+    return solve(gram, moment)
+
+
+def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Return the residuals ``y - x b`` for the coefficients ``b``.
+
+    :param x: regressors.
+    :param y: target values.
+    :param b: coefficients applied to ``x``.
+    :return: ``y`` minus the fitted values.
+    """
+    fitted = np.matmul(x, b)
+    return y - fitted
--- a/alphamind/data/standardize.py
+++ b/alphamind/data/standardize.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import numpy as np
+import pandas as pd
+
+
+def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    """Z-score the columns of ``x``, optionally within groups of rows.
+
+    :param x: observations in rows. A 1-D array is treated as one column.
+    :param groups: optional labels, one per row of ``x``; rows sharing a
+        label are standardized together. ``None`` standardizes each whole
+        column at once.
+    :return: array of the same shape as ``x``.
+
+    The two branches keep their existing, different estimators: without
+    groups the numpy default (population std, ``ddof=0``) is used; with
+    groups the pandas group-by ``std`` (sample std, ``ddof=1``) is used, so a
+    group holding a single row comes out as NaN.
+    """
+    if groups is None:
+        return (x - x.mean(axis=0)) / x.std(axis=0)
+
+    gs = pd.DataFrame(x).groupby(groups)
+    shape = np.shape(x)
+
+    # The string aliases select the group-by mean/std explicitly. Passing
+    # ``np.mean`` / ``np.std`` resolves to the same methods today, but pandas
+    # has deprecated that mapping and will later call the numpy functions
+    # directly, which would silently switch the std to ``ddof=0``.
+    # Reshaping restores 1-D inputs, which the DataFrame turns into (n, 1).
+    mean_values = gs.transform('mean').values.reshape(shape)
+    std_values = gs.transform('std').values.reshape(shape)
+    return (x - mean_values) / std_values
+
--- a/alphamind/data/winsorize.py
+++ b/alphamind/data/winsorize.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+
+def winsorize(x, num_stds=3, groups=None):
+    """Clip each column of ``x`` to ``mean +/- num_stds * std``.
+
+    This replaces the former empty placeholder, which returned ``None`` for
+    every input.
+
+    :param x: observations in rows. A 1-D array is treated as one column.
+    :param num_stds: half-width of the allowed band, in standard deviations.
+    :param groups: optional labels, one per row of ``x``; when given, the mean
+        and std are computed within each group of rows.
+    :return: array of the same shape as ``x`` with out-of-band values
+        replaced by the nearest band edge.
+
+    The moments match ``alphamind.data.standardize.standardize``: population
+    std (``ddof=0``) without groups, pandas sample std (``ddof=1``) with
+    groups.
+    """
+    # Imported locally because this module has no top-level imports.
+    import numpy as np
+    import pandas as pd
+
+    x = np.asarray(x)
+    if groups is None:
+        mean_values = x.mean(axis=0)
+        std_values = x.std(axis=0)
+    else:
+        gs = pd.DataFrame(x).groupby(groups)
+        # Reshaping restores 1-D inputs, which the DataFrame turns into (n, 1).
+        mean_values = gs.transform('mean').values.reshape(x.shape)
+        # A single-row group has an undefined sample std (NaN); treating it
+        # as zero spread leaves that row unchanged instead of turning it NaN.
+        std_values = gs.transform('std').fillna(0.0).values.reshape(x.shape)
+
+    half_width = num_stds * std_values
+    return np.clip(x, mean_values - half_width, mean_values + half_width)
--- a/alphamind/tests/__init__.py
+++ b/alphamind/tests/__init__.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
--- a/alphamind/tests/test_neutralize.py
+++ b/alphamind/tests/test_neutralize.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import unittest
+import numpy as np
+from sklearn.linear_model import LinearRegression
+from alphamind.data.neutralize import neutralize
+
+
+class TestNeutralize(unittest.TestCase):
+    """Checks ``neutralize`` against a scikit-learn reference fit."""
+
+    def test_neutralize(self):
+        # Residuals must match those of an intercept-free LinearRegression.
+        n_samples, n_features = 3000, 10
+        y = np.random.randn(n_samples)
+        x = np.random.randn(n_samples, n_features)
+
+        reference = LinearRegression(fit_intercept=False)
+        reference.fit(x, y)
+        expected = y - np.matmul(x, reference.coef_)
+
+        actual = neutralize(x, y)
+        np.testing.assert_array_almost_equal(actual, expected)
+
+
+if __name__ == '__main__':
+    # Allow running this test module directly as a script.
+    unittest.main()
--- a/alphamind/tests/test_standardize.py
+++ b/alphamind/tests/test_standardize.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import unittest
+import numpy as np
+import pandas as pd
+from scipy.stats import zscore
+from alphamind.data.standardize import standardize
+
+
+class TestStandardize(unittest.TestCase):
+    """Checks ``standardize`` against scipy and pandas reference results."""
+
+    def test_standardize(self):
+        # Without groups the result must agree with scipy.stats.zscore.
+        data = np.random.randn(3000, 10)
+
+        actual = standardize(data)
+        expected = zscore(data)
+        np.testing.assert_array_almost_equal(actual, expected)
+
+    def test_standardize_with_group(self):
+        # With groups the reference is a per-group pandas z-score transform.
+        data = np.random.randn(3000, 10)
+        labels = np.random.randint(30, size=3000)
+
+        def group_zscore(s):
+            return (s - s.mean(axis=0)) / s.std(axis=0)
+
+        actual = standardize(data, labels)
+        expected = pd.DataFrame(data).groupby(labels).transform(group_zscore)
+        np.testing.assert_array_almost_equal(actual, expected)
+
+
+if __name__ == '__main__':
+    # Allow running this test module directly as a script.
+    unittest.main()
--- a/alphamind/tests/test_suite.py
+++ b/alphamind/tests/test_suite.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import os
+import sys
+
+# The repository root (three path components above this file) has to be on
+# ``sys.path`` *before* anything is imported from ``alphamind``. Importing the
+# path helper from ``alphamind.utilities`` first would raise ImportError in
+# exactly the situation this bootstrap exists for, so the path is computed
+# inline with the same split/join rule the helper uses.
+_REPO_ROOT = os.path.sep.join(os.path.abspath(__file__).split(os.path.sep)[:-3])
+sys.path.append(_REPO_ROOT)
+
+from alphamind.tests.test_neutralize import TestNeutralize
+from alphamind.tests.test_standardize import TestStandardize
+from alphamind.utilities import alpha_logger
+from alphamind.utilities import TestRunner
+
+
+if __name__ == '__main__':
+    # ``run`` terminates the process with the suite's exit status.
+    runner = TestRunner([TestNeutralize,
+                         TestStandardize],
+                        alpha_logger)
+    runner.run()
\ No newline at end of file
--- a/alphamind/utilities.py
+++ b/alphamind/utilities.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import os
+import sys
+import logging
+import unittest
+
+
+# Package logger: INFO level, with one default ``StreamHandler`` attached that
+# formats records as "time - logger name - level - message".
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+
+alpha_logger = logging.getLogger('ALPHA_MIND')
+alpha_logger.setLevel(logging.INFO)
+alpha_logger.addHandler(ch)
+
+
+def add_parent_path(name, level):
+    """Append an ancestor directory of ``name`` to ``sys.path``.
+
+    :param name: a file or directory path (typically ``__file__``); it is
+        made absolute first.
+    :param level: number of trailing path components to strip. ``1`` gives
+        the containing directory, ``0`` the absolute path itself.
+    :raises ValueError: if ``level`` is negative.
+    """
+    if level < 0:
+        raise ValueError('level must be non-negative, got {0}'.format(level))
+
+    target = os.path.abspath(name)
+    # ``os.path.dirname`` stops at the filesystem root. Splitting and joining
+    # on the separator instead produced an empty string (i.e. the current
+    # directory) for ``level=0`` or when stripping all the way to the root.
+    for _ in range(level):
+        target = os.path.dirname(target)
+    sys.path.append(target)
+
+
+class TestRunner(object):
+    """Gathers ``unittest`` test cases into one suite and runs it.
+
+    ``run`` never returns: it exits the process with status ``0`` when there
+    are no errors or failures and ``-1`` otherwise.
+    """
+
+    def __init__(self,
+                 test_cases,
+                 logger):
+        # ``test_cases`` is an iterable of TestCase classes; ``logger`` is
+        # used by ``run`` to report the interpreter version.
+        self.logger = logger
+        self.suite = unittest.TestSuite()
+
+        loader = unittest.TestLoader()
+        for test_case in test_cases:
+            self.suite.addTests(loader.loadTestsFromTestCase(test_case))
+
+    def run(self):
+        # Log the interpreter version, run the suite, then exit.
+        self.logger.info('Python ' + sys.version)
+
+        outcome = unittest.TextTestRunner(verbosity=3).run(self.suite)
+        exit_code = -1 if (outcome.errors or outcome.failures) else 0
+        sys.exit(exit_code)
--- a/requirements.txt
+++ b/requirements.txt
+numpy >= 1.12.1
+scikit-learn >= 0.18.1
+scipy >= 0.19.0
+pandas >= 0.19.2
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+from setuptools import setup
+from setuptools import find_packages
+
+# NOTE(review): version, url, license, author_email and description are still
+# empty placeholders - fill them in before publishing a release.
+setup(
+    # Identity of the distribution.
+    name='Alpha-Mind',
+    version='',
+    description='',
+    # Ownership and licensing.
+    author='wegamekinglc',
+    author_email='',
+    url='',
+    license='',
+    # Every package discovered under the project root is shipped.
+    packages=find_packages()
+)