Commit 3dbd0208 authored by Dr.李

first commit

parents
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import datetime as dt
import numpy as np
from sklearn.linear_model import LinearRegression
from alphamind.data.neutralize import ls_fit
def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
    """Print how long ``ls_fit`` and scikit-learn's ``LinearRegression`` take.

    One random target vector of length ``n_samples`` and one random design
    matrix of shape ``(n_samples, n_features)`` are drawn up front; each
    fitter is then run ``n_loops`` times on that same data and the elapsed
    wall-clock time of each loop is printed.
    """
    print("Starting least square fitting benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))

    # Draw order (target first, then design matrix) is kept so the random
    # stream is consumed exactly as before.
    target = np.random.randn(n_samples)
    design = np.random.randn(n_samples, n_features)
    report_line = '{0:20s}: {1}'.format

    began = dt.datetime.now()
    for _ in range(n_loops):
        ls_fit(design, target)
    elapsed_impl = dt.datetime.now() - began
    print(report_line('Implemented model', elapsed_impl))

    began = dt.datetime.now()
    for _ in range(n_loops):
        # A fresh estimator per iteration, so construction cost is included.
        LinearRegression(fit_intercept=False).fit(design, target)
    elapsed_reference = dt.datetime.now() - began
    print(report_line('Benchmark model', elapsed_reference))
# Script entry point: benchmark with 3000 samples, 10 features and 1000 loops.
if __name__ == '__main__':
    benchmark_neutralize(3000, 10, 1000)
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import datetime as dt
import numpy as np
import pandas as pd
from scipy.stats import zscore
from alphamind.data.standardize import standardize
def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
    """Print how long ``standardize`` and ``scipy.stats.zscore`` take.

    A single random matrix of shape ``(n_samples, n_features)`` is generated
    and each routine is applied to it ``n_loops`` times; the elapsed
    wall-clock time of each loop is printed.
    """
    print("-" * 60)
    print("Starting standardizing benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))

    data = np.random.randn(n_samples, n_features)
    report_line = '{0:20s}: {1}'.format

    began = dt.datetime.now()
    for _ in range(n_loops):
        standardize(data)
    elapsed_impl = dt.datetime.now() - began
    print(report_line('Implemented model', elapsed_impl))

    began = dt.datetime.now()
    for _ in range(n_loops):
        zscore(data)
    elapsed_reference = dt.datetime.now() - began
    print(report_line('Benchmark model', elapsed_reference))
def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
    """Print how long grouped ``standardize`` and a pandas group-by equivalent take.

    A random matrix of shape ``(n_samples, n_features)`` and one random
    integer label in ``[0, n_groups)`` per row are generated once. Each
    approach is then run ``n_loops`` times on that data and the elapsed
    wall-clock time of each loop is printed.
    """
    print("-" * 60)
    print("Starting standardizing with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))

    # Data is drawn before the labels, matching the original draw order.
    data = np.random.randn(n_samples, n_features)
    labels = np.random.randint(n_groups, size=n_samples)
    report_line = '{0:20s}: {1}'.format

    began = dt.datetime.now()
    for _ in range(n_loops):
        standardize(data, groups=labels)
    elapsed_impl = dt.datetime.now() - began
    print(report_line('Implemented model', elapsed_impl))

    began = dt.datetime.now()
    for _ in range(n_loops):
        # The frame is rebuilt on every pass, so its construction is timed too.
        frame = pd.DataFrame(data)
        frame.groupby(labels).transform(lambda part: (part - part.mean(axis=0)) / part.std(axis=0))
    elapsed_reference = dt.datetime.now() - began
    print(report_line('Benchmark model', elapsed_reference))
# Script entry point: run both benchmarks on 3000 x 10 data for 1000 loops;
# the grouped variant uses 30 groups.
if __name__ == '__main__':
    benchmark_standardize(3000, 10, 1000)
    benchmark_standardize_with_group(3000, 10, 1000, 30)
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import numpy as np
from numpy.linalg import solve
def neutralize(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Return what is left of ``y`` after a least-squares fit on ``x``.

    The coefficients come from ``ls_fit(x, y)`` and the residuals from
    ``ls_res``; no intercept column is added here.
    """
    return ls_res(x, y, ls_fit(x, y))
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Return least-squares coefficients by solving the normal equations.

    Solves ``(x' x) b = x' y`` for ``b`` with ``numpy.linalg.solve``, so
    ``x' x`` must be non-singular (``solve`` raises ``LinAlgError``
    otherwise).
    """
    x_t = np.transpose(x)
    lhs = x_t @ x   # x' x
    rhs = x_t @ y   # x' y
    return solve(lhs, rhs)
def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the residuals ``y - x @ b`` for coefficients ``b``."""
    fitted = x @ b
    return y - fitted
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import numpy as np
import pandas as pd
def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
    """Return ``x`` centred and scaled column by column.

    Parameters
    ----------
    x : np.ndarray
        Values to standardize; each column is treated separately.
    groups : np.ndarray, optional
        One label per row of ``x``. When given, the mean and standard
        deviation are computed within each group of rows sharing a label.

    Returns
    -------
    np.ndarray
        ``(x - mean) / std`` with the same shape as ``x``.

    Notes
    -----
    Without ``groups`` the scale is NumPy's ``ndarray.std`` (``ddof=0``).
    With ``groups`` it is pandas' group-by ``std`` (``ddof=1``). A zero
    standard deviation is not special-cased.
    """
    if groups is None:
        return (x - x.mean(axis=0)) / x.std(axis=0)

    grouped = pd.DataFrame(x).groupby(groups)
    # Use pandas' string aliases instead of np.mean / np.std: pandas maps
    # those NumPy callables to its own group-by methods only as a deprecated
    # convenience, and calling np.std directly would switch to ddof=0.
    mean_values = grouped.transform('mean').values
    std_values = grouped.transform('std').values
    return (x - mean_values) / std_values
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
def winsorize(x, num_stds=3, groups=None):
    """Placeholder: does nothing yet and always returns ``None``.

    None of ``x``, ``num_stds`` or ``groups`` is used; both the grouped and
    the ungrouped path are empty.

    NOTE(review): presumably meant to limit ``x`` to ``num_stds`` standard
    deviations, optionally per group -- confirm the intended contract
    before implementing.
    """
    # TODO: implement the grouped (``groups is not None``) and ungrouped cases.
    return None
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import unittest
import numpy as np
from sklearn.linear_model import LinearRegression
from alphamind.data.neutralize import neutralize
class TestNeutralize(unittest.TestCase):
    """Compares ``neutralize`` with a no-intercept scikit-learn regression."""

    def test_neutralize(self):
        # Random data: 3000 observations of a target and of 10 regressors.
        target = np.random.randn(3000)
        regressors = np.random.randn(3000, 10)

        # Reference residuals from LinearRegression without an intercept.
        reference = LinearRegression(fit_intercept=False)
        reference.fit(regressors, target)
        expected = target - regressors @ reference.coef_

        actual = neutralize(regressors, target)
        np.testing.assert_array_almost_equal(actual, expected)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import unittest
import numpy as np
import pandas as pd
from scipy.stats import zscore
from alphamind.data.standardize import standardize
class TestStandardize(unittest.TestCase):
    """Compares ``standardize`` with scipy and pandas reference results."""

    def test_standardize(self):
        # Ungrouped case: the reference is scipy's column-wise z-score.
        data = np.random.randn(3000, 10)

        expected = zscore(data)
        actual = standardize(data)

        np.testing.assert_array_almost_equal(actual, expected)

    def test_standardize_with_group(self):
        # Grouped case: the reference is a pandas group-by transform over
        # 30 random integer labels.
        data = np.random.randn(3000, 10)
        labels = np.random.randint(30, size=3000)

        actual = standardize(data, labels)
        by_label = pd.DataFrame(data).groupby(labels)
        expected = by_label.transform(lambda part: (part - part.mean(axis=0)) / part.std(axis=0))

        np.testing.assert_array_almost_equal(actual, expected)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import os
import sys

# Put the directory three levels above this file on ``sys.path`` *before*
# importing anything from ``alphamind``. This cannot be delegated to
# ``alphamind.utilities.add_parent_path``: importing that helper already
# requires the package to be importable, which is exactly what this step is
# meant to guarantee when the script is launched directly.
# NOTE(review): three levels up is presumably the directory containing the
# ``alphamind`` package (this file living in ``alphamind/tests``) -- confirm.
_THIS_FILE = os.path.abspath(__file__)
sys.path.append(os.path.sep.join(_THIS_FILE.split(os.path.sep)[:-3]))

from alphamind.tests.test_neutralize import TestNeutralize
from alphamind.tests.test_standardize import TestStandardize
from alphamind.utilities import alpha_logger
from alphamind.utilities import TestRunner


# Run every registered test case; ``TestRunner.run`` terminates the process
# with an exit status that reflects the outcome.
if __name__ == '__main__':
    runner = TestRunner([TestNeutralize,
                         TestStandardize],
                        alpha_logger)
    runner.run()
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
import os
import sys
import logging
import unittest
# Shared logger named 'ALPHA_MIND'. A stream handler is attached at import
# time (``logging.StreamHandler()`` writes to stderr by default) and the
# level is set to INFO. Each record is rendered as
# "<time> - <logger name> - <level> - <message>".
alpha_logger = logging.getLogger('ALPHA_MIND')
ch = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
alpha_logger.addHandler(ch)
alpha_logger.setLevel(logging.INFO)
def add_parent_path(name, level):
    """Append an ancestor directory of ``name`` to ``sys.path``.

    ``name`` is made absolute and split on the OS path separator; the last
    ``level`` components are dropped and the remainder, re-joined with the
    same separator, is appended to ``sys.path``.
    """
    separator = os.path.sep
    components = os.path.abspath(name).split(separator)
    ancestor = separator.join(components[:-level])
    sys.path.append(ancestor)
class TestRunner(object):
    """Gathers the tests of several ``unittest.TestCase`` classes and runs them.

    ``run`` always ends the process through ``sys.exit``: status ``-1`` when
    the result reports any error or failure, ``0`` otherwise.
    """

    def __init__(self,
                 test_cases,
                 logger):
        """Load every test from each class in ``test_cases`` into one suite.

        ``logger`` is stored and used by ``run`` to report the Python version.
        """
        self.logger = logger
        self.suite = unittest.TestSuite()
        loader = unittest.TestLoader()
        for test_case in test_cases:
            self.suite.addTests(loader.loadTestsFromTestCase(test_case))

    def run(self):
        """Log the interpreter version, run the suite verbosely, then exit."""
        self.logger.info('Python ' + sys.version)
        outcome = unittest.TextTestRunner(verbosity=3).run(self.suite)
        has_problem = bool(outcome.errors) or bool(outcome.failures)
        sys.exit(-1 if has_problem else 0)
numpy >= 1.12.1
scikit-learn >= 0.18.1
scipy >= 0.19.0
pandas >= 0.19.2
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on 2017-4-25
@author: cheng.li
"""
from setuptools import setup
from setuptools import find_packages
# Distribution metadata for setuptools; packages are discovered with
# ``find_packages()`` rather than listed by hand.
# NOTE(review): version, url, license, author_email and description are
# empty strings -- presumably placeholders to fill in before a release.
setup(
    name='Alpha-Mind',
    version='',
    packages=find_packages(),
    url='',
    license='',
    author='wegamekinglc',
    author_email='',
    description=''
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment