Commit 7b72cb5a authored by Dr.李

added more

parent b9d2a972
*.pyc *.pyc
.idea/* .idea/*
\ No newline at end of file build/*
dist/*
Alpha_Mind.egg-info/*
*.pyd
*.c
*.cpp
*.html
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
This diff is collapsed.
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
from alphamind.benchmarks.data.neutralize import benchmark_neutralize from alphamind.benchmarks.data.neutralize import benchmark_neutralize
from alphamind.benchmarks.data.standardize import benchmark_standardize from alphamind.benchmarks.data.standardize import benchmark_standardize
from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group
if __name__ == '__main__':
    # Run every benchmark over several workload shapes. The positional
    # arguments follow each benchmark function's own signature.

    # least-squares neutralization
    benchmark_neutralize(3000, 10, 1000)
    benchmark_neutralize(30, 10, 50000)
    benchmark_neutralize(50000, 50, 20)

    # standardization, with and without group-by
    benchmark_standardize(3000, 10, 1000)
    benchmark_standardize_with_group(3000, 10, 1000, 30)
    benchmark_standardize(30, 10, 50000)
    benchmark_standardize_with_group(30, 10, 5000, 5)
    benchmark_standardize(50000, 50, 20)
    benchmark_standardize_with_group(50000, 50, 20, 50)

    # winsorization, with and without group-by
    benchmark_winsorize_normal(3000, 10, 1000)
    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
    benchmark_winsorize_normal(30, 10, 50000)
    benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
    benchmark_winsorize_normal(50000, 50, 20)
    benchmark_winsorize_normal_with_group(50000, 50, 20, 50)

    # rank-based portfolio construction, with and without group-by
    benchmark_build_rank(3000, 1000, 300)
    benchmark_build_rank_with_group(3000, 1000, 10, 30)
    benchmark_build_rank(30, 50000, 3)
    benchmark_build_rank_with_group(30, 50000, 1, 3)
    benchmark_build_rank(50000, 20, 3000)
    benchmark_build_rank_with_group(50000, 20, 10, 300)

    # simple settlement, with and without group-by
    benchmark_simple_settle(3000, 10, 1000)
    benchmark_simple_settle_with_group(3000, 10, 1000, 30)
    benchmark_simple_settle(30, 10, 50000)
    benchmark_simple_settle_with_group(30, 10, 5000, 5)
    benchmark_simple_settle(50000, 50, 20)
    benchmark_simple_settle_with_group(50000, 50, 20, 50)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import datetime as dt import datetime as dt
import numpy as np import numpy as np
from sklearn.linear_model import LinearRegression from sklearn.linear_model import LinearRegression
from alphamind.data.neutralize import neutralize from alphamind.data.neutralize import neutralize
def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
    """Time ``neutralize`` against scikit-learn's ``LinearRegression``.

    Random normal data is generated once (``y`` has 5 columns, ``x`` has
    ``n_features`` columns) and each implementation is run ``n_loops``
    times. The elapsed wall-clock time of both loops is printed.

    :param n_samples: number of rows in the random ``x`` and ``y``
    :param n_features: number of columns in ``x``
    :param n_loops: number of repetitions of each implementation
    """
    print("-" * 60)
    print("Starting least square fitting benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))

    y = np.random.randn(n_samples, 5)
    x = np.random.randn(n_samples, n_features)

    start = dt.datetime.now()
    for _ in range(n_loops):
        neutralize(x, y)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    for _ in range(n_loops):
        # Reference: no-intercept fit, then the residual computed by hand.
        benchmark_model = LinearRegression(fit_intercept=False)
        benchmark_model.fit(x, y)
        y - x @ benchmark_model.coef_.T
    benchmark_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
if __name__ == '__main__':
    # Single representative run when the module is executed directly.
    benchmark_neutralize(3000, 10, 1000)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import datetime as dt import datetime as dt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from scipy.stats import zscore from scipy.stats import zscore
from alphamind.data.standardize import standardize from alphamind.data.standardize import standardize
def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
    """Time ``standardize`` against ``scipy.stats.zscore``.

    A random ``(n_samples, n_features)`` matrix is generated once and each
    implementation is applied to it ``n_loops`` times. The elapsed
    wall-clock time of both loops is printed.

    :param n_samples: number of rows in the random input
    :param n_features: number of columns in the random input
    :param n_loops: number of repetitions of each implementation
    """
    print("-" * 60)
    print("Starting standardizing benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))

    x = np.random.randn(n_samples, n_features)

    start = dt.datetime.now()
    for _ in range(n_loops):
        standardize(x)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    for _ in range(n_loops):
        zscore(x)
    benchmark_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
    """Time grouped ``standardize`` against a pandas group-by transform.

    Each row of a random ``(n_samples, n_features)`` matrix is assigned a
    random integer group label in ``[0, n_groups)``. Both implementations
    are run ``n_loops`` times and their elapsed wall-clock time is printed.

    :param n_samples: number of rows in the random input
    :param n_features: number of columns in the random input
    :param n_loops: number of repetitions of each implementation
    :param n_groups: exclusive upper bound of the random group labels
    """
    print("-" * 60)
    print("Starting standardizing with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))

    x = np.random.randn(n_samples, n_features)
    groups = np.random.randint(n_groups, size=n_samples)

    start = dt.datetime.now()
    for _ in range(n_loops):
        standardize(x, groups=groups)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    for _ in range(n_loops):
        # The DataFrame is rebuilt inside the loop, so its construction
        # cost is part of the measured reference time.
        pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
    benchmark_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
if __name__ == '__main__':
    # Single representative run of each benchmark when executed directly.
    benchmark_standardize(3000, 10, 1000)
    benchmark_standardize_with_group(3000, 10, 1000, 30)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import datetime as dt import datetime as dt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from alphamind.data.winsorize import winsorize_normal from alphamind.data.winsorize import winsorize_normal
def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None:
    """Time ``winsorize_normal`` against a plain NumPy reference.

    A random ``(n_samples, n_features)`` matrix is clipped at two standard
    deviations around the column mean by both implementations, each run
    ``n_loops`` times. The elapsed wall-clock time of both loops is printed.

    :param n_samples: number of rows in the random input
    :param n_features: number of columns in the random input
    :param n_loops: number of repetitions of each implementation
    """
    print("-" * 60)
    print("Starting winsorize normal benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))

    num_stds = 2

    x = np.random.randn(n_samples, n_features)

    start = dt.datetime.now()
    for _ in range(n_loops):
        winsorize_normal(x, num_stds)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    def impl(x):
        # Reference: clip each column to mean +/- num_stds * std.
        std_values = x.std(axis=0)
        mean_value = x.mean(axis=0)

        lower_bound = mean_value - num_stds * std_values
        upper_bound = mean_value + num_stds * std_values

        res = np.where(x > upper_bound, upper_bound, x)
        res = np.where(res < lower_bound, lower_bound, res)
        return res

    start = dt.datetime.now()
    for _ in range(n_loops):
        impl(x)
    benchmark_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
    """Time grouped ``winsorize_normal`` against a pandas group-by transform.

    Each row of a random ``(n_samples, n_features)`` matrix is assigned a
    random integer group label in ``[0, n_groups)``. Both implementations
    clip at two standard deviations and are run ``n_loops`` times; their
    elapsed wall-clock time is printed.

    :param n_samples: number of rows in the random input
    :param n_features: number of columns in the random input
    :param n_loops: number of repetitions of each implementation
    :param n_groups: exclusive upper bound of the random group labels
    """
    print("-" * 60)
    print("Starting winsorize normal with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))

    num_stds = 2

    x = np.random.randn(n_samples, n_features)
    groups = np.random.randint(n_groups, size=n_samples)

    start = dt.datetime.now()
    for _ in range(n_loops):
        winsorize_normal(x, num_stds, groups=groups)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    def impl(x):
        # Reference clipping, passed to the pandas group-by transform below.
        std_values = x.std(axis=0)
        mean_value = x.mean(axis=0)

        lower_bound = mean_value - num_stds * std_values
        upper_bound = mean_value + num_stds * std_values

        res = np.where(x > upper_bound, upper_bound, x)
        res = np.where(res < lower_bound, lower_bound, res)
        return res

    start = dt.datetime.now()
    for _ in range(n_loops):
        pd.DataFrame(x).groupby(groups).transform(impl)
    benchmark_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
if __name__ == '__main__':
    # Single representative run of each benchmark when executed directly.
    benchmark_winsorize_normal(3000, 10, 1000)
    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-27 Created on 2017-4-27
@author: cheng.li @author: cheng.li
""" """
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-27 Created on 2017-4-27
@author: cheng.li @author: cheng.li
""" """
import datetime as dt import datetime as dt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from alphamind.portfolio.rankbuilder import rank_build from alphamind.portfolio.rankbuilder import rank_build
def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
    """Time ``rank_build`` against a NumPy double-argsort reference.

    A random ``(n_samples, 10)`` matrix is generated once; both
    implementations are run ``n_loops`` times, their last results are
    checked for equality, and the elapsed wall-clock times are printed.

    :param n_samples: number of rows in the random input
    :param n_loops: number of repetitions of each implementation; the
        final comparison needs at least one iteration to have run
    :param n_included: number of top-ranked rows per column given weight 1
    """
    print("-" * 60)
    print("Starting portfolio construction by rank benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))

    n_portfolio = 10

    x = np.random.randn(n_samples, n_portfolio)

    start = dt.datetime.now()
    for _ in range(n_loops):
        calc_weights = rank_build(x, n_included)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    for _ in range(n_loops):
        exp_weights = np.zeros((len(x), n_portfolio))
        # Double argsort yields each element's rank within its column.
        choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
        for j in range(n_portfolio):
            exp_weights[choosed_index[:, j], j] = 1.
    benchmark_model_time = dt.datetime.now() - start

    np.testing.assert_array_almost_equal(calc_weights, exp_weights)

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, n_groups: int) -> None:
    """Time grouped ``rank_build`` against a pandas group-by rank reference.

    Each row of a random ``(n_samples, 10)`` matrix is assigned a random
    integer group label in ``[0, n_groups)``. Both implementations are run
    ``n_loops`` times, their last results are checked for equality, and
    the elapsed wall-clock times are printed.

    :param n_samples: number of rows in the random input
    :param n_loops: number of repetitions of each implementation; the
        final comparison needs at least one iteration to have run
    :param n_included: number of top-ranked rows per group and column
        given weight 1
    :param n_groups: exclusive upper bound of the random group labels
    """
    print("-" * 60)
    print("Starting portfolio construction by rank with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))

    n_portfolio = 10

    x = np.random.randn(n_samples, n_portfolio)
    groups = np.random.randint(n_groups, size=n_samples)

    start = dt.datetime.now()
    for _ in range(n_loops):
        calc_weights = rank_build(x, n_included, groups=groups)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    for _ in range(n_loops):
        # Ranking the negated values puts the largest entries first.
        grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
        exp_weights = np.zeros((len(x), n_portfolio))
        masks = (grouped_ordering <= n_included).values
        for j in range(n_portfolio):
            exp_weights[masks[:, j], j] = 1.
    benchmark_model_time = dt.datetime.now() - start

    np.testing.assert_array_almost_equal(calc_weights, exp_weights)

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
if __name__ == '__main__':
    # Single representative run of each benchmark when executed directly.
    benchmark_build_rank(3000, 1000, 300)
    benchmark_build_rank_with_group(3000, 1000, 10, 30)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-28 Created on 2017-4-28
@author: cheng.li @author: cheng.li
""" """
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-28 Created on 2017-4-28
@author: cheng.li @author: cheng.li
""" """
import datetime as dt import datetime as dt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from alphamind.settlement.simplesettle import simple_settle from alphamind.settlement.simplesettle import simple_settle
def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None:
    """Time ``simple_settle`` against a NumPy multiply-and-sum reference.

    Random weights of shape ``(n_samples, n_portfolios)`` and a random
    return vector of length ``n_samples`` are generated once. Both
    implementations are run ``n_loops`` times, their last results are
    checked for equality, and the elapsed wall-clock times are printed.

    :param n_samples: number of rows in the weights / length of the returns
    :param n_portfolios: number of weight columns
    :param n_loops: number of repetitions of each implementation; the
        final comparison needs at least one iteration to have run
    """
    print("-" * 60)
    print("Starting simple settle benchmarking")
    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples, n_portfolios, n_loops))

    weights = np.random.randn(n_samples, n_portfolios)
    ret_series = np.random.randn(n_samples)

    start = dt.datetime.now()
    for _ in range(n_loops):
        calc_ret = simple_settle(weights, ret_series)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    # Reshape in place to a column vector so it broadcasts over the
    # portfolio columns in the reference computation.
    ret_series.shape = -1, 1
    for _ in range(n_loops):
        exp_ret = (weights * ret_series).sum(axis=0)
    benchmark_model_time = dt.datetime.now() - start

    np.testing.assert_array_almost_equal(calc_ret, exp_ret)

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int, n_groups: int) -> None:
    """Time grouped ``simple_settle`` against a pandas group-by sum reference.

    Random weights of shape ``(n_samples, n_portfolios)``, a random return
    vector and random integer group labels in ``[0, n_groups)`` are
    generated once. Both implementations are run ``n_loops`` times, their
    last results are checked for equality, and the elapsed wall-clock
    times are printed.

    :param n_samples: number of rows in the weights / length of the returns
    :param n_portfolios: number of weight columns
    :param n_loops: number of repetitions of each implementation; the
        final comparison needs at least one iteration to have run
    :param n_groups: exclusive upper bound of the random group labels
    """
    print("-" * 60)
    print("Starting simple settle with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_portfolios, n_loops, n_groups))

    weights = np.random.randn(n_samples, n_portfolios)
    ret_series = np.random.randn(n_samples)
    groups = np.random.randint(n_groups, size=n_samples)

    start = dt.datetime.now()
    for _ in range(n_loops):
        calc_ret = simple_settle(weights, ret_series, groups=groups)
    impl_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))

    start = dt.datetime.now()
    # Reshape in place to a column vector so it broadcasts over the
    # portfolio columns in the reference computation.
    ret_series.shape = -1, 1
    for _ in range(n_loops):
        ret_mat = weights * ret_series
        exp_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
    benchmark_model_time = dt.datetime.now() - start

    np.testing.assert_array_almost_equal(calc_ret, exp_ret)

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
if __name__ == '__main__':
    # Single representative run of each benchmark when executed directly.
    benchmark_simple_settle(3000, 3, 1000)
    benchmark_simple_settle_with_group(3000, 3, 1000, 30)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import numpy as np import numpy as np
from numpy.linalg import solve from numpy.linalg import solve
from alphamind.aggregate import groupby from typing import Tuple
from alphamind.aggregate import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
    """Return the residual of regressing ``y`` on ``x`` without intercept.

    When ``groups`` is given, the regression is fitted separately on the
    row indices that ``groupby(groups)`` yields, and each group's residual
    is written back into the matching rows of the result.

    :param x: regressor matrix, one row per sample
    :param y: target matrix with the same number of rows as ``x``
    :param groups: optional per-row group labels
    :return: array with the shape of ``y`` holding the residuals
    """
    if groups is not None:
        res = np.zeros(y.shape)
        groups_ids = groupby(groups)

        for curr_idx in groups_ids:
            curr_x = x[curr_idx]
            curr_y = y[curr_idx]
            b = ls_fit(curr_x, curr_y)
            res[curr_idx] = ls_res(curr_x, curr_y, b)
        return res
    else:
        b = ls_fit(x, y)
        return ls_res(x, y, b)


def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Solve the normal equations ``(x.T @ x) b = x.T @ y`` for ``b``."""
    x_bar = x.T
    b = solve(x_bar @ x, x_bar @ y)
    return b


def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Return the residual ``y - x @ b`` for coefficients ``b``."""
    return y - x @ b


def ls_explained(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Placeholder with no implementation yet; currently returns ``None``.

    NOTE(review): the intended result is not shown in this commit --
    confirm with the author before implementing.
    """
    pass
if __name__ == '__main__':
    # Ad-hoc driver: fit on random data and call the ls_explained stub.
    x = np.random.randn(3000, 3)
    y = np.random.randn(3000, 2)
    groups = np.random.randint(30, size=3000)

    b = ls_fit(x, y)
    ls_explained(x, y, b)
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import numpy as np import numpy as np
from alphamind.aggregate import transform from alphamind.aggregate import transform
def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
    """Subtract the mean from ``x`` and divide by the standard deviation.

    Without ``groups`` the statistics are taken per column over all rows
    (``axis=0``). With ``groups`` they come from
    ``transform(groups, x, 'mean')`` and ``transform(groups, x, 'std')``.

    :param x: input array, one row per sample
    :param groups: optional per-row group labels
    :return: the standardized array
    """
    if groups is not None:
        mean_values = transform(groups, x, 'mean')
        std_values = transform(groups, x, 'std')

        return (x - mean_values) / std_values
    else:
        return (x - x.mean(axis=0)) / x.std(axis=0)
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import numpy as np import numpy as np
from alphamind.aggregate import transform from alphamind.aggregate import transform
def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:
    """Clip ``x`` to ``mean +/- num_stds * std``.

    Without ``groups`` the statistics are taken per column over all rows
    (``axis=0``). With ``groups`` they come from
    ``transform(groups, x, 'mean')`` and ``transform(groups, x, 'std')``.

    :param x: input array, one row per sample
    :param num_stds: number of standard deviations defining the bounds
    :param groups: optional per-row group labels
    :return: a new array with out-of-bound values replaced by the bound
    """
    if groups is not None:
        mean_values = transform(groups, x, 'mean')
        std_values = transform(groups, x, 'std')
    else:
        std_values = x.std(axis=0)
        mean_values = x.mean(axis=0)

    ubound = mean_values + num_stds * std_values
    lbound = mean_values - num_stds * std_values

    res = np.where(x > ubound, ubound, np.where(x < lbound, lbound, x))

    return res
if __name__ == '__main__':
    # Ad-hoc driver: repeatedly winsorize random grouped data.
    x = np.random.randn(3000, 10)
    groups = np.random.randint(0, 20, size=3000)

    for _ in range(1000):
        winsorize_normal(x, 2, groups)
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-26 Created on 2017-4-26
@author: cheng.li @author: cheng.li
""" """
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-29 Created on 2017-4-29
@author: cheng.li @author: cheng.li
""" """
import numpy as np import numpy as np
cimport numpy as np cimport numpy as np
cimport cython cimport cython
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef void set_value_bool(unsigned char[:, :] mat, long long[:, :] index):
    """Set ``mat[index[i, j], j]`` to True for every entry of ``index``.

    Both buffers are addressed through raw pointers with a row stride of
    ``index.shape[1]``. The code therefore assumes ``mat`` and ``index``
    are C-contiguous, have the same number of columns, and are non-empty.
    No bounds checks are made on the row numbers stored in ``index``.
    """
    cdef size_t length = index.shape[0]
    cdef size_t width = index.shape[1]

    cdef size_t i
    cdef size_t j
    cdef unsigned char* mat_ptr = &mat[0, 0]
    cdef long long* index_ptr = &index[0, 0]
    cdef size_t k

    for i in range(length):
        # k is the flat offset of row i in ``index``
        k = i * width
        for j in range(width):
            mat_ptr[index_ptr[k + j] * width + j] = True
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef void set_value_double(double[:, :] mat, long long[:, :] index, double val):
    """Set ``mat[index[i, j], j]`` to ``val`` for every entry of ``index``.

    Both buffers are addressed through raw pointers with a row stride of
    ``index.shape[1]``. The code therefore assumes ``mat`` and ``index``
    are C-contiguous, have the same number of columns, and are non-empty.
    No bounds checks are made on the row numbers stored in ``index``.
    """
    cdef size_t length = index.shape[0]
    cdef size_t width = index.shape[1]

    cdef size_t i
    cdef size_t j
    cdef double* mat_ptr = &mat[0, 0]
    cdef long long* index_ptr = &index[0, 0]
    cdef size_t k

    for i in range(length):
        # k is the flat offset of row i in ``index``
        k = i * width
        for j in range(width):
            mat_ptr[index_ptr[k + j] * width + j] = val
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-26 Created on 2017-4-26
@author: cheng.li @author: cheng.li
""" """
import numpy as np import numpy as np
from numpy import zeros from numpy import zeros
from alphamind.aggregate import groupby from alphamind.aggregate import groupby
from alphamind.portfolio.impl import set_value_bool from alphamind.portfolio.impl import set_value_bool
from alphamind.portfolio.impl import set_value_double from alphamind.portfolio.impl import set_value_double
def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
    """Build 0/1 weights that flag the ``use_rank`` largest values of ``er``.

    Values are ranked per column by arg-sorting the negated input, so the
    largest expected returns come first. When ``groups`` is given, the top
    ``use_rank`` entries are picked inside every group separately.

    :param er: 1-D or 2-D array of scores. A 1-D input, or a 2-D input with a
        single row or a single column, is flattened and handled as one column.
    :param use_rank: number of entries to select (per group when grouped).
    :param groups: optional labels passed to the project's ``groupby`` helper
        (presumably yielding one index array per group -- confirm there).
    :return: float array with ``1.`` at selected positions and ``0.`` elsewhere;
        shape ``(n, 1)`` on the flattened path, otherwise the shape of ``er``.
    """
    single_column = er.ndim == 1 or er.shape[0] == 1 or er.shape[1] == 1

    if single_column:
        # Fast path: plain NumPy fancy indexing is enough for one column.
        scores = -er.flatten()
        n_rows = len(scores)
        weights = zeros((n_rows, 1))

        if groups is None:
            weights[scores.argsort()[:use_rank]] = 1.
            return weights

        selected = zeros(n_rows, dtype=bool)
        for members in groupby(groups):
            best = scores[members].argsort()[:use_rank]
            selected[members[best]] = True
        weights[selected] = 1.
        return weights

    n_rows = er.shape[0]
    n_cols = er.shape[1]
    scores = -er
    weights = zeros((n_rows, n_cols))

    if groups is None:
        # Column-wise ordering; the compiled helper writes the selected cells.
        set_value_double(weights, scores.argsort(axis=0)[:use_rank], 1.)
        return weights

    selected = zeros((n_rows, n_cols), dtype=bool)
    for members in groupby(groups):
        order = scores[members].argsort(axis=0)
        # Translate positions inside the group back to rows of the full array.
        set_value_bool(selected.view(dtype=np.uint8), members[order[:use_rank]])

    weights[selected] = 1.
    return weights
if __name__ == '__main__':
    # Tiny manual demo: six random scores in three hand-written groups,
    # keeping the single best entry of every group.
    n_sample = 6
    n_groups = 3

    groups = np.array([1, 1, 2, 1, 0, 2])
    scores = np.random.randn(n_sample)

    print(groups)
    print(groupby(groups))
    print(rank_build(scores, 1, groups))
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-28 Created on 2017-4-28
@author: cheng.li @author: cheng.li
""" """
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-28 Created on 2017-4-28
@author: cheng.li @author: cheng.li
""" """
import numpy as np import numpy as np
from alphamind.aggregate import aggregate from alphamind.aggregate import aggregate
def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
    """Settle portfolio weights against a return series.

    Every row of ``weights`` is scaled by the matching entry of ``ret_series``.

    :param weights: weight array whose first axis lines up with ``ret_series``.
    :param ret_series: returns; anything with more than one dimension is
        flattened first.
    :param groups: optional group labels. When given, the scaled matrix is
        handed to the project's ``aggregate(groups, ret_mat, 'sum')`` helper
        (presumably a per-group sum -- confirm there).
    :return: the column sums of the scaled matrix, or the ``aggregate`` result
        when ``groups`` is supplied.
    """
    flat_returns = ret_series.flatten() if ret_series.ndim > 1 else ret_series

    # Broadcasting runs along the last axis, hence the transpose round trip.
    ret_mat = (weights.T * flat_returns).T

    if groups is None:
        return ret_mat.sum(axis=0)
    return aggregate(groups, ret_mat, 'sum')
if __name__ == '__main__':
    # Manual check of the group mapping helper on six random labels in [2, 5).
    from alphamind.aggregate import group_mapping_test

    labels = np.random.randint(2, 5, size=6)
    print(labels)
    print(group_mapping_test(labels))
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import unittest import unittest
import numpy as np import numpy as np
from sklearn.linear_model import LinearRegression from sklearn.linear_model import LinearRegression
from alphamind.data.neutralize import neutralize from alphamind.data.neutralize import neutralize
class TestNeutralize(unittest.TestCase):
    """Compare ``neutralize`` with no-intercept least-squares residuals from scikit-learn."""

    def test_neutralize(self):
        # Whole-sample case: expected output is y minus its projection on x.
        y = np.random.randn(3000, 4)
        x = np.random.randn(3000, 10)

        calc_res = neutralize(x, y)

        ols = LinearRegression(fit_intercept=False)
        ols.fit(x, y)
        exp_res = y - x @ ols.coef_.T

        np.testing.assert_array_almost_equal(calc_res, exp_res)

    def test_neutralize_with_group(self):
        # Grouped case: the regression is refitted on the rows of each group.
        y = np.random.randn(3000, 4)
        x = np.random.randn(3000, 10)
        groups = np.random.randint(30, size=3000)

        calc_res = neutralize(x, y, groups)

        ols = LinearRegression(fit_intercept=False)
        for i in range(30):
            in_group = groups == i
            group_x = x[in_group]
            group_y = y[in_group]
            ols.fit(group_x, group_y)
            exp_res = group_y - group_x @ ols.coef_.T
            np.testing.assert_array_almost_equal(calc_res[in_group], exp_res)


if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from scipy.stats import zscore from scipy.stats import zscore
from alphamind.data.standardize import standardize from alphamind.data.standardize import standardize
class TestStandardize(unittest.TestCase):
    """Compare ``standardize`` with SciPy's ``zscore`` and a pandas group-wise z-score."""

    def test_standardize(self):
        x = np.random.randn(3000, 10)

        calc_zscore = standardize(x)

        np.testing.assert_array_almost_equal(calc_zscore, zscore(x))

    def test_standardize_with_group(self):
        x = np.random.randn(3000, 10)
        groups = np.random.randint(10, 30, size=3000)

        calc_zscore = standardize(x, groups)

        def group_zscore(s):
            # Centre and scale one group's values with pandas' own mean/std.
            return (s - s.mean(axis=0)) / s.std(axis=0)

        exp_zscore = pd.DataFrame(x).groupby(groups).transform(group_zscore)
        np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)


if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from alphamind.data.winsorize import winsorize_normal from alphamind.data.winsorize import winsorize_normal
class TestWinsorize(unittest.TestCase):
    """Compare ``winsorize_normal`` with explicit mean +/- ``num_stds`` * std clipping."""

    def test_winsorize_normal(self):
        num_stds = 2
        x = np.random.randn(3000, 10)

        calc_winsorized = winsorize_normal(x, num_stds)

        std_values = x.std(axis=0)
        mean_value = x.mean(axis=0)
        lower_bound = mean_value - num_stds * std_values
        upper_bound = mean_value + num_stds * std_values

        # Check column by column against the clipped raw data.
        for i in range(np.size(calc_winsorized, 1)):
            expected_col = np.clip(x[:, i], lower_bound[i], upper_bound[i])
            np.testing.assert_array_almost_equal(expected_col, calc_winsorized[:, i])

    def test_winsorize_normal_with_group(self):
        num_stds = 2
        x = np.random.randn(3000, 10)
        groups = np.random.randint(10, 30, size=3000)

        cal_winsorized = winsorize_normal(x, num_stds, groups)

        def clip_group(values):
            # Bounds come from the statistics of the group being transformed.
            centre = values.mean(axis=0)
            spread = values.std(axis=0)
            lower_bound = centre - num_stds * spread
            upper_bound = centre + num_stds * spread
            capped = np.where(values > upper_bound, upper_bound, values)
            return np.where(capped < lower_bound, lower_bound, capped)

        exp_winsorized = pd.DataFrame(x).groupby(groups).transform(clip_group).values
        np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized)


if __name__ == "__main__":
    unittest.main()
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-27 Created on 2017-4-27
@author: cheng.li @author: cheng.li
""" """
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-27 Created on 2017-4-27
@author: cheng.li @author: cheng.li
""" """
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from alphamind.portfolio.rankbuilder import rank_build from alphamind.portfolio.rankbuilder import rank_build
class TestRankBuild(unittest.TestCase):
    """Compare ``rank_build`` with rank masks computed by NumPy and pandas."""

    def test_rank_build(self):
        n_samples = 3000
        n_included = 300

        for n_portfolio in range(10):
            x = np.random.randn(n_samples, n_portfolio)

            calc_weights = rank_build(x, n_included)

            # A double argsort turns values into 0-based ranks per column.
            ranks = (-x).argsort(axis=0).argsort(axis=0)
            expected_weights = np.zeros((len(x), n_portfolio))
            expected_weights[ranks < n_included] = 1.

            np.testing.assert_array_almost_equal(calc_weights, expected_weights)

    def test_rank_build_with_group(self):
        n_samples = 3000
        n_include = 10
        n_groups = 30

        for n_portfolio in range(10):
            x = np.random.randn(n_samples, n_portfolio)
            groups = np.random.randint(n_groups, size=n_samples)

            calc_weights = rank_build(x, n_include, groups)

            # pandas ranks start at 1, hence the inclusive comparison.
            grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
            masks = (grouped_ordering <= n_include).values
            expected_weights = np.zeros((len(x), n_portfolio))
            for col in range(x.shape[1]):
                picked = masks[:, col]
                expected_weights[picked, col] = 1.

            np.testing.assert_array_almost_equal(calc_weights, expected_weights)


if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-28 Created on 2017-4-28
@author: cheng.li @author: cheng.li
""" """
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-28 Created on 2017-4-28
@author: cheng.li @author: cheng.li
""" """
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from alphamind.settlement.simplesettle import simple_settle from alphamind.settlement.simplesettle import simple_settle
class TestSimpleSettle(unittest.TestCase):
    """Compare ``simple_settle`` with direct NumPy / pandas sums.

    Each test runs twice: once with a 1-D return series and once with an
    ``(n_samples, 1)`` column.
    """

    def test_simples_settle(self):
        n_samples = 3000
        n_portfolio = 3
        weights = np.random.randn(n_samples, n_portfolio)

        # 1-D return series.
        ret_series = np.random.randn(n_samples)
        calc_ret = simple_settle(weights, ret_series)
        ret_column = ret_series.reshape(-1, 1)
        expected_ret = (weights * ret_column).sum(axis=0)
        np.testing.assert_array_almost_equal(calc_ret, expected_ret)

        # Column-shaped return series.
        ret_series = np.random.randn(n_samples, 1)
        calc_ret = simple_settle(weights, ret_series)
        expected_ret = (weights * ret_series).sum(axis=0)
        np.testing.assert_array_almost_equal(calc_ret, expected_ret)

    def test_simple_settle_with_group(self):
        n_samples = 3000
        n_portfolio = 3
        n_groups = 30
        weights = np.random.randn(n_samples, n_portfolio)

        # 1-D return series.
        ret_series = np.random.randn(n_samples)
        groups = np.random.randint(n_groups, size=n_samples)
        calc_ret = simple_settle(weights, ret_series, groups)
        ret_mat = weights * ret_series.reshape(-1, 1)
        # sort=False keeps groups in order of first appearance.
        expected_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
        np.testing.assert_array_almost_equal(calc_ret, expected_ret)

        # Column-shaped return series.
        ret_series = np.random.randn(n_samples, 1)
        calc_ret = simple_settle(weights, ret_series, groups)
        ret_mat = weights * ret_series
        expected_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
        np.testing.assert_array_almost_equal(calc_ret, expected_ret)


if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
from alphamind.utilities import add_parent_path from alphamind.utilities import add_parent_path
add_parent_path(__file__, 3) add_parent_path(__file__, 3)
from alphamind.tests.data.test_neutralize import TestNeutralize from alphamind.tests.data.test_neutralize import TestNeutralize
from alphamind.tests.data.test_standardize import TestStandardize from alphamind.tests.data.test_standardize import TestStandardize
from alphamind.tests.data.test_winsorize import TestWinsorize from alphamind.tests.data.test_winsorize import TestWinsorize
from alphamind.tests.portfolio.test_rankbuild import TestRankBuild from alphamind.tests.portfolio.test_rankbuild import TestRankBuild
from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import TestRunner from alphamind.utilities import TestRunner
if __name__ == '__main__':
    # Run every test case of the package in one suite; the runner exits the
    # process with a status that reflects the outcome.
    all_cases = [
        TestNeutralize,
        TestStandardize,
        TestWinsorize,
        TestRankBuild,
        TestSimpleSettle,
    ]
    TestRunner(all_cases, alpha_logger).run()
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import os import os
import sys import sys
import logging import logging
import unittest import unittest
# Package-wide logger: INFO level, one stream handler with a timestamped format.
alpha_logger = logging.getLogger('ALPHA_MIND')
alpha_logger.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch = logging.StreamHandler()
ch.setFormatter(formatter)
alpha_logger.addHandler(ch)
def add_parent_path(name, level):
    """Append an ancestor directory of ``name`` to ``sys.path``.

    :param name: a file path, typically ``__file__`` of the caller.
    :param level: how many trailing components of the absolute path to drop,
        counting the file name itself as one.
    """
    separator = os.path.sep
    components = os.path.abspath(name).split(separator)
    ancestor = separator.join(components[:-level])
    sys.path.append(ancestor)
class TestRunner(object):
    """Collect several ``unittest.TestCase`` classes into one suite and run it."""

    def __init__(self,
                 test_cases,
                 logger):
        """
        :param test_cases: iterable of ``unittest.TestCase`` subclasses.
        :param logger: logger used to report the interpreter version.
        """
        self.suite = unittest.TestSuite()
        self.logger = logger

        loader = unittest.TestLoader()
        for case in test_cases:
            self.suite.addTests(loader.loadTestsFromTestCase(case))

    def run(self):
        """Run the suite, then exit the process: ``0`` on success, ``-1`` otherwise."""
        self.logger.info('Python ' + sys.version)
        res = unittest.TextTestRunner(verbosity=3).run(self.suite)

        # Any error or failure makes the whole run unsuccessful.
        failed = bool(res.errors) or bool(res.failures)
        sys.exit(-1 if failed else 0)
cython >= 0.25.2 cython >= 0.25.2
numpy >= 1.12.1 numpy >= 1.12.1
scikit-learn >= 0.18.1 scikit-learn >= 0.18.1
scipy >= 0.19.0 scipy >= 0.19.0
pandas >= 0.19.2 pandas >= 0.19.2
\ No newline at end of file
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Created on 2017-4-25 Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
import platform import platform
import sys import sys
from setuptools import setup from setuptools import setup
from setuptools import find_packages from setuptools import find_packages
from distutils.extension import Extension from distutils.extension import Extension
import numpy as np import numpy as np
import Cython import Cython
from Cython.Build import cythonize from Cython.Build import cythonize
# Ask Cython to produce its annotation output while compiling.
Cython.Compiler.Options.annotate = True

VERSION = "0.1.0"

# ``--line_trace`` is a private flag of this script; it is removed from
# ``sys.argv`` so that setuptools never sees it.
line_trace = "--line_trace" in sys.argv
if line_trace:
    print("Build with line trace enabled ...")
    sys.argv.remove("--line_trace")

# Cython sources to compile, relative to the repository root.
ext_modules = [
    'alphamind/aggregate.pyx',
    'alphamind/portfolio/impl.pyx',
]
def generate_extensions(ext_modules, line_trace=False):
    """Create one ``Extension`` per ``.pyx`` path.

    :param ext_modules: '/'-separated paths of Cython sources ending in ``.pyx``.
    :param line_trace: when true, define the ``CYTHON_TRACE`` and
        ``CYTHON_TRACE_NOGIL`` macros for every extension.
    :return: list of ``Extension`` objects whose dotted module name is the
        path with '/' replaced by '.' and the last four characters removed.
    """
    define_macros = []
    if line_trace:
        print("define cython trace to True ...")
        define_macros = [('CYTHON_TRACE', 1), ('CYTHON_TRACE_NOGIL', 1)]

    return [
        Extension(name=pyxfile.replace('/', '.')[:-4],
                  sources=[pyxfile],
                  define_macros=define_macros)
        for pyxfile in ext_modules
    ]
# Compile on every core except on Windows, where the thread count is left at 0.
if platform.system() == "Windows":
    n_cpu = 0
else:
    import multiprocessing
    n_cpu = multiprocessing.cpu_count()

compiler_directives = {'embedsignature': True, 'linetrace': line_trace}
ext_modules_settings = cythonize(generate_extensions(ext_modules, line_trace),
                                 compiler_directives=compiler_directives,
                                 nthreads=n_cpu)

setup(
    name='Alpha-Mind',
    version=VERSION,
    packages=find_packages(),
    url='',
    license='',
    author='wegamekinglc',
    author_email='',
    ext_modules=ext_modules_settings,
    # NumPy headers are made available to the compiled extensions.
    include_dirs=[np.get_include()],
    description=''
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment