Commit 258b0348 authored by Dr.李

added rank build tests and benchmark

parent b971f376
......@@ -10,6 +10,8 @@ from numpy import zeros
from numpy import asarray
cimport cython
from libc.math cimport sqrt
from libc.stdlib cimport calloc
from libc.stdlib cimport free
@cython.boundscheck(False)
......@@ -26,69 +28,76 @@ cdef int max_groups(long* groups, size_t length) nogil:
curr_max = curr
return curr_max
# Compute the column-wise mean of ``x`` for every group label.
#
# ``x`` is read as a row-major ``length x width`` buffer and ``groups[i]`` is
# used directly as the row index of the output, so labels must be
# non-negative and no larger than the value returned by ``max_groups``.
#
# Returns a freshly calloc'ed, row-major ``(max_g + 1) x width`` buffer, where
# ``max_g`` is the value returned by ``max_groups``. Rows whose label never
# occurs stay zero. Ownership passes to the caller, who must ``free`` it.
# Returns NULL when an allocation fails.
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_mean(long* groups, double* x, size_t length, size_t width) nogil:
    cdef long max_g = max_groups(groups, length)
    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
    # The counters are C longs, so they must be sized with sizeof(long).
    # Sizing them with sizeof(int) under-allocates on platforms where long is
    # wider than int and lets the accumulation loop write past the buffer.
    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(long))
    cdef size_t i
    cdef size_t j
    cdef size_t loop_idx1
    cdef size_t loop_idx2
    cdef long curr

    if res_ptr == NULL or bin_count_ptr == NULL:
        # free(NULL) is a no-op, so both pointers can be released blindly.
        free(res_ptr)
        free(bin_count_ptr)
        return NULL

    # Pass 1: accumulate per-group column sums and member counts.
    for i in range(length):
        loop_idx1 = i*width
        loop_idx2 = groups[i]*width
        for j in range(width):
            res_ptr[loop_idx2 + j] += x[loop_idx1 + j]
        bin_count_ptr[groups[i]] += 1

    # Pass 2: turn the sums into means; empty groups are left untouched.
    for i in range(max_g+1):
        curr = bin_count_ptr[i]
        if curr != 0:
            loop_idx1 = i*width
            for j in range(width):
                res_ptr[loop_idx1 + j] /= curr

    free(bin_count_ptr)
    return res_ptr
# Compute the column-wise standard deviation of ``x`` for every group label.
#
# ``x`` is read as a row-major ``length x width`` buffer and ``groups[i]`` is
# used directly as the row index of the output, so labels must be
# non-negative and no larger than the value returned by ``max_groups``.
# ``ddof`` is subtracted from the group size in the variance denominator.
#
# Returns a freshly calloc'ed, row-major ``(max_g + 1) x width`` buffer, where
# ``max_g`` is the value returned by ``max_groups``. Rows whose label never
# occurs stay zero. Ownership passes to the caller, who must ``free`` it.
# Returns NULL when an allocation fails.
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_std(long* groups, double* x, size_t length, size_t width, long ddof=1) nogil:
    cdef long max_g = max_groups(groups, length)
    cdef double* running_sum_square_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
    cdef double* running_sum_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
    # The counters are C longs, so they must be sized with sizeof(long).
    # Sizing them with sizeof(int) under-allocates on platforms where long is
    # wider than int and lets the accumulation loop write past the buffer.
    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(long))
    cdef size_t i
    cdef size_t j
    cdef size_t loop_idx1
    cdef size_t loop_idx2
    cdef long curr
    cdef double raw_value

    if running_sum_square_ptr == NULL or running_sum_ptr == NULL or bin_count_ptr == NULL:
        # free(NULL) is a no-op, so all three can be released blindly.
        free(running_sum_square_ptr)
        free(running_sum_ptr)
        free(bin_count_ptr)
        return NULL

    # Pass 1: accumulate per-group sums, sums of squares and member counts.
    for i in range(length):
        loop_idx1 = i * width
        loop_idx2 = groups[i] * width
        for j in range(width):
            raw_value = x[loop_idx1 + j]
            running_sum_ptr[loop_idx2 + j] += raw_value
            running_sum_square_ptr[loop_idx2 + j] += raw_value * raw_value
        bin_count_ptr[groups[i]] += 1

    # Pass 2: overwrite the sum of squares in place with
    # sqrt((sum(x^2) - sum(x)^2 / n) / (n - ddof)); empty groups are skipped.
    for i in range(max_g+1):
        curr = bin_count_ptr[i]
        loop_idx1 = i * width
        if curr != 0:
            for j in range(width):
                loop_idx2 = loop_idx1 + j
                running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))

    free(running_sum_ptr)
    free(bin_count_ptr)
    return running_sum_square_ptr
@cython.boundscheck(False)
......@@ -100,23 +109,22 @@ cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str f
cdef size_t width = x.shape[1]
cdef double[:, :] res_data = zeros((length, width))
cdef double* res_data_ptr = &res_data[0, 0]
cdef double[:, :] value_data = zeros((length, width))
cdef double* value_data_ptr
cdef size_t i
cdef size_t j
cdef size_t k
cdef size_t loop_idx1
cdef size_t loop_idx2
if func == 'mean':
value_data = agg_mean(&groups[0], &x[0, 0], length, width)
value_data_ptr = agg_mean(&groups[0], &x[0, 0], length, width)
elif func == 'std':
value_data = agg_std(&groups[0], &x[0, 0], length, width, ddof=1)
value_data_ptr = &value_data[0, 0]
value_data_ptr = agg_std(&groups[0], &x[0, 0], length, width, ddof=1)
with nogil:
for i in range(length):
k = groups[i]
loop_idx1 = i*width
loop_idx2 = groups[i] * width
for j in range(width):
res_data_ptr[i*width + j] = value_data_ptr[k*width + j]
res_data_ptr[loop_idx1 + j] = value_data_ptr[loop_idx2 + j]
free(value_data_ptr)
return asarray(res_data)
\ No newline at end of file
......@@ -10,6 +10,8 @@ from alphamind.benchmarks.data.standardize import benchmark_standardize
from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
if __name__ == '__main__':
......@@ -28,3 +30,9 @@ if __name__ == '__main__':
benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
benchmark_winsorize_normal(50000, 50, 20)
benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
benchmark_build_rank(3000, 1000, 300)
benchmark_build_rank_with_group(3000, 1000, 10, 30)
benchmark_build_rank(30, 50000, 3)
benchmark_build_rank_with_group(30, 50000, 1, 3)
benchmark_build_rank(50000, 20, 3000)
benchmark_build_rank_with_group(50000, 20, 10, 300)
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
import datetime as dt
import numpy as np
import pandas as pd
from alphamind.portfolio.rankbuilder import rank_build
def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
    """Time ``rank_build`` against a plain NumPy double-argsort baseline.

    A standard-normal vector of length ``n_samples`` is drawn once. Both the
    implementation under test and the baseline are then run ``n_loops`` times
    and the wall-clock duration of each loop is printed.

    The baseline gives weight ``1 / n_included`` to the ``n_included`` largest
    values and zero to everything else.
    """
    print("-" * 60)
    print("Starting portfolio construction by rank benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))

    scores = np.random.randn(n_samples)

    # Implementation under test.
    tic = dt.datetime.now()
    for _ in range(n_loops):
        rank_build(scores, n_included)
    elapsed_impl = dt.datetime.now() - tic
    print('{0:20s}: {1}'.format('Implemented model', elapsed_impl))

    # Reference: argsort twice to get descending ranks, then threshold.
    tic = dt.datetime.now()
    for _ in range(n_loops):
        baseline_weights = np.zeros(len(scores))
        descending_rank = (-scores).argsort().argsort()
        baseline_weights[descending_rank < n_included] = 1. / n_included
    elapsed_baseline = dt.datetime.now() - tic
    print('{0:20s}: {1}'.format('Benchmark model', elapsed_baseline))
def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, n_groups: int) -> None:
    """Time grouped ``rank_build`` against a pandas group-by rank baseline.

    A standard-normal vector of length ``n_samples`` and integer group labels
    drawn uniformly from ``[0, n_groups)`` are generated once. Both the
    implementation under test and the baseline are then run ``n_loops`` times
    and the wall-clock duration of each loop is printed.

    The baseline ranks the negated values inside every group, keeps entries
    whose rank is at most ``n_included`` and spreads a total weight of one
    evenly over the kept entries.
    """
    print("-" * 60)
    print("Starting portfolio construction by rank with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))

    scores = np.random.randn(n_samples)
    labels = np.random.randint(n_groups, size=n_samples)

    # Implementation under test.
    tic = dt.datetime.now()
    for _ in range(n_loops):
        rank_build(scores, n_included, groups=labels)
    elapsed_impl = dt.datetime.now() - tic
    print('{0:20s}: {1}'.format('Implemented model', elapsed_impl))

    # Reference: per-group ranking through pandas.
    tic = dt.datetime.now()
    for _ in range(n_loops):
        rank_in_group = pd.Series(-scores).groupby(labels).rank()
        baseline_weights = np.zeros(len(scores))
        selected = rank_in_group <= n_included
        baseline_weights[selected] = 1. / np.sum(selected)
    elapsed_baseline = dt.datetime.now() - tic
    print('{0:20s}: {1}'.format('Benchmark model', elapsed_baseline))
if __name__ == '__main__':
    # Run both benchmarks once with a fixed workload when executed directly.
    benchmark_build_rank(n_samples=3000, n_loops=1000, n_included=300)
    benchmark_build_rank_with_group(n_samples=3000, n_loops=1000, n_included=10, n_groups=30)
......@@ -15,12 +15,15 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
if groups is not None:
max_g = np.max(groups)
index_range = np.arange(len(er))
for i in range(max_g + 1):
current_mask = groups == i
current_ordering = ordering[current_mask]
masks[current_ordering[:use_rank]] = True
current_index = index_range[current_mask]
current_ordering = neg_er[current_mask].argsort()
masks[current_index[current_ordering[:use_rank]]] = True
else:
masks[ordering[:use_rank]] = True
weights = np.zeros(len(er))
......@@ -33,10 +36,10 @@ if __name__ == '__main__':
import datetime as dt
x = np.random.randn(3000)
groups = np.random.randint(20, 50, size=3000)
groups = np.random.randint(30, size=3000)
start = dt.datetime.now()
for i in range(10000):
weights = rank_build(x, 20, groups)
weights = rank_build(x, 30, groups)
print(dt.datetime.now() - start)
#print(x, '\n', weights)
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.rankbuilder import rank_build
class TestRankBuild(unittest.TestCase):
    """Compare ``rank_build`` with NumPy/pandas reference constructions."""

    def test_rank_build(self):
        """Ungrouped: the top ``n_included`` values share the weight equally."""
        n_samples = 3000
        n_included = 300

        scores = np.random.randn(n_samples)
        actual = rank_build(scores, n_included)

        # Double argsort yields each element's rank in descending order.
        descending_rank = (-scores).argsort().argsort()
        expected = np.zeros(len(scores))
        expected[descending_rank < n_included] = 1. / n_included

        np.testing.assert_array_almost_equal(actual, expected)

    def test_rank_build_with_group(self):
        """Grouped: the top ``n_include`` values of every group are selected."""
        n_samples = 3000
        n_include = 10
        n_groups = 30

        scores = np.random.randn(n_samples)
        labels = np.random.randint(n_groups, size=n_samples)
        actual = rank_build(scores, n_include, labels)

        # Rank the negated scores inside each group, then keep the leaders.
        rank_in_group = pd.Series(-scores).groupby(labels).rank()
        selected = rank_in_group <= n_include
        expected = np.zeros(len(scores))
        expected[selected] = 1. / np.sum(selected)

        np.testing.assert_array_almost_equal(actual, expected)
if __name__ == '__main__':
    # Run this module's test cases when the file is executed as a script.
    unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment