Commit 5e586da0 authored by Dr.李

change for rank build implementation

parent ae3af0c2
...@@ -16,21 +16,27 @@ def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None: ...@@ -16,21 +16,27 @@ def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
print("Starting portfolio construction by rank benchmarking") print("Starting portfolio construction by rank benchmarking")
print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops)) print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))
x = np.random.randn(n_samples, 1) n_portfolio = 10
x = np.random.randn(n_samples, n_portfolio)
start = dt.datetime.now() start = dt.datetime.now()
for _ in range(n_loops): for _ in range(n_loops):
_ = rank_build(x, n_included) calc_weights = rank_build(x, n_included)
impl_model_time = dt.datetime.now() - start impl_model_time = dt.datetime.now() - start
print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
start = dt.datetime.now() start = dt.datetime.now()
for _ in range(n_loops): for _ in range(n_loops):
expected_weights = np.zeros((len(x), 1)) exp_weights = np.zeros((len(x), n_portfolio))
expected_weights[(-x).argsort(axis=0).argsort(axis=0) < n_included] = 1. / n_included choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
for j in range(n_portfolio):
exp_weights[choosed_index[:, j], j] = 1. / n_included
benchmark_model_time = dt.datetime.now() - start benchmark_model_time = dt.datetime.now() - start
np.testing.assert_array_almost_equal(calc_weights, exp_weights)
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
...@@ -39,12 +45,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in ...@@ -39,12 +45,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in
print("Starting portfolio construction by rank with group-by values benchmarking") print("Starting portfolio construction by rank with group-by values benchmarking")
print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups)) print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))
x = np.random.randn(n_samples, 1) n_portfolio = 10
x = np.random.randn(n_samples, n_portfolio)
groups = np.random.randint(n_groups, size=n_samples) groups = np.random.randint(n_groups, size=n_samples)
start = dt.datetime.now() start = dt.datetime.now()
for _ in range(n_loops): for _ in range(n_loops):
_ = rank_build(x, n_included, groups=groups) calc_weights = rank_build(x, n_included, groups=groups)
impl_model_time = dt.datetime.now() - start impl_model_time = dt.datetime.now() - start
print('{0:20s}: {1}'.format('Implemented model', impl_model_time)) print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
...@@ -52,11 +60,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in ...@@ -52,11 +60,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in
start = dt.datetime.now() start = dt.datetime.now()
for _ in range(n_loops): for _ in range(n_loops):
grouped_ordering = pd.DataFrame(-x).groupby(groups).rank() grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
expected_weights = np.zeros((len(x), 1)) exp_weights = np.zeros((len(x), n_portfolio))
masks = grouped_ordering <= n_included masks = (grouped_ordering <= n_included).values
expected_weights[masks] = 1. / np.sum(masks) for j in range(n_portfolio):
exp_weights[masks[:, j], j] = 1. / np.sum(masks[:, j])
benchmark_model_time = dt.datetime.now() - start benchmark_model_time = dt.datetime.now() - start
np.testing.assert_array_almost_equal(calc_weights, exp_weights)
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time)) print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
......
...@@ -6,33 +6,73 @@ Created on 2017-4-29 ...@@ -6,33 +6,73 @@ Created on 2017-4-29
""" """
import numpy as np import numpy as np
cimport numpy as np
from numpy import array from numpy import array
cimport numpy as cnp
cimport cython cimport cython
import cytoolz from cpython.dict cimport PyDict_GetItem, PyDict_SetItem
from cpython.ref cimport PyObject
from cpython.list cimport PyList_Append
cdef inline object _groupby_core(dict d, object key, object item):
cdef PyObject *obj = PyDict_GetItem(d, key)
if obj is NULL:
val = []
PyList_Append(val, item)
PyDict_SetItem(d, key, val)
else:
PyList_Append(<object>obj, item)
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cdef inline long index(tuple x): cpdef list groupby(long[:] groups):
return x[0]
cdef size_t length = groups.shape[0]
cdef dict group_ids = {}
cdef size_t i
cdef long curr_tag
for i in range(length):
_groupby_core(group_ids, groups[i], i)
return [array(v, dtype=np.int64) for v in group_ids.values()]
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cpdef list groupby(long[:] groups): cpdef void set_value_bool(unsigned char[:, :] mat, long long[:, :] index):
cdef size_t length = index.shape[0]
cdef size_t width = index.shape[1]
cdef size_t i
cdef size_t j
cdef unsigned char* mat_ptr = &mat[0, 0]
cdef long long* index_ptr = &index[0, 0]
cdef size_t k
for i in range(length):
k = i * width
for j in range(width):
mat_ptr[index_ptr[k + j] * width + j] = True
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef void set_value_double(double[:, :] mat, long long[:, :] index, double val):
cdef size_t length = index.shape[0]
cdef size_t width = index.shape[1]
cdef size_t i
cdef size_t j
cdef double* mat_ptr = &mat[0, 0]
cdef long long* index_ptr = &index[0, 0]
cdef size_t k
cdef int i for i in range(length):
cdef long d k = i * width
cdef list table for j in range(width):
cdef tuple t mat_ptr[index_ptr[k + j] * width + j] = val
cdef list v
cdef dict group_dict
cdef list group_ids
table = [(d, i) for i, d in enumerate(groups)]
group_dict = cytoolz.groupby(index, table)
group_ids = [array([t[1] for t in v]) for v in group_dict.values()]
return group_ids
\ No newline at end of file
...@@ -8,6 +8,8 @@ Created on 2017-4-26 ...@@ -8,6 +8,8 @@ Created on 2017-4-26
import numpy as np import numpy as np
from numpy import zeros from numpy import zeros
from alphamind.portfolio.impl import groupby from alphamind.portfolio.impl import groupby
from alphamind.portfolio.impl import set_value_bool
from alphamind.portfolio.impl import set_value_double
def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray: def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
...@@ -39,25 +41,32 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda ...@@ -39,25 +41,32 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
masks = zeros((length, width), dtype=bool) masks = zeros((length, width), dtype=bool)
for current_index in group_ids: for current_index in group_ids:
current_ordering = neg_er[current_index].argsort(axis=0) current_ordering = neg_er[current_index].argsort(axis=0)
for j in range(width): total_index = current_index[current_ordering[:use_rank]]
masks[current_index[current_ordering[:use_rank, j]], j] = True set_value_bool(masks.view(dtype=np.uint8), total_index)
choosed = masks.sum(axis=0) choosed = masks.sum(axis=0)
for j in range(width): for j in range(width):
weights[masks[:, j], j] = 1. / choosed[j] weights[masks[:, j], j] = 1. / choosed[j]
else: else:
ordering = neg_er.argsort(axis=0) ordering = neg_er.argsort(axis=0)
for j in range(width): set_value_double(weights, ordering[:use_rank], 1. / use_rank)
weights[ordering[:use_rank, j], j] = 1. / use_rank
return weights return weights
if __name__ == '__main__': if __name__ == '__main__':
n_samples = 4 # n_samples = 4000
n_include = 1 # n_include = 100
n_groups = 2 # n_groups = 20
#
# x = np.random.randn(n_samples, 2)
# groups = np.random.randint(n_groups, size=n_samples)
#
# for i in range(10000):
# rank_build(x, n_include, groups)
x = np.random.randn(n_samples, 2) from alphamind.portfolio.impl import set_value
groups = np.random.randint(n_groups, size=n_samples)
calc_weights = rank_build(x, n_include, groups) x = np.zeros((3, 2), dtype=np.bool)
\ No newline at end of file index = np.array([[1, 0], [2, 1]])
set_value(x.view(dtype=np.uint8), index)
print(x)
\ No newline at end of file
...@@ -11,7 +11,9 @@ from setuptools import setup ...@@ -11,7 +11,9 @@ from setuptools import setup
from setuptools import find_packages from setuptools import find_packages
from distutils.extension import Extension from distutils.extension import Extension
import numpy as np import numpy as np
import Cython
from Cython.Build import cythonize from Cython.Build import cythonize
Cython.Compiler.Options.annotate = True
VERSION = "0.1.0" VERSION = "0.1.0"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment