Commit 5e586da0 authored by Dr.李

change for rank build implementation

parent ae3af0c2
......@@ -16,21 +16,27 @@ def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
print("Starting portfolio construction by rank benchmarking")
print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))
x = np.random.randn(n_samples, 1)
n_portfolio = 10
x = np.random.randn(n_samples, n_portfolio)
start = dt.datetime.now()
for _ in range(n_loops):
_ = rank_build(x, n_included)
calc_weights = rank_build(x, n_included)
impl_model_time = dt.datetime.now() - start
print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
start = dt.datetime.now()
for _ in range(n_loops):
expected_weights = np.zeros((len(x), 1))
expected_weights[(-x).argsort(axis=0).argsort(axis=0) < n_included] = 1. / n_included
exp_weights = np.zeros((len(x), n_portfolio))
choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
for j in range(n_portfolio):
exp_weights[choosed_index[:, j], j] = 1. / n_included
benchmark_model_time = dt.datetime.now() - start
np.testing.assert_array_almost_equal(calc_weights, exp_weights)
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
......@@ -39,12 +45,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in
print("Starting portfolio construction by rank with group-by values benchmarking")
print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))
x = np.random.randn(n_samples, 1)
n_portfolio = 10
x = np.random.randn(n_samples, n_portfolio)
groups = np.random.randint(n_groups, size=n_samples)
start = dt.datetime.now()
for _ in range(n_loops):
_ = rank_build(x, n_included, groups=groups)
calc_weights = rank_build(x, n_included, groups=groups)
impl_model_time = dt.datetime.now() - start
print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
......@@ -52,11 +60,14 @@ def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: in
start = dt.datetime.now()
for _ in range(n_loops):
grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
expected_weights = np.zeros((len(x), 1))
masks = grouped_ordering <= n_included
expected_weights[masks] = 1. / np.sum(masks)
exp_weights = np.zeros((len(x), n_portfolio))
masks = (grouped_ordering <= n_included).values
for j in range(n_portfolio):
exp_weights[masks[:, j], j] = 1. / np.sum(masks[:, j])
benchmark_model_time = dt.datetime.now() - start
np.testing.assert_array_almost_equal(calc_weights, exp_weights)
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
......
......@@ -6,33 +6,73 @@ Created on 2017-4-29
"""
import numpy as np
cimport numpy as np
from numpy import array
cimport numpy as cnp
cimport cython
import cytoolz
from cpython.dict cimport PyDict_GetItem, PyDict_SetItem
from cpython.ref cimport PyObject
from cpython.list cimport PyList_Append
cdef inline object _groupby_core(dict d, object key, object item):
    """Append ``item`` to the list stored under ``key`` in ``d``.

    When ``key`` is not yet present, a new one-element list holding ``item``
    is stored under it. Nothing is returned explicitly (implicit ``None``).
    """
    # PyDict_GetItem yields NULL for a missing key; the pointer is only cast
    # back to an object after the NULL check.
    cdef PyObject *found = PyDict_GetItem(d, key)
    if found is not NULL:
        PyList_Append(<object>found, item)
    else:
        bucket = []
        PyList_Append(bucket, item)
        PyDict_SetItem(d, key, bucket)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cdef inline long index(tuple x):
return x[0]
cpdef list groupby(long[:] groups):
    """Collect the positions of each distinct label in ``groups``.

    Parameters
    ----------
    groups : long[:]
        One-dimensional buffer of group labels, one per sample.

    Returns
    -------
    list
        One ``int64`` array per distinct label, holding the positions at
        which that label occurs, in increasing order. The arrays follow the
        iteration order of the internal dict's values (insertion order on
        Python versions whose dicts preserve it). An empty ``groups`` gives
        an empty list.
    """
    cdef size_t length = groups.shape[0]
    cdef dict group_ids = {}
    cdef size_t i

    # Bucket each position under its label; positions are appended in scan
    # order, so every bucket is already sorted.
    for i in range(length):
        _groupby_core(group_ids, groups[i], i)

    return [array(v, dtype=np.int64) for v in group_ids.values()]
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef list groupby(long[:] groups):
cpdef void set_value_bool(unsigned char[:, :] mat, long long[:, :] index):
    """Set ``mat[index[i, j], j]`` to 1 for every entry ``(i, j)`` of ``index``.

    Parameters
    ----------
    mat : unsigned char[:, :]
        Two-dimensional byte buffer, modified in place. It needs at least
        ``index.shape[1]`` columns and enough rows for every value stored in
        ``index``.
    index : long long[:, :]
        Row numbers to flag; column ``j`` of ``index`` addresses column ``j``
        of ``mat``.

    Notes
    -----
    Elements are addressed through the memoryviews rather than through raw
    pointers with ``row * width + col`` arithmetic. The flat-pointer form is
    only correct when ``mat`` is C-contiguous and has exactly
    ``index.shape[1]`` columns; for a strided or wider ``mat`` it silently
    writes to the wrong bytes. Memoryview indexing honours the actual
    strides of both buffers.
    """
    cdef size_t length = index.shape[0]
    cdef size_t width = index.shape[1]
    cdef size_t i
    cdef size_t j

    for i in range(length):
        for j in range(width):
            mat[index[i, j], j] = 1
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef void set_value_double(double[:, :] mat, long long[:, :] index, double val):
cdef size_t length = index.shape[0]
cdef size_t width = index.shape[1]
cdef size_t i
cdef size_t j
cdef double* mat_ptr = &mat[0, 0]
cdef long long* index_ptr = &index[0, 0]
cdef size_t k
cdef int i
cdef long d
cdef list table
cdef tuple t
cdef list v
cdef dict group_dict
cdef list group_ids
table = [(d, i) for i, d in enumerate(groups)]
group_dict = cytoolz.groupby(index, table)
group_ids = [array([t[1] for t in v]) for v in group_dict.values()]
return group_ids
\ No newline at end of file
for i in range(length):
k = i * width
for j in range(width):
mat_ptr[index_ptr[k + j] * width + j] = val
......@@ -8,6 +8,8 @@ Created on 2017-4-26
import numpy as np
from numpy import zeros
from alphamind.portfolio.impl import groupby
from alphamind.portfolio.impl import set_value_bool
from alphamind.portfolio.impl import set_value_double
def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
......@@ -39,25 +41,32 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
masks = zeros((length, width), dtype=bool)
for current_index in group_ids:
current_ordering = neg_er[current_index].argsort(axis=0)
for j in range(width):
masks[current_index[current_ordering[:use_rank, j]], j] = True
total_index = current_index[current_ordering[:use_rank]]
set_value_bool(masks.view(dtype=np.uint8), total_index)
choosed = masks.sum(axis=0)
for j in range(width):
weights[masks[:, j], j] = 1. / choosed[j]
else:
ordering = neg_er.argsort(axis=0)
for j in range(width):
weights[ordering[:use_rank, j], j] = 1. / use_rank
set_value_double(weights, ordering[:use_rank], 1. / use_rank)
return weights
if __name__ == '__main__':
n_samples = 4
n_include = 1
n_groups = 2
# n_samples = 4000
# n_include = 100
# n_groups = 20
#
# x = np.random.randn(n_samples, 2)
# groups = np.random.randint(n_groups, size=n_samples)
#
# for i in range(10000):
# rank_build(x, n_include, groups)
x = np.random.randn(n_samples, 2)
groups = np.random.randint(n_groups, size=n_samples)
from alphamind.portfolio.impl import set_value
calc_weights = rank_build(x, n_include, groups)
\ No newline at end of file
x = np.zeros((3, 2), dtype=np.bool)
index = np.array([[1, 0], [2, 1]])
set_value(x.view(dtype=np.uint8), index)
print(x)
\ No newline at end of file
......@@ -11,7 +11,9 @@ from setuptools import setup
from setuptools import find_packages
from distutils.extension import Extension
import numpy as np
import Cython
from Cython.Build import cythonize
Cython.Compiler.Options.annotate = True
VERSION = "0.1.0"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment