Commit ae3af0c2 authored by Dr.李

made rank_build work with very large groups

parent 1c62292b
......@@ -6,8 +6,6 @@ Created on 2017-4-26
"""
cimport numpy as np
from numpy import zeros
from numpy import asarray
cimport cython
from libc.math cimport sqrt
from libc.math cimport fabs
......
......@@ -18,30 +18,30 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
if __name__ == '__main__':
benchmark_neutralize(3000, 10, 1000)
benchmark_neutralize(30, 10, 50000)
benchmark_neutralize(50000, 50, 20)
benchmark_standardize(3000, 10, 1000)
benchmark_standardize_with_group(3000, 10, 1000, 30)
benchmark_standardize(30, 10, 50000)
benchmark_standardize_with_group(30, 10, 5000, 5)
benchmark_standardize(50000, 50, 20)
benchmark_standardize_with_group(50000, 50, 20, 50)
benchmark_winsorize_normal(3000, 10, 1000)
benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
benchmark_winsorize_normal(30, 10, 50000)
benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
benchmark_winsorize_normal(50000, 50, 20)
benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
# benchmark_neutralize(3000, 10, 1000)
# benchmark_neutralize(30, 10, 50000)
# benchmark_neutralize(50000, 50, 20)
# benchmark_standardize(3000, 10, 1000)
# benchmark_standardize_with_group(3000, 10, 1000, 30)
# benchmark_standardize(30, 10, 50000)
# benchmark_standardize_with_group(30, 10, 5000, 5)
# benchmark_standardize(50000, 50, 20)
# benchmark_standardize_with_group(50000, 50, 20, 50)
# benchmark_winsorize_normal(3000, 10, 1000)
# benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
# benchmark_winsorize_normal(30, 10, 50000)
# benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
# benchmark_winsorize_normal(50000, 50, 20)
# benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
benchmark_build_rank(3000, 1000, 300)
benchmark_build_rank_with_group(3000, 1000, 10, 30)
benchmark_build_rank(30, 50000, 3)
benchmark_build_rank_with_group(30, 50000, 1, 3)
benchmark_build_rank(50000, 20, 3000)
benchmark_build_rank_with_group(50000, 20, 10, 300)
benchmark_simple_settle(3000, 10, 1000)
benchmark_simple_settle_with_group(3000, 10, 1000, 30)
benchmark_simple_settle(30, 10, 50000)
benchmark_simple_settle_with_group(30, 10, 5000, 5)
benchmark_simple_settle(50000, 50, 20)
benchmark_simple_settle_with_group(50000, 50, 20, 50)
# benchmark_simple_settle(3000, 10, 1000)
# benchmark_simple_settle_with_group(3000, 10, 1000, 30)
# benchmark_simple_settle(30, 10, 50000)
# benchmark_simple_settle_with_group(30, 10, 5000, 5)
# benchmark_simple_settle(50000, 50, 20)
# benchmark_simple_settle_with_group(50000, 50, 20, 50)
# -*- coding: utf-8 -*-
"""
Created on 2017-4-29
@author: cheng.li
"""
import numpy as np
from numpy import array
cimport numpy as cnp
cimport cython
import cytoolz
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cdef inline long index(tuple x):
    """Return the first element of the tuple ``x`` as a C long.

    Used as the key function when grouping ``(label, position)`` pairs.
    """
    cdef long head = x[0]
    return head
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef list groupby(long[:] groups):
    """Collect the positions of ``groups`` that share the same label.

    Each element of ``groups`` is paired with its position, the pairs are
    bucketed by label with ``cytoolz.groupby`` (keyed by ``index``), and the
    positions inside every bucket are packed into a numpy array.

    Parameters
    ----------
    groups : long[:]
        One-dimensional memoryview of group labels.

    Returns
    -------
    list
        One numpy array of positions per distinct label, in the iteration
        order of the dict returned by ``cytoolz.groupby``.
    """
    cdef int pos
    cdef long label
    cdef tuple pair
    cdef list members
    cdef list positions
    cdef dict buckets
    cdef list pairs = []
    cdef list result = []

    # Tag every label with the position it came from.
    for pos, label in enumerate(groups):
        pairs.append((label, pos))

    buckets = cytoolz.groupby(index, pairs)

    # Keep only the positions of each bucket, one array per label.
    for members in buckets.values():
        positions = []
        for pair in members:
            positions.append(pair[1])
        result.append(array(positions))

    return result
\ No newline at end of file
......@@ -7,7 +7,7 @@ Created on 2017-4-26
import numpy as np
from numpy import zeros
from numpy import arange
from alphamind.portfolio.impl import groupby
def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
......@@ -18,13 +18,10 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
length = len(neg_er)
weights = zeros((length, 1))
if groups is not None:
max_g = groups.max()
index_range = arange(length)
group_ids = groupby(groups)
masks = zeros(length, dtype=bool)
for i in range(max_g + 1):
current_mask = groups == i
current_index = index_range[current_mask]
current_ordering = neg_er[current_mask].argsort()
for current_index in group_ids:
current_ordering = neg_er[current_index].argsort()
masks[current_index[current_ordering[:use_rank]]] = True
weights[masks] = 1. / masks.sum()
else:
......@@ -38,13 +35,10 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
weights = zeros((length, width))
if groups is not None:
max_g = groups.max()
index_range = arange(length)
group_ids = groupby(groups)
masks = zeros((length, width), dtype=bool)
for i in range(max_g+1):
current_mask = groups == i
current_index = index_range[current_mask]
current_ordering = neg_er[current_mask].argsort(axis=0)
for current_index in group_ids:
current_ordering = neg_er[current_index].argsort(axis=0)
for j in range(width):
masks[current_index[current_ordering[:use_rank, j]], j] = True
choosed = masks.sum(axis=0)
......@@ -58,3 +52,12 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
return weights
if __name__ == '__main__':
    # Minimal smoke run: random scores for a few samples in two columns,
    # random group labels, then build rank-based weights per group.
    n_samples, n_include, n_groups = 4, 1, 2

    x = np.random.randn(n_samples, 2)
    groups = np.random.randint(n_groups, size=n_samples)

    calc_weights = rank_build(x, n_include, groups)
\ No newline at end of file
......@@ -23,7 +23,8 @@ else:
line_trace = False
ext_modules = ['alphamind/aggregate.pyx']
ext_modules = ['alphamind/aggregate.pyx',
'alphamind/portfolio/impl.pyx']
def generate_extensions(ext_modules, line_trace=False):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment