Commit ae3af0c2 authored by Dr.李

made rank_build work with very large groups

parent 1c62292b
...@@ -6,8 +6,6 @@ Created on 2017-4-26 ...@@ -6,8 +6,6 @@ Created on 2017-4-26
""" """
cimport numpy as np cimport numpy as np
from numpy import zeros
from numpy import asarray
cimport cython cimport cython
from libc.math cimport sqrt from libc.math cimport sqrt
from libc.math cimport fabs from libc.math cimport fabs
......
...@@ -18,30 +18,30 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle ...@@ -18,30 +18,30 @@ from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
if __name__ == '__main__': if __name__ == '__main__':
benchmark_neutralize(3000, 10, 1000) # benchmark_neutralize(3000, 10, 1000)
benchmark_neutralize(30, 10, 50000) # benchmark_neutralize(30, 10, 50000)
benchmark_neutralize(50000, 50, 20) # benchmark_neutralize(50000, 50, 20)
benchmark_standardize(3000, 10, 1000) # benchmark_standardize(3000, 10, 1000)
benchmark_standardize_with_group(3000, 10, 1000, 30) # benchmark_standardize_with_group(3000, 10, 1000, 30)
benchmark_standardize(30, 10, 50000) # benchmark_standardize(30, 10, 50000)
benchmark_standardize_with_group(30, 10, 5000, 5) # benchmark_standardize_with_group(30, 10, 5000, 5)
benchmark_standardize(50000, 50, 20) # benchmark_standardize(50000, 50, 20)
benchmark_standardize_with_group(50000, 50, 20, 50) # benchmark_standardize_with_group(50000, 50, 20, 50)
benchmark_winsorize_normal(3000, 10, 1000) # benchmark_winsorize_normal(3000, 10, 1000)
benchmark_winsorize_normal_with_group(3000, 10, 1000, 30) # benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
benchmark_winsorize_normal(30, 10, 50000) # benchmark_winsorize_normal(30, 10, 50000)
benchmark_winsorize_normal_with_group(30, 10, 5000, 5) # benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
benchmark_winsorize_normal(50000, 50, 20) # benchmark_winsorize_normal(50000, 50, 20)
benchmark_winsorize_normal_with_group(50000, 50, 20, 50) # benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
benchmark_build_rank(3000, 1000, 300) benchmark_build_rank(3000, 1000, 300)
benchmark_build_rank_with_group(3000, 1000, 10, 30) benchmark_build_rank_with_group(3000, 1000, 10, 30)
benchmark_build_rank(30, 50000, 3) benchmark_build_rank(30, 50000, 3)
benchmark_build_rank_with_group(30, 50000, 1, 3) benchmark_build_rank_with_group(30, 50000, 1, 3)
benchmark_build_rank(50000, 20, 3000) benchmark_build_rank(50000, 20, 3000)
benchmark_build_rank_with_group(50000, 20, 10, 300) benchmark_build_rank_with_group(50000, 20, 10, 300)
benchmark_simple_settle(3000, 10, 1000) # benchmark_simple_settle(3000, 10, 1000)
benchmark_simple_settle_with_group(3000, 10, 1000, 30) # benchmark_simple_settle_with_group(3000, 10, 1000, 30)
benchmark_simple_settle(30, 10, 50000) # benchmark_simple_settle(30, 10, 50000)
benchmark_simple_settle_with_group(30, 10, 5000, 5) # benchmark_simple_settle_with_group(30, 10, 5000, 5)
benchmark_simple_settle(50000, 50, 20) # benchmark_simple_settle(50000, 50, 20)
benchmark_simple_settle_with_group(50000, 50, 20, 50) # benchmark_simple_settle_with_group(50000, 50, 20, 50)
# -*- coding: utf-8 -*-
"""
Created on 2017-4-29
@author: cheng.li
"""
import numpy as np
from numpy import array
cimport numpy as cnp
cimport cython
import cytoolz
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cdef inline long index(tuple x):
    # Key extractor: hand back the first slot of the tuple, coerced to a C long.
    # With boundscheck/wraparound disabled the tuple must be non-empty.
    cdef long key = x[0]
    return key
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef list groupby(long[:] groups):
    """
    Collect the positions of ``groups`` that share the same label.

    :param groups: 1-d memoryview of integer labels, one per position
    :return: list of numpy arrays; each array holds the positions (in
             ascending order) carrying one distinct label. The arrays follow
             the iteration order of the dict returned by ``cytoolz.groupby``.
    """
    cdef int pos
    cdef long label
    cdef list pairs
    cdef tuple pair
    cdef list members
    cdef dict buckets
    cdef list positions_per_group

    # Tag every label with its position so the position survives the bucketing.
    pairs = [(label, pos) for pos, label in enumerate(groups)]

    # Bucket the (label, position) pairs by their first slot, i.e. the label.
    buckets = cytoolz.groupby(index, pairs)

    # Keep only the positions of each bucket, packed into a numpy array.
    positions_per_group = []
    for members in buckets.values():
        positions_per_group.append(array([pair[1] for pair in members]))
    return positions_per_group
\ No newline at end of file
...@@ -7,7 +7,7 @@ Created on 2017-4-26 ...@@ -7,7 +7,7 @@ Created on 2017-4-26
import numpy as np import numpy as np
from numpy import zeros from numpy import zeros
from numpy import arange from alphamind.portfolio.impl import groupby
def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray: def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
...@@ -18,13 +18,10 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda ...@@ -18,13 +18,10 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
length = len(neg_er) length = len(neg_er)
weights = zeros((length, 1)) weights = zeros((length, 1))
if groups is not None: if groups is not None:
max_g = groups.max() group_ids = groupby(groups)
index_range = arange(length)
masks = zeros(length, dtype=bool) masks = zeros(length, dtype=bool)
for i in range(max_g + 1): for current_index in group_ids:
current_mask = groups == i current_ordering = neg_er[current_index].argsort()
current_index = index_range[current_mask]
current_ordering = neg_er[current_mask].argsort()
masks[current_index[current_ordering[:use_rank]]] = True masks[current_index[current_ordering[:use_rank]]] = True
weights[masks] = 1. / masks.sum() weights[masks] = 1. / masks.sum()
else: else:
...@@ -38,13 +35,10 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda ...@@ -38,13 +35,10 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
weights = zeros((length, width)) weights = zeros((length, width))
if groups is not None: if groups is not None:
max_g = groups.max() group_ids = groupby(groups)
index_range = arange(length)
masks = zeros((length, width), dtype=bool) masks = zeros((length, width), dtype=bool)
for i in range(max_g+1): for current_index in group_ids:
current_mask = groups == i current_ordering = neg_er[current_index].argsort(axis=0)
current_index = index_range[current_mask]
current_ordering = neg_er[current_mask].argsort(axis=0)
for j in range(width): for j in range(width):
masks[current_index[current_ordering[:use_rank, j]], j] = True masks[current_index[current_ordering[:use_rank, j]], j] = True
choosed = masks.sum(axis=0) choosed = masks.sum(axis=0)
...@@ -58,3 +52,12 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda ...@@ -58,3 +52,12 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
return weights return weights
if __name__ == '__main__':
    # Ad-hoc smoke run: draw random scores and random group labels, then call
    # rank_build on them. The result is neither printed nor checked.
    n_samples, n_include, n_groups = 4, 1, 2

    # Two columns of standard-normal scores, one row per sample.
    x = np.random.randn(n_samples, 2)
    # One integer label in [0, n_groups) per sample.
    groups = np.random.randint(n_groups, size=n_samples)

    calc_weights = rank_build(x, n_include, groups)
\ No newline at end of file
...@@ -23,7 +23,8 @@ else: ...@@ -23,7 +23,8 @@ else:
line_trace = False line_trace = False
ext_modules = ['alphamind/aggregate.pyx'] ext_modules = ['alphamind/aggregate.pyx',
'alphamind/portfolio/impl.pyx']
def generate_extensions(ext_modules, line_trace=False): def generate_extensions(ext_modules, line_trace=False):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment