Commit 70214fe0 authored by Dr.李

enhance performance

parent 106e3b19
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# distutils: language = c++
""" """
Created on 2017-4-26 Created on 2017-4-26
...@@ -16,6 +17,8 @@ from numpy import array ...@@ -16,6 +17,8 @@ from numpy import array
from cpython.dict cimport PyDict_GetItem, PyDict_SetItem from cpython.dict cimport PyDict_GetItem, PyDict_SetItem
from cpython.ref cimport PyObject from cpython.ref cimport PyObject
from cpython.list cimport PyList_Append from cpython.list cimport PyList_Append
from libcpp.unordered_map cimport unordered_map as cpp_map
from cython.operator cimport dereference as deref
np.import_array() np.import_array()
...@@ -54,19 +57,21 @@ cpdef list groupby(long[:] groups): ...@@ -54,19 +57,21 @@ cpdef list groupby(long[:] groups):
@cython.initializedcheck(False)
cdef long* group_mapping(long* groups, size_t length, size_t* max_g):
    """Relabel raw group ids as dense ids in order of first appearance.

    Walks ``groups[0 .. length-1]`` once. The first distinct raw id seen is
    assigned 0, the next new one 1, and so on; repeated raw ids reuse the
    dense id they were given the first time.

    :param groups: pointer to ``length`` raw group ids
    :param length: number of entries readable through ``groups``
    :param max_g: output slot; ``max_g[0]`` is set to the largest dense id
        assigned. When ``length`` is 0 no id is assigned and the stored
        value is -1 converted to ``size_t``.
    :return: buffer of ``length`` dense ids obtained from ``calloc``. This
        function never frees it.
    """
    # The buffer holds `long` values, so it must be sized with sizeof(long).
    # Sizing it with sizeof(int) under-allocates on platforms where long is
    # wider than int and the writes below would overrun the allocation.
    cdef long *res_ptr = <long*>calloc(length, sizeof(long))
    # raw group id -> dense id; a C++ hash map avoids boxing every key
    # into a Python object the way a dict lookup would.
    cdef cpp_map[long, long] current_hold
    cdef long curr_g
    cdef long running_g = -1
    cdef size_t i = 0
    cdef cpp_map[long, long].iterator it

    for i in range(length):
        curr_g = groups[i]
        # A single find() serves both the membership test and the lookup.
        it = current_hold.find(curr_g)
        if it == current_hold.end():
            # First occurrence of this raw id: hand out the next dense id.
            running_g += 1
            res_ptr[i] = running_g
            current_hold[curr_g] = running_g
        else:
            res_ptr[i] = deref(it).second

    max_g[0] = running_g
    return res_ptr
......
...@@ -57,7 +57,7 @@ def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loop ...@@ -57,7 +57,7 @@ def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loop
ret_series.shape = -1, 1 ret_series.shape = -1, 1
for _ in range(n_loops): for _ in range(n_loops):
ret_mat = weights * ret_series ret_mat = weights * ret_series
exp_ret = pd.DataFrame(ret_mat).groupby(groups).sum().values exp_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
benchmark_model_time = dt.datetime.now() - start benchmark_model_time = dt.datetime.now() - start
np.testing.assert_array_almost_equal(calc_ret, exp_ret) np.testing.assert_array_almost_equal(calc_ret, exp_ret)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment