Commit 414ed809 authored by Dr.李's avatar Dr.李

restructure

parent bbb01231
# -*- coding: utf-8 -*-
"""
Created on 2017-5-3
@author: cheng.li
"""
import math
import numpy as np
import numba as nb
@nb.njit
def agg_sum(groups, x):
    """Column-wise sum of the rows of ``x`` within each group.

    :param groups: 1-d array of integer labels, one per row of ``x``; each
        label is used directly as a row index into the result
    :param x: 2-d array of values
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])`` whose
        row ``g`` holds the sums over the rows labelled ``g``; rows for labels
        that never occur stay at zero
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    totals = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        # Look the label up once per row instead of once per cell.
        label = groups[row]
        for col in range(n_cols):
            totals[label, col] += x[row, col]
    return totals
@nb.njit
def agg_abssum(groups, x):
    """Column-wise sum of absolute values of the rows of ``x`` per group.

    :param groups: 1-d array of integer labels, one per row of ``x``; each
        label is used directly as a row index into the result
    :param x: 2-d array of values
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])`` whose
        row ``g`` holds ``sum(abs(x[i]))`` over the rows labelled ``g``; rows
        for labels that never occur stay at zero
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    totals = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        # Look the label up once per row instead of once per cell.
        label = groups[row]
        for col in range(n_cols):
            totals[label, col] += abs(x[row, col])
    return totals
@nb.njit
def agg_mean(groups, x):
    """Column-wise mean of the rows of ``x`` within each group.

    :param groups: 1-d array of integer labels, one per row of ``x``; each
        label is used directly as a row index into the result
    :param x: 2-d array of values
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])`` whose
        row ``g`` holds the means over the rows labelled ``g``.  Labels in
        ``0..groups.max()`` that never occur have no defined mean and are
        reported as NaN.
    """
    max_g = groups.max()
    length, width = x.shape
    res = np.zeros((max_g + 1, width), dtype=np.float64)
    # int64 counters so very long inputs cannot overflow the per-group count.
    bin_count = np.zeros(max_g + 1, dtype=np.int64)

    # First pass: accumulate per-group sums and row counts.
    for i in range(length):
        g = groups[i]
        for j in range(width):
            res[g, j] += x[i, j]
        bin_count[g] += 1

    # Second pass: turn the sums into means.
    for k in range(max_g + 1):
        curr = bin_count[k]
        for j in range(width):
            if curr > 0:
                res[k, j] /= curr
            else:
                # An unused label would otherwise divide by zero, which
                # raises ZeroDivisionError under numba's default error model.
                res[k, j] = np.nan
    return res
@nb.njit
def agg_std(groups, x, ddof=1):
    """Column-wise standard deviation of the rows of ``x`` within each group.

    The deviation is computed in a single pass from the per-group sum and sum
    of squares: ``sqrt((sumsq - sum**2 / n) / (n - ddof))``.

    :param groups: 1-d array of integer labels, one per row of ``x``; each
        label is used directly as a row index into the result
    :param x: 2-d array of values
    :param ddof: delta degrees of freedom subtracted from the group size in
        the divisor (default 1)
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``.  Groups
        for which the deviation is undefined (no rows, or ``n - ddof <= 0``
        such as a single-row group with ``ddof=1``) are reported as NaN.
    """
    max_g = groups.max()
    length, width = x.shape
    res = np.zeros((max_g + 1, width), dtype=np.float64)
    sumsq = np.zeros((max_g + 1, width), dtype=np.float64)
    # int64 counters so very long inputs cannot overflow the per-group count.
    bin_count = np.zeros(max_g + 1, dtype=np.int64)

    # First pass: accumulate per-group sums, sums of squares and row counts.
    for i in range(length):
        g = groups[i]
        for j in range(width):
            v = x[i, j]
            res[g, j] += v
            sumsq[g, j] += v * v
        bin_count[g] += 1

    # Second pass: convert the accumulated moments into standard deviations.
    for k in range(max_g + 1):
        curr = bin_count[k]
        dof = curr - ddof
        for j in range(width):
            if curr > 0 and dof > 0:
                centered = sumsq[k, j] - res[k, j] * res[k, j] / curr
                # Cancellation can leave a tiny negative value for (nearly)
                # constant groups; clamp so sqrt never sees a negative number.
                if centered < 0.0:
                    centered = 0.0
                res[k, j] = math.sqrt(centered / dof)
            else:
                # Dividing by a zero count or zero degrees of freedom would
                # raise ZeroDivisionError under numba's default error model.
                res[k, j] = np.nan
    return res
@nb.njit
def set_value(groups, source, destinantion):
    """Broadcast per-group rows of ``source`` back onto individual rows.

    For every row ``i`` of ``destinantion`` the row ``source[groups[i]]`` is
    copied into it, element by element.  The array is modified in place and
    nothing is returned.

    :param groups: 1-d array of integer labels, one per row of ``destinantion``
    :param source: 2-d array indexed by group label along its first axis
    :param destinantion: 2-d output array, overwritten in place (the spelling
        is kept as-is because it is part of the call signature)
    """
    n_rows, n_cols = destinantion.shape
    for row in range(n_rows):
        label = groups[row]
        for col in range(n_cols):
            destinantion[row, col] = source[label, col]
def transform(groups, x, func):
    """Replace every row of ``x`` by the aggregate of the group it belongs to.

    :param groups: 1-d array of integer labels, one per row of ``x``
    :param x: 2-d array of values
    :param func: name of the aggregation: ``'mean'``, ``'std'``, ``'sum'`` or
        ``'abssum'``
    :return: array with the same shape as ``x`` in which row ``i`` holds the
        aggregated values of group ``groups[i]``
    :raises ValueError: if ``func`` is not one of the recognized names
    """
    # Delegate the name dispatch so the list of supported aggregations lives
    # in one place only.
    value_data = aggregate(groups, x, func)

    # Allocate with the aggregate's dtype rather than x's: with integer input
    # a zeros_like(x) buffer would silently truncate fractional means/stds.
    # set_value overwrites every element, so no zero-fill is needed.
    res = np.empty(x.shape, dtype=value_data.dtype)
    set_value(groups, value_data, res)
    return res
def aggregate(groups, x, func):
    """Aggregate the rows of ``x`` per group with the named function.

    :param groups: 1-d array of integer labels, one per row of ``x``
    :param x: 2-d array of values
    :param func: name of the aggregation: ``'mean'``, ``'std'`` (computed with
        ``ddof=1``), ``'sum'`` or ``'abssum'``
    :return: whatever the selected ``agg_*`` routine returns for
        ``(groups, x)``
    :raises ValueError: if ``func`` is not one of the recognized names
    """
    if func == 'mean':
        return agg_mean(groups, x)
    if func == 'std':
        return agg_std(groups, x, ddof=1)
    if func == 'sum':
        return agg_sum(groups, x)
    if func == 'abssum':
        return agg_abssum(groups, x)

    # No branch matched: report the offending name to the caller.
    raise ValueError('({0}) is not recognized as valid functor'.format(func))
if __name__ == '__main__':
    # Small ad-hoc benchmark: time 1000 grouped means twice and print the
    # elapsed time of each pass.
    import datetime as dt
    import time

    n_samples = 6000
    n_features = 10
    n_groups = 30
    n_loops = 1000

    groups = np.random.randint(n_groups, size=n_samples)
    x = np.random.randn(n_samples, n_features)

    # Two identical passes: the first one also pays for numba's JIT
    # compilation, the second one measures the already-compiled code.
    for _ in range(2):
        # perf_counter is monotonic, unlike wall-clock datetime.now().
        start = time.perf_counter()
        for _ in range(n_loops):
            aggregate(groups, x, 'mean')
        print(dt.timedelta(seconds=time.perf_counter() - start))
\ No newline at end of file
...@@ -11,7 +11,7 @@ from numpy.linalg import solve ...@@ -11,7 +11,7 @@ from numpy.linalg import solve
from typing import Tuple from typing import Tuple
from typing import Union from typing import Union
from typing import Dict from typing import Dict
from alphamind.aggregate import groupby from alphamind.groupby import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
......
...@@ -6,8 +6,8 @@ Created on 2017-4-25 ...@@ -6,8 +6,8 @@ Created on 2017-4-25
""" """
import numpy as np import numpy as np
from alphamind.aggregate import group_mapping from alphamind.groupby import group_mapping
from alphamind.impl import transform from alphamind.aggregate import transform
def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray: def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
......
...@@ -6,8 +6,8 @@ Created on 2017-4-25 ...@@ -6,8 +6,8 @@ Created on 2017-4-25
""" """
import numpy as np import numpy as np
from alphamind.aggregate import group_mapping from alphamind.groupby import group_mapping
from alphamind.impl import transform from alphamind.aggregate import transform
def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray: def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# distutils: language = c++ # distutils: language = c++
""" """
Created on 2017-4-26 Created on 2017-4-26
@author: cheng.li @author: cheng.li
""" """
import numpy as np import numpy as np
from numpy import zeros from numpy import zeros
from numpy import max as nmax from numpy import max as nmax
cimport numpy as np cimport numpy as np
cimport cython cimport cython
from libc.math cimport sqrt from libc.math cimport sqrt
from libc.math cimport fabs from libc.math cimport fabs
from libcpp.vector cimport vector as cpp_vector from libcpp.vector cimport vector as cpp_vector
from libcpp.unordered_map cimport unordered_map as cpp_map from libcpp.unordered_map cimport unordered_map as cpp_map
from cython.operator cimport dereference as deref from cython.operator cimport dereference as deref
ctypedef long long int64_t ctypedef long long int64_t
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cpdef groupby(long[:] groups): cpdef groupby(long[:] groups):
cdef long long length = groups.shape[0] cdef long long length = groups.shape[0]
cdef cpp_map[long, cpp_vector[int64_t]] group_ids cdef cpp_map[long, cpp_vector[int64_t]] group_ids
cdef long long i cdef long long i
cdef long curr_tag cdef long curr_tag
cdef cpp_map[long, cpp_vector[int64_t]].iterator it cdef cpp_map[long, cpp_vector[int64_t]].iterator it
cdef np.ndarray[long long, ndim=1] npy_array cdef np.ndarray[long long, ndim=1] npy_array
for i in range(length): for i in range(length):
curr_tag = groups[i] curr_tag = groups[i]
it = group_ids.find(curr_tag) it = group_ids.find(curr_tag)
if it == group_ids.end(): if it == group_ids.end():
group_ids[curr_tag] = [i] group_ids[curr_tag] = [i]
else: else:
deref(it).second.push_back(i) deref(it).second.push_back(i)
return [np.array(v) for v in group_ids.values()] return [np.array(v) for v in group_ids.values()]
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups): cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups):
cdef size_t length = groups.shape[0] cdef size_t length = groups.shape[0]
cdef np.ndarray[int, ndim=1] res= zeros(length, dtype=int) cdef np.ndarray[int, ndim=1] res= zeros(length, dtype=int)
cdef cpp_map[long, long] current_hold cdef cpp_map[long, long] current_hold
cdef long curr_tag cdef long curr_tag
cdef long running_tag = -1 cdef long running_tag = -1
cdef size_t i = 0 cdef size_t i = 0
cdef cpp_map[long, long].iterator it cdef cpp_map[long, long].iterator it
for i in range(length): for i in range(length):
curr_tag = groups[i] curr_tag = groups[i]
it = current_hold.find(curr_tag) it = current_hold.find(curr_tag)
if it == current_hold.end(): if it == current_hold.end():
running_tag += 1 running_tag += 1
res[i] = running_tag res[i] = running_tag
current_hold[curr_tag] = running_tag current_hold[curr_tag] = running_tag
else: else:
res[i] = deref(it).second res[i] = deref(it).second
return res return res
...@@ -8,7 +8,7 @@ Created on 2017-4-26 ...@@ -8,7 +8,7 @@ Created on 2017-4-26
import numpy as np import numpy as np
import numba as nb import numba as nb
from numpy import zeros from numpy import zeros
from alphamind.aggregate import groupby from alphamind.groupby import groupby
@nb.njit @nb.njit
......
...@@ -6,8 +6,8 @@ Created on 2017-4-28 ...@@ -6,8 +6,8 @@ Created on 2017-4-28
""" """
import numpy as np import numpy as np
from alphamind.aggregate import group_mapping from alphamind.groupby import group_mapping
from alphamind.impl import aggregate from alphamind.aggregate import aggregate
def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray: def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
......
...@@ -25,7 +25,7 @@ else: ...@@ -25,7 +25,7 @@ else:
line_trace = False line_trace = False
ext_modules = ['alphamind/aggregate.pyx'] ext_modules = ['alphamind/groupby.pyx']
def generate_extensions(ext_modules, line_trace=False): def generate_extensions(ext_modules, line_trace=False):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment