Commit 414ed809 authored by Dr.李

restructure

parent bbb01231
# -*- coding: utf-8 -*-
"""
Created on 2017-5-3
@author: cheng.li
"""
import math
import numpy as np
import numba as nb
@nb.njit
def agg_sum(groups, x):
    """
    Column-wise sum of the rows of ``x`` within each group.

    :param groups: 1-D integer array; ``groups[i]`` is used directly as the
                   output row that row ``i`` of ``x`` is accumulated into
    :param x: 2-D array of values
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    totals = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        target = groups[row]
        for col in range(n_cols):
            totals[target, col] += x[row, col]
    return totals
@nb.njit
def agg_abssum(groups, x):
    """
    Column-wise sum of absolute values of the rows of ``x`` within each group.

    :param groups: 1-D integer array; ``groups[i]`` is used directly as the
                   output row that row ``i`` of ``x`` is accumulated into
    :param x: 2-D array of values
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    totals = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        target = groups[row]
        for col in range(n_cols):
            totals[target, col] += abs(x[row, col])
    return totals
@nb.njit
def agg_mean(groups, x):
    """
    Column-wise mean of the rows of ``x`` within each group.

    :param groups: 1-D integer array; ``groups[i]`` is used directly as the
                   output row that row ``i`` of ``x`` belongs to
    :param x: 2-D array of values
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``.
             Row ``g`` holds the mean of the rows labelled ``g``; an id in
             ``0..groups.max()`` that never occurs yields a row of NaN.
    """
    max_g = groups.max()
    length, width = x.shape
    res = np.zeros((max_g+1, width), dtype=np.float64)
    bin_count = np.zeros(max_g+1, dtype=np.int32)

    # first pass: per-group column sums and per-group row counts
    for i in range(length):
        g = groups[i]
        for j in range(width):
            res[g, j] += x[i, j]
        bin_count[g] += 1

    # second pass: turn sums into means
    for i in range(max_g+1):
        curr = bin_count[i]
        for j in range(width):
            if curr > 0:
                res[i, j] /= curr
            else:
                # A group id without any rows has no mean. Dividing by the
                # zero count would raise ZeroDivisionError under numba's
                # default error model, so report NaN instead.
                res[i, j] = np.nan
    return res
@nb.njit
def agg_std(groups, x, ddof=1):
    """
    Column-wise standard deviation of the rows of ``x`` within each group.

    The result is computed in one pass from the per-group sum and sum of
    squares: ``sqrt((sumsq - sum * sum / n) / (n - ddof))``.

    :param groups: 1-D integer array; ``groups[i]`` is used directly as the
                   output row that row ``i`` of ``x`` belongs to
    :param x: 2-D array of values
    :param ddof: delta degrees of freedom subtracted from the group size in
                 the divisor (default 1)
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``.
             Groups with no rows, or with ``n - ddof <= 0``, yield NaN.
    """
    max_g = groups.max()
    length, width = x.shape
    res = np.zeros((max_g+1, width), dtype=np.float64)
    sumsq = np.zeros((max_g + 1, width), dtype=np.float64)
    bin_count = np.zeros(max_g+1, dtype=np.int32)

    # first pass: per-group sums, sums of squares and row counts
    for i in range(length):
        g = groups[i]
        for j in range(width):
            res[g, j] += x[i, j]
            sumsq[g, j] += x[i, j] * x[i, j]
        bin_count[g] += 1

    for i in range(max_g+1):
        curr = bin_count[i]
        dof = curr - ddof
        for j in range(width):
            if curr > 0 and dof > 0:
                centered = sumsq[i, j] - res[i, j] * res[i, j] / curr
                # Floating-point cancellation can push the centered sum of
                # squares marginally below zero for (near-)constant groups;
                # clamp it so the square root stays defined.
                if centered < 0.:
                    centered = 0.
                res[i, j] = math.sqrt(centered / dof)
            else:
                # Not enough observations for the requested ddof: the
                # deviation is undefined, and the plain formula would
                # divide by zero.
                res[i, j] = np.nan
    return res
@nb.njit
def set_value(groups, source, destinantion):
    """
    Copy per-group rows back onto per-observation rows, in place.

    Row ``i`` of ``destinantion`` is overwritten with row ``groups[i]`` of
    ``source``. Nothing is returned.

    :param groups: 1-D integer array indexing rows of ``source``
    :param source: 2-D array holding one row per group
    :param destinantion: 2-D array to fill; its shape drives the loops
    """
    n_rows, n_cols = destinantion.shape
    for row in range(n_rows):
        src_row = groups[row]
        for col in range(n_cols):
            destinantion[row, col] = source[src_row, col]
def transform(groups, x, func):
    """
    Compute a per-group statistic and broadcast it back to every row.

    :param groups: 1-D integer array of group ids, one per row of ``x``
    :param x: 2-D array of values
    :param func: one of ``'mean'``, ``'std'`` (with ``ddof=1``), ``'sum'``
                 or ``'abssum'``
    :return: array created with ``np.zeros_like(x)`` in which row ``i``
             holds the statistic of the group that row ``i`` belongs to
    :raises ValueError: if ``func`` is not one of the recognised names
    """
    res = np.zeros_like(x)

    # name -> deferred reducer call, scanned in the original comparison order
    reducers = (
        ('mean', lambda: agg_mean(groups, x)),
        ('std', lambda: agg_std(groups, x, ddof=1)),
        ('sum', lambda: agg_sum(groups, x)),
        ('abssum', lambda: agg_abssum(groups, x)),
    )

    for name, reduce_groups in reducers:
        if func == name:
            value_data = reduce_groups()
            break
    else:
        raise ValueError('({0}) is not recognized as valid functor'.format(func))

    set_value(groups, value_data, res)
    return res
def aggregate(groups, x, func):
    """
    Compute a per-group statistic of ``x``, one output row per group.

    :param groups: 1-D integer array of group ids, one per row of ``x``
    :param x: 2-D array of values
    :param func: one of ``'mean'``, ``'std'`` (with ``ddof=1``), ``'sum'``
                 or ``'abssum'``
    :return: the array produced by the matching ``agg_*`` function
    :raises ValueError: if ``func`` is not one of the recognised names
    """
    if func == 'mean':
        return agg_mean(groups, x)
    if func == 'std':
        return agg_std(groups, x, ddof=1)
    if func == 'sum':
        return agg_sum(groups, x)
    if func == 'abssum':
        return agg_abssum(groups, x)
    raise ValueError('({0}) is not recognized as valid functor'.format(func))
if __name__ == '__main__':
    # Ad-hoc benchmark: time 1000 calls of aggregate(..., 'mean') on random
    # data, twice in a row, printing the elapsed time of each round.
    import datetime as dt

    n_samples = 6000
    n_features = 10
    n_groups = 30

    groups = np.random.randint(n_groups, size=n_samples)
    max_g = n_groups - 1
    x = np.random.randn(n_samples, n_features)

    # Both rounds run the same workload; the first one presumably also pays
    # the numba compilation cost of the jitted reducer.
    for _ in range(2):
        start = dt.datetime.now()
        for i in range(1000):
            res = aggregate(groups, x, 'mean')
        print(dt.datetime.now() - start)
\ No newline at end of file
......@@ -11,7 +11,7 @@ from numpy.linalg import solve
from typing import Tuple
from typing import Union
from typing import Dict
from alphamind.aggregate import groupby
from alphamind.groupby import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
......
......@@ -6,8 +6,8 @@ Created on 2017-4-25
"""
import numpy as np
from alphamind.aggregate import group_mapping
from alphamind.impl import transform
from alphamind.groupby import group_mapping
from alphamind.aggregate import transform
def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
......
......@@ -6,8 +6,8 @@ Created on 2017-4-25
"""
import numpy as np
from alphamind.aggregate import group_mapping
from alphamind.impl import transform
from alphamind.groupby import group_mapping
from alphamind.aggregate import transform
def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:
......
# -*- coding: utf-8 -*-
# distutils: language = c++
"""
Created on 2017-4-26
@author: cheng.li
"""
import numpy as np
from numpy import zeros
from numpy import max as nmax
cimport numpy as np
cimport cython
from libc.math cimport sqrt
from libc.math cimport fabs
from libcpp.vector cimport vector as cpp_vector
from libcpp.unordered_map cimport unordered_map as cpp_map
from cython.operator cimport dereference as deref
ctypedef long long int64_t
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef groupby(long[:] groups):
    """
    Collect, for every distinct label in ``groups``, the positions where it occurs.

    :param groups: 1-D memoryview of C ``long`` group labels
    :return: list of numpy arrays, one per distinct label; each array holds
             the indices carrying that label, in order of appearance. The
             labels are kept in an unordered map, so no particular order of
             the arrays themselves should be relied on.
    """
    cdef long long length = groups.shape[0]
    cdef cpp_map[long, cpp_vector[int64_t]] group_ids
    cdef long long i
    cdef long curr_tag
    cdef cpp_map[long, cpp_vector[int64_t]].iterator it

    for i in range(length):
        curr_tag = groups[i]
        it = group_ids.find(curr_tag)

        if it == group_ids.end():
            # first occurrence of this label: start a new index vector
            group_ids[curr_tag] = [i]
        else:
            # label already seen: append to its existing index vector
            deref(it).second.push_back(i)

    return [np.array(v) for v in group_ids.values()]
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups):
    """
    Relabel arbitrary group labels as consecutive integers starting at 0.

    Labels are numbered in order of first appearance, so equal labels map to
    equal codes and the codes cover ``0..n_distinct - 1`` without gaps.

    :param groups: 1-D memoryview of C ``long`` group labels
    :return: 1-D array of C ``int`` codes, one per element of ``groups``
    """
    cdef size_t length = groups.shape[0]
    # np.intc is numpy's C ``int`` type, so the array always matches the
    # declared buffer type. ``dtype=int`` is 64-bit on many platforms and
    # then fails with a buffer dtype mismatch.
    cdef np.ndarray[int, ndim=1] res = zeros(length, dtype=np.intc)
    cdef cpp_map[long, long] current_hold
    cdef long curr_tag
    cdef long running_tag = -1
    cdef size_t i = 0
    cdef cpp_map[long, long].iterator it

    for i in range(length):
        curr_tag = groups[i]
        it = current_hold.find(curr_tag)

        if it == current_hold.end():
            # unseen label: hand out the next free code and remember it
            running_tag += 1
            res[i] = running_tag
            current_hold[curr_tag] = running_tag
        else:
            res[i] = deref(it).second

    return res
# -*- coding: utf-8 -*-
# distutils: language = c++
"""
Created on 2017-4-26
@author: cheng.li
"""
import numpy as np
from numpy import zeros
from numpy import max as nmax
cimport numpy as np
cimport cython
from libc.math cimport sqrt
from libc.math cimport fabs
from libcpp.vector cimport vector as cpp_vector
from libcpp.unordered_map cimport unordered_map as cpp_map
from cython.operator cimport dereference as deref
ctypedef long long int64_t
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef groupby(long[:] groups):
    """
    Collect, for every distinct label in ``groups``, the positions where it occurs.

    :param groups: 1-D memoryview of C ``long`` group labels
    :return: list of numpy arrays, one per distinct label; each array holds
             the indices carrying that label, in order of appearance. The
             labels are kept in an unordered map, so no particular order of
             the arrays themselves should be relied on.
    """
    cdef long long length = groups.shape[0]
    cdef cpp_map[long, cpp_vector[int64_t]] group_ids
    cdef long long i
    cdef long curr_tag
    cdef cpp_map[long, cpp_vector[int64_t]].iterator it

    for i in range(length):
        curr_tag = groups[i]
        it = group_ids.find(curr_tag)

        if it == group_ids.end():
            # first occurrence of this label: start a new index vector
            group_ids[curr_tag] = [i]
        else:
            # label already seen: append to its existing index vector
            deref(it).second.push_back(i)

    return [np.array(v) for v in group_ids.values()]
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef np.ndarray[int, ndim=1] group_mapping(long[:] groups):
    """
    Relabel arbitrary group labels as consecutive integers starting at 0.

    Labels are numbered in order of first appearance, so equal labels map to
    equal codes and the codes cover ``0..n_distinct - 1`` without gaps.

    :param groups: 1-D memoryview of C ``long`` group labels
    :return: 1-D array of C ``int`` codes, one per element of ``groups``
    """
    cdef size_t length = groups.shape[0]
    # np.intc is numpy's C ``int`` type, so the array always matches the
    # declared buffer type. ``dtype=int`` is 64-bit on many platforms and
    # then fails with a buffer dtype mismatch.
    cdef np.ndarray[int, ndim=1] res = zeros(length, dtype=np.intc)
    cdef cpp_map[long, long] current_hold
    cdef long curr_tag
    cdef long running_tag = -1
    cdef size_t i = 0
    cdef cpp_map[long, long].iterator it

    for i in range(length):
        curr_tag = groups[i]
        it = current_hold.find(curr_tag)

        if it == current_hold.end():
            # unseen label: hand out the next free code and remember it
            running_tag += 1
            res[i] = running_tag
            current_hold[curr_tag] = running_tag
        else:
            res[i] = deref(it).second

    return res
......@@ -8,7 +8,7 @@ Created on 2017-4-26
import numpy as np
import numba as nb
from numpy import zeros
from alphamind.aggregate import groupby
from alphamind.groupby import groupby
@nb.njit
......
......@@ -6,8 +6,8 @@ Created on 2017-4-28
"""
import numpy as np
from alphamind.aggregate import group_mapping
from alphamind.impl import aggregate
from alphamind.groupby import group_mapping
from alphamind.aggregate import aggregate
def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
......
......@@ -25,7 +25,7 @@ else:
line_trace = False
ext_modules = ['alphamind/aggregate.pyx']
ext_modules = ['alphamind/groupby.pyx']
def generate_extensions(ext_modules, line_trace=False):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment