Commit 414ed809 authored by Dr.李

restructure

parent bbb01231
# -*- coding: utf-8 -*-
"""
Created on 2017-5-3
@author: cheng.li
"""
import math
import numpy as np
import numba as nb
@nb.njit
def agg_sum(groups, x):
    """Sum the rows of ``x`` that share a group id.

    Row ``i`` of ``x`` is accumulated, column by column, into row
    ``groups[i]`` of the result.

    :param groups: array of integer group ids, indexed once per row of ``x``;
        each id is used directly as a row index into the result.
    :param x: 2-d array of values.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``. Rows
        whose id never occurs in ``groups`` keep their initial value of zero.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    totals = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        g = groups[row]
        for col in range(n_cols):
            totals[g, col] += x[row, col]

    return totals
@nb.njit
def agg_abssum(groups, x):
    """Sum the absolute values of the rows of ``x`` that share a group id.

    ``abs(x[i, j])`` is accumulated into element ``[groups[i], j]`` of the
    result.

    :param groups: array of integer group ids, indexed once per row of ``x``;
        each id is used directly as a row index into the result.
    :param x: 2-d array of values.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``. Rows
        whose id never occurs in ``groups`` keep their initial value of zero.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    totals = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        g = groups[row]
        for col in range(n_cols):
            totals[g, col] += abs(x[row, col])

    return totals
@nb.njit
def agg_mean(groups, x):
    """Average the rows of ``x`` that share a group id.

    :param groups: array of integer group ids, indexed once per row of ``x``;
        each id is used directly as a row index into the result.
    :param x: 2-d array of values.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])`` holding
        the per-group column means. Rows whose id never occurs in ``groups``
        are filled with NaN.
    """
    max_g = groups.max()
    length, width = x.shape
    res = np.zeros((max_g + 1, width), dtype=np.float64)
    bin_count = np.zeros(max_g + 1, dtype=np.int32)

    # First pass: per-group column sums and per-group row counts.
    for i in range(length):
        g = groups[i]
        for j in range(width):
            res[g, j] += x[i, j]
        bin_count[g] += 1

    # Second pass: turn the sums into means.
    for i in range(max_g + 1):
        curr = bin_count[i]
        if curr > 0:
            for j in range(width):
                res[i, j] /= curr
        else:
            # Ids need not be contiguous, so a group can be empty. Dividing by
            # a zero count raises ZeroDivisionError under numba's default
            # error model; the mean of no observations is reported as NaN.
            for j in range(width):
                res[i, j] = np.nan

    return res
@nb.njit
def agg_std(groups, x, ddof=1):
    """Standard deviation of the rows of ``x`` that share a group id.

    The deviation is computed in a single pass from the per-group sum and sum
    of squares: ``sqrt((sumsq - sum**2 / n) / (n - ddof))``.

    :param groups: array of integer group ids, indexed once per row of ``x``;
        each id is used directly as a row index into the result.
    :param x: 2-d array of values.
    :param ddof: delta degrees of freedom subtracted from the group size in
        the divisor (default 1).
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``. Groups
        that are empty, or that have no more than ``ddof`` observations, are
        filled with NaN because their deviation is undefined.
    """
    max_g = groups.max()
    length, width = x.shape
    res = np.zeros((max_g + 1, width), dtype=np.float64)
    sumsq = np.zeros((max_g + 1, width), dtype=np.float64)
    bin_count = np.zeros(max_g + 1, dtype=np.int32)

    # First pass: per-group sums, sums of squares and row counts.
    for i in range(length):
        g = groups[i]
        for j in range(width):
            value = x[i, j]
            res[g, j] += value
            sumsq[g, j] += value * value
        bin_count[g] += 1

    # Second pass: overwrite the sums stored in ``res`` with the deviations.
    for i in range(max_g + 1):
        curr = bin_count[i]
        dof = curr - ddof
        if curr > 0 and dof > 0:
            for j in range(width):
                sq_dev = sumsq[i, j] - res[i, j] * res[i, j] / curr
                # Rounding can push the sum of squared deviations slightly
                # below zero for (near-)constant groups, which would make
                # math.sqrt fail with a domain error; clamp it at zero.
                if sq_dev < 0.0:
                    sq_dev = 0.0
                res[i, j] = math.sqrt(sq_dev / dof)
        else:
            # Either divisor would be zero (or the degrees of freedom
            # negative); report NaN rather than dividing by zero.
            for j in range(width):
                res[i, j] = np.nan

    return res
@nb.njit
def set_value(groups, source, destinantion):
    """Fill ``destinantion`` in place with per-group rows taken from ``source``.

    Row ``i`` of ``destinantion`` receives the leading columns of row
    ``groups[i]`` of ``source``. Nothing is returned.

    :param groups: array of integer ids, indexed once per row of
        ``destinantion``; each id selects a row of ``source``.
    :param source: 2-d array indexed by group id.
    :param destinantion: 2-d array that is overwritten; its shape determines
        how many rows and columns are copied. (The misspelt name is kept as
        is because it is part of the function's signature.)
    """
    n_rows, n_cols = destinantion.shape
    for row in range(n_rows):
        src_row = groups[row]
        for col in range(n_cols):
            destinantion[row, col] = source[src_row, col]
def transform(groups, x, func):
    """Broadcast a per-group statistic of ``x`` back onto every row.

    The statistic is computed once per group and then copied to each row
    according to that row's group id, so the result has the same shape as
    ``x``.

    :param groups: array of integer group ids, one per row of ``x``.
    :param x: 2-d array of values.
    :param func: name of the statistic: ``'mean'``, ``'std'`` (computed with
        ``ddof=1``), ``'sum'`` or ``'abssum'``.
    :return: array with the shape of ``x`` and the dtype of the aggregated
        values.
    :raises ValueError: if ``func`` is not one of the supported names.
    """
    # Validate ``func`` and aggregate before allocating the output buffer, so
    # an unknown name fails without any wasted allocation.
    if func == 'mean':
        value_data = agg_mean(groups, x)
    elif func == 'std':
        value_data = agg_std(groups, x, ddof=1)
    elif func == 'sum':
        value_data = agg_sum(groups, x)
    elif func == 'abssum':
        value_data = agg_abssum(groups, x)
    else:
        raise ValueError('({0}) is not recognized as valid functor'.format(func))

    # Use the aggregate's dtype, not ``x``'s: with an integer ``x`` a buffer
    # created by ``zeros_like(x)`` would silently truncate fractional
    # means / deviations when they are written into it.
    res = np.zeros(x.shape, dtype=value_data.dtype)
    set_value(groups, value_data, res)
    return res
def aggregate(groups, x, func):
    """Compute one statistic per group over the rows of ``x``.

    :param groups: array of integer group ids, one per row of ``x``.
    :param x: 2-d array of values.
    :param func: name of the statistic: ``'mean'``, ``'std'`` (computed with
        ``ddof=1``), ``'sum'`` or ``'abssum'``.
    :return: whatever the selected ``agg_*`` kernel returns for
        ``(groups, x)``.
    :raises ValueError: if ``func`` is not one of the supported names.
    """
    if func == 'mean':
        return agg_mean(groups, x)
    if func == 'std':
        return agg_std(groups, x, ddof=1)
    if func == 'sum':
        return agg_sum(groups, x)
    if func == 'abssum':
        return agg_abssum(groups, x)
    raise ValueError('({0}) is not recognized as valid functor'.format(func))
if __name__ == '__main__':
    # Ad-hoc benchmark: time 1000 grouped-mean aggregations on random data.
    import datetime as dt

    n_samples = 6000
    n_features = 10
    n_groups = 30

    groups = np.random.randint(n_groups, size=n_samples)
    x = np.random.randn(n_samples, n_features)

    # Run the same timing twice and print the elapsed time of each pass. The
    # first pass also pays any one-off JIT compilation cost of the numba
    # kernels on their first call; the second shows the warmed-up speed.
    for _ in range(2):
        start = dt.datetime.now()
        for i in range(1000):
            res = aggregate(groups, x, 'mean')
        print(dt.datetime.now() - start)
\ No newline at end of file
...@@ -11,7 +11,7 @@ from numpy.linalg import solve ...@@ -11,7 +11,7 @@ from numpy.linalg import solve
from typing import Tuple from typing import Tuple
from typing import Union from typing import Union
from typing import Dict from typing import Dict
from alphamind.aggregate import groupby from alphamind.groupby import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
......
...@@ -6,8 +6,8 @@ Created on 2017-4-25 ...@@ -6,8 +6,8 @@ Created on 2017-4-25
""" """
import numpy as np import numpy as np
from alphamind.aggregate import group_mapping from alphamind.groupby import group_mapping
from alphamind.impl import transform from alphamind.aggregate import transform
def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray: def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
......
...@@ -6,8 +6,8 @@ Created on 2017-4-25 ...@@ -6,8 +6,8 @@ Created on 2017-4-25
""" """
import numpy as np import numpy as np
from alphamind.aggregate import group_mapping from alphamind.groupby import group_mapping
from alphamind.impl import transform from alphamind.aggregate import transform
def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray: def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:
......
...@@ -8,7 +8,7 @@ Created on 2017-4-26 ...@@ -8,7 +8,7 @@ Created on 2017-4-26
import numpy as np import numpy as np
import numba as nb import numba as nb
from numpy import zeros from numpy import zeros
from alphamind.aggregate import groupby from alphamind.groupby import groupby
@nb.njit @nb.njit
......
...@@ -6,8 +6,8 @@ Created on 2017-4-28 ...@@ -6,8 +6,8 @@ Created on 2017-4-28
""" """
import numpy as np import numpy as np
from alphamind.aggregate import group_mapping from alphamind.groupby import group_mapping
from alphamind.impl import aggregate from alphamind.aggregate import aggregate
def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray: def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
......
...@@ -25,7 +25,7 @@ else: ...@@ -25,7 +25,7 @@ else:
line_trace = False line_trace = False
ext_modules = ['alphamind/aggregate.pyx'] ext_modules = ['alphamind/groupby.pyx']
def generate_extensions(ext_modules, line_trace=False): def generate_extensions(ext_modules, line_trace=False):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment