Commit 080d34d3 authored by Dr.李

added projection standardizer

parent 4409c54d
......@@ -12,6 +12,7 @@ from alphamind.utilities import aggregate
from alphamind.utilities import array_index
from alphamind.utilities import simple_mean
from alphamind.utilities import simple_std
from alphamind.utilities import simple_sqrsum
def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
......@@ -26,6 +27,15 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
    """Scale ``x`` by the square root of its sum of squares.

    Parameters
    ----------
    x : np.ndarray
        Two-dimensional data array (``simple_sqrsum`` unpacks ``x.shape``
        into exactly two values).
    groups : np.ndarray, optional
        Group label for each row of ``x``. Only used when ``axis == 0``;
        for any other ``axis`` the labels are ignored and the ungrouped
        path is taken.
    axis : int, default 1
        ``1`` divides every row by that row's norm, ``0`` divides every
        column by that column's norm (per group when ``groups`` is given).

    Returns
    -------
    np.ndarray
        Array with the same shape as ``x``.

    Raises
    ------
    ValueError
        Raised by ``simple_sqrsum`` on the ungrouped path when ``axis`` is
        neither 0 nor 1.
    """
    if groups is not None and axis == 0:
        # Grouped path: labels are first passed through group_mapping and the
        # per-group scaling is delegated to transform(..., 'project').
        mapped_groups = group_mapping(groups)
        return transform(mapped_groups, x, 'project')

    norms = simple_sqrsum(x, axis=axis)
    if axis == 0:
        # One norm per column: it must broadcast along the rows. The former
        # unconditional reshape((-1, 1)) produced a column vector here, which
        # either failed to broadcast or, for square input, silently divided
        # row i by the norm of column i.
        return x / norms.reshape((1, -1))
    # One norm per row: broadcast along the columns.
    return x / norms.reshape((-1, 1))
class Standardizer(object):
def __init__(self, ddof: int=1):
......
......@@ -10,6 +10,7 @@ import numpy as np
import pandas as pd
from scipy.stats import zscore
from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.standardize import Standardizer
from alphamind.data.standardize import GroupedStandardizer
......@@ -26,6 +27,20 @@ class TestStandardize(unittest.TestCase):
np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
def test_projection(self):
    """Default projection must divide each row by that row's L2 norm."""
    actual = projection(self.x)

    # Reference value built step by step with plain numpy.
    squared_row_sums = np.sum(np.square(self.x), axis=1)
    row_norms = np.sqrt(squared_row_sums.reshape((-1, 1)))
    expected = self.x / row_norms

    np.testing.assert_array_almost_equal(actual, expected)
def test_projection_with_groups(self):
    """Grouped projection along axis 0 must match a pandas groupby reference."""

    def _scale_to_unit_norm(s):
        # Divide the group's values by the square root of their sum of squares.
        return s / np.sqrt(np.square(s).sum(axis=0))

    actual = projection(self.x, self.groups, axis=0)

    grouped = pd.DataFrame(self.x).groupby(self.groups)
    expected = grouped.transform(_scale_to_unit_norm)

    np.testing.assert_array_almost_equal(actual, expected)
def test_standardize_with_group(self):
calc_zscore = standardize(self.x, self.groups)
exp_zscore = pd.DataFrame(self.x).\
......
......@@ -93,6 +93,27 @@ def simple_abssum(x, axis=0):
return res
def simple_sqrsum(x, axis=0):
    """Return the square root of the sum of squares of ``x`` along ``axis``.

    ``x`` must be two-dimensional. With ``axis=0`` the result has one entry
    per column; with ``axis=1`` it has one entry per row. Any other ``axis``
    raises ``ValueError``.

    NOTE(review): sibling helpers in this module carry ``nb.njit``; the
    explicit loops are kept so this function stays usable under that
    decorator -- confirm whether the decorator sits just above this block.
    """
    n_rows, n_cols = x.shape

    if axis != 0 and axis != 1:
        raise ValueError("axis value is not supported")

    if axis == 0:
        # Accumulate down the rows: one total per column.
        totals = np.zeros(n_cols)
        for row in range(n_rows):
            for col in range(n_cols):
                totals[col] += x[row, col] * x[row, col]
    else:
        # Accumulate across the columns: one total per row.
        totals = np.zeros(n_rows)
        for row in range(n_rows):
            for col in range(n_cols):
                totals[row] += x[row, col] * x[row, col]

    return np.sqrt(totals)
@nb.njit(nogil=True, cache=True)
def simple_mean(x, axis=0):
length, width = x.shape
......@@ -152,6 +173,19 @@ def agg_sum(groups, x):
return res
def agg_sqrsum(groups, x):
    """Return, per group and column, the square root of the sum of squares.

    ``groups[i]`` is used directly as the output row index for row ``i`` of
    the two-dimensional array ``x``. The result has ``groups.max() + 1`` rows
    and as many columns as ``x``; group ids that never occur stay at 0.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape

    totals = np.zeros((n_groups, n_cols), dtype=np.float64)
    for row in range(n_rows):
        target = groups[row]
        for col in range(n_cols):
            totals[target, col] += x[row, col] * x[row, col]

    return np.sqrt(totals)
@nb.njit(nogil=True, cache=True)
def agg_abssum(groups, x):
max_g = groups.max()
......@@ -256,10 +290,12 @@ def transform(groups: np.ndarray,
value_data = agg_sum(groups, x)
elif func == 'abssum' or func == 'scale':
value_data = agg_abssum(groups, x)
elif func == 'sqrsum' or func == 'project':
value_data = agg_sqrsum(groups, x)
else:
raise ValueError('({0}) is not recognized as valid functor'.format(func))
if func == 'scale':
if func == 'scale' or func == 'project':
return scale_value(groups, value_data, x, scale)
else:
return copy_value(groups, value_data)
......@@ -274,6 +310,8 @@ def aggregate(groups, x, func, ddof=1):
value_data = agg_sum(groups, x)
elif func == 'abssum' or func == 'scale':
value_data = agg_abssum(groups, x)
elif func == 'sqrsum' or func == 'project':
value_data = agg_sqrsum(groups, x)
else:
raise ValueError('({0}) is not recognized as valid functor'.format(func))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment