Commit 080d34d3 authored by Dr.李

added projection standardizer

parent 4409c54d
...@@ -12,6 +12,7 @@ from alphamind.utilities import aggregate ...@@ -12,6 +12,7 @@ from alphamind.utilities import aggregate
from alphamind.utilities import array_index from alphamind.utilities import array_index
from alphamind.utilities import simple_mean from alphamind.utilities import simple_mean
from alphamind.utilities import simple_std from alphamind.utilities import simple_std
from alphamind.utilities import simple_sqrsum
def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray: def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
...@@ -26,6 +27,15 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray: ...@@ -26,6 +27,15 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof) return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof)
def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
    """Scale ``x`` by the square root of its sum of squares.

    Three cases are handled:

    * ``groups`` given and ``axis == 0``: the work is delegated to
      ``transform(groups, x, 'project')`` after passing ``groups`` through
      ``group_mapping``.  (Presumably each column is scaled within each
      group -- confirm against ``transform`` / ``scale_value``.)
    * ``axis == 1`` (the default): every row of ``x`` is divided by that
      row's root-sum-of-squares.  ``groups`` is ignored in this case.
    * ``axis == 0`` without ``groups``: every column of ``x`` is divided by
      that column's root-sum-of-squares.

    Args:
        x: 2-D array to scale.
        groups: optional group label per row of ``x``; only used when
            ``axis == 0``.
        axis: ``1`` to normalise rows, ``0`` to normalise columns.

    Returns:
        Array with the same shape as ``x``.

    Raises:
        ValueError: propagated from ``simple_sqrsum`` for any other ``axis``.
    """
    if groups is not None and axis == 0:
        groups = group_mapping(groups)
        projected = transform(groups, x, 'project')
        return projected

    norms = simple_sqrsum(x, axis=axis)
    if axis == 0:
        # One norm per column: it must broadcast as a row vector.  Reshaping
        # to a column vector here would either fail to broadcast or, for a
        # square input, silently divide by the wrong norms.
        return x / norms.reshape((1, -1))
    # One norm per row: broadcast as a column vector.
    return x / norms.reshape((-1, 1))
class Standardizer(object): class Standardizer(object):
def __init__(self, ddof: int=1): def __init__(self, ddof: int=1):
......
...@@ -10,6 +10,7 @@ import numpy as np ...@@ -10,6 +10,7 @@ import numpy as np
import pandas as pd import pandas as pd
from scipy.stats import zscore from scipy.stats import zscore
from alphamind.data.standardize import standardize from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.standardize import Standardizer from alphamind.data.standardize import Standardizer
from alphamind.data.standardize import GroupedStandardizer from alphamind.data.standardize import GroupedStandardizer
...@@ -26,6 +27,20 @@ class TestStandardize(unittest.TestCase): ...@@ -26,6 +27,20 @@ class TestStandardize(unittest.TestCase):
np.testing.assert_array_almost_equal(calc_zscore, exp_zscore) np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
def test_projection(self):
    """Default (row-wise) projection must match a direct NumPy computation."""
    # Reference: divide every row by the square root of its sum of squares.
    row_sqr_sums = np.sum(np.square(self.x), axis=1).reshape((-1, 1))
    exp_projected = self.x / np.sqrt(row_sqr_sums)

    calc_projected = projection(self.x)

    np.testing.assert_array_almost_equal(calc_projected, exp_projected)
def test_projection_with_groups(self):
    """Grouped projection along axis 0 must match a pandas groupby reference."""

    def scale_by_sqrsum(s):
        # Divide a group's column by the square root of its sum of squares.
        return s / np.sqrt(np.square(s).sum(axis=0))

    grouped = pd.DataFrame(self.x).groupby(self.groups)
    exp_projected = grouped.transform(scale_by_sqrsum)

    calc_projected = projection(self.x, self.groups, axis=0)

    np.testing.assert_array_almost_equal(calc_projected, exp_projected)
def test_standardize_with_group(self): def test_standardize_with_group(self):
calc_zscore = standardize(self.x, self.groups) calc_zscore = standardize(self.x, self.groups)
exp_zscore = pd.DataFrame(self.x).\ exp_zscore = pd.DataFrame(self.x).\
......
...@@ -93,6 +93,27 @@ def simple_abssum(x, axis=0): ...@@ -93,6 +93,27 @@ def simple_abssum(x, axis=0):
return res return res
def simple_sqrsum(x, axis=0):
    """Return the square root of the sum of squares of ``x`` along ``axis``.

    Args:
        x: 2-D array (its ``shape`` is unpacked into two values).
        axis: ``0`` reduces over rows and yields one value per column;
            ``1`` reduces over columns and yields one value per row.

    Returns:
        1-D float array of length ``x.shape[1]`` (``axis == 0``) or
        ``x.shape[0]`` (``axis == 1``).

    Raises:
        ValueError: if ``axis`` is neither ``0`` nor ``1``.
    """
    n_rows, n_cols = x.shape

    if axis != 0 and axis != 1:
        raise ValueError("axis value is not supported")

    # Explicit element-wise loops are kept on purpose, mirroring the other
    # reduction helpers in this file (NOTE(review): presumably so the
    # function stays friendly to numba compilation -- confirm).
    if axis == 0:
        totals = np.zeros(n_cols)
        for row in range(n_rows):
            for col in range(n_cols):
                value = x[row, col]
                totals[col] += value * value
    else:
        totals = np.zeros(n_rows)
        for row in range(n_rows):
            for col in range(n_cols):
                value = x[row, col]
                totals[row] += value * value

    return np.sqrt(totals)
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
def simple_mean(x, axis=0): def simple_mean(x, axis=0):
length, width = x.shape length, width = x.shape
...@@ -152,6 +173,19 @@ def agg_sum(groups, x): ...@@ -152,6 +173,19 @@ def agg_sum(groups, x):
return res return res
def agg_sqrsum(groups, x):
    """Return, per group and per column, the square root of the sum of squares.

    Args:
        groups: 1-D array of non-negative integer group ids, one per row of
            ``x``; each id is used directly as a row index into the result.
        x: 2-D array of values.

    Returns:
        Float64 array of shape ``(groups.max() + 1, x.shape[1])`` whose entry
        ``[g, j]`` is ``sqrt(sum(x[i, j] ** 2))`` over rows ``i`` with
        ``groups[i] == g``.  Ids that never occur keep a value of ``0``.
    """
    n_groups = groups.max() + 1
    n_obs, n_feat = x.shape
    acc = np.zeros((n_groups, n_feat), dtype=np.float64)

    for row in range(n_obs):
        gid = groups[row]
        for col in range(n_feat):
            value = x[row, col]
            acc[gid, col] += value * value

    return np.sqrt(acc)
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
def agg_abssum(groups, x): def agg_abssum(groups, x):
max_g = groups.max() max_g = groups.max()
...@@ -256,10 +290,12 @@ def transform(groups: np.ndarray, ...@@ -256,10 +290,12 @@ def transform(groups: np.ndarray,
value_data = agg_sum(groups, x) value_data = agg_sum(groups, x)
elif func == 'abssum' or func == 'scale': elif func == 'abssum' or func == 'scale':
value_data = agg_abssum(groups, x) value_data = agg_abssum(groups, x)
elif func == 'sqrsum' or func == 'project':
value_data = agg_sqrsum(groups, x)
else: else:
raise ValueError('({0}) is not recognized as valid functor'.format(func)) raise ValueError('({0}) is not recognized as valid functor'.format(func))
if func == 'scale': if func == 'scale' or func == 'project':
return scale_value(groups, value_data, x, scale) return scale_value(groups, value_data, x, scale)
else: else:
return copy_value(groups, value_data) return copy_value(groups, value_data)
...@@ -274,6 +310,8 @@ def aggregate(groups, x, func, ddof=1): ...@@ -274,6 +310,8 @@ def aggregate(groups, x, func, ddof=1):
value_data = agg_sum(groups, x) value_data = agg_sum(groups, x)
elif func == 'abssum' or func == 'scale': elif func == 'abssum' or func == 'scale':
value_data = agg_abssum(groups, x) value_data = agg_abssum(groups, x)
elif func == 'sqrsum' or func == 'project':
value_data = agg_sqrsum(groups, x)
else: else:
raise ValueError('({0}) is not recognized as valid functor'.format(func)) raise ValueError('({0}) is not recognized as valid functor'.format(func))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment