Commit 31569ef4 authored by Dr.李

output exposure

parent 1a454a09
......@@ -29,17 +29,14 @@ ctypedef long long int64_t
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef list groupby(long[:] groups):
cpdef groupby(long[:] groups):
cdef long long length = groups.shape[0]
cdef cpp_map[long, cpp_vector[int64_t]] group_ids
cdef long long i
cdef long curr_tag
cdef cpp_map[long, cpp_vector[int64_t]].iterator it
cdef list res = []
cdef np.ndarray[long long, ndim=1] npy_array
cdef cpp_vector[int64_t] v
cdef long long* arr_ptr
for i in range(length):
curr_tag = groups[i]
......@@ -50,10 +47,7 @@ cpdef list groupby(long[:] groups):
else:
deref(it).second.push_back(i)
for v in group_ids.values():
res.append(v)
return res
return group_ids.values()
@cython.boundscheck(False)
......@@ -156,7 +150,6 @@ cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size
for i in range(max_g+1):
curr = bin_count_ptr[i]
if curr != 0:
loop_idx1 = i*width
for j in range(width):
res_ptr[loop_idx1 + j] /= curr
......@@ -202,7 +195,6 @@ cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_
for i in range(max_g+1):
curr = bin_count_ptr[i]
loop_idx1 = i * width
if curr != 0:
for j in range(width):
loop_idx2 = loop_idx1 + j
running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))
......
......@@ -14,14 +14,18 @@ from alphamind.aggregate import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False) \
-> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
-> Tuple[np.ndarray, Tuple[Union[np.ndarray, np.ndarray]]]:
if groups is not None:
res = zeros(y.shape)
if y.ndim == 2 and output_explained:
if y.ndim == 2:
if output_explained:
explained = zeros(x.shape + (y.shape[1],))
exposure = zeros(x.shape + (y.shape[1],))
else:
explained = zeros(x.shape)
exposure = zeros(x.shape)
groups_ids = groupby(groups)
for curr_idx in groups_ids:
......@@ -29,18 +33,24 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
curr_y = y[curr_idx]
b = ls_fit(x[curr_idx], y[curr_idx])
res[curr_idx] = ls_res(curr_x, curr_y, b)
if exposure.ndim == 3:
for i in range(exposure.shape[2]):
exposure[curr_idx, :, i] = b[:, i]
else:
exposure[curr_idx] = b
if output_explained:
explained[curr_idx] = ls_explain(curr_x, b)
if output_explained:
return res, explained
return res, (exposure, explained)
else:
return res
return res, (exposure,)
else:
b = ls_fit(x, y)
if output_explained:
return ls_res(x, y, b), ls_explain(x, b)
return ls_res(x, y, b), (b, ls_explain(x, b))
else:
return ls_res(x, y, b)
return ls_res(x, y, b), (b,)
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
......
......@@ -18,7 +18,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000, 4)
x = np.random.randn(3000, 10)
calc_res = neutralize(x, y)
calc_res, _ = neutralize(x, y)
model = LinearRegression(fit_intercept=False)
model.fit(x, y)
......@@ -46,7 +46,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000)
x = np.random.randn(3000, 10)
calc_res, calc_explained = neutralize(x, y, output_explained=True)
calc_res, (b, calc_explained) = neutralize(x, y, output_explained=True)
model = LinearRegression(fit_intercept=False)
model.fit(x, y)
......@@ -60,7 +60,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000, 4)
x = np.random.randn(3000, 10)
calc_res, calc_explained = neutralize(x, y, output_explained=True)
calc_res, (b, calc_explained) = neutralize(x, y, output_explained=True)
model = LinearRegression(fit_intercept=False)
model.fit(x, y)
......@@ -77,7 +77,7 @@ class TestNeutralize(unittest.TestCase):
x = np.random.randn(3000, 10)
groups = np.random.randint(30, size=3000)
calc_res, calc_explained = neutralize(x, y, groups, output_explained=True)
calc_res, (b, calc_explained) = neutralize(x, y, groups, output_explained=True)
model = LinearRegression(fit_intercept=False)
for i in range(30):
......@@ -92,7 +92,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000, 4)
x = np.random.randn(3000, 10)
calc_res, calc_explained = neutralize(x, y, groups, output_explained=True)
calc_res, (b, calc_explained) = neutralize(x, y, groups, output_explained=True)
model = LinearRegression(fit_intercept=False)
for i in range(30):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment