Commit 31569ef4 authored by Dr.李

output exposure

parent 1a454a09
...@@ -29,17 +29,14 @@ ctypedef long long int64_t ...@@ -29,17 +29,14 @@ ctypedef long long int64_t
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
@cython.initializedcheck(False) @cython.initializedcheck(False)
cpdef list groupby(long[:] groups): cpdef groupby(long[:] groups):
cdef long long length = groups.shape[0] cdef long long length = groups.shape[0]
cdef cpp_map[long, cpp_vector[int64_t]] group_ids cdef cpp_map[long, cpp_vector[int64_t]] group_ids
cdef long long i cdef long long i
cdef long curr_tag cdef long curr_tag
cdef cpp_map[long, cpp_vector[int64_t]].iterator it cdef cpp_map[long, cpp_vector[int64_t]].iterator it
cdef list res = []
cdef np.ndarray[long long, ndim=1] npy_array cdef np.ndarray[long long, ndim=1] npy_array
cdef cpp_vector[int64_t] v
cdef long long* arr_ptr
for i in range(length): for i in range(length):
curr_tag = groups[i] curr_tag = groups[i]
...@@ -50,10 +47,7 @@ cpdef list groupby(long[:] groups): ...@@ -50,10 +47,7 @@ cpdef list groupby(long[:] groups):
else: else:
deref(it).second.push_back(i) deref(it).second.push_back(i)
for v in group_ids.values(): return group_ids.values()
res.append(v)
return res
@cython.boundscheck(False) @cython.boundscheck(False)
...@@ -156,7 +150,6 @@ cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size ...@@ -156,7 +150,6 @@ cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size
for i in range(max_g+1): for i in range(max_g+1):
curr = bin_count_ptr[i] curr = bin_count_ptr[i]
if curr != 0:
loop_idx1 = i*width loop_idx1 = i*width
for j in range(width): for j in range(width):
res_ptr[loop_idx1 + j] /= curr res_ptr[loop_idx1 + j] /= curr
...@@ -202,7 +195,6 @@ cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_ ...@@ -202,7 +195,6 @@ cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_
for i in range(max_g+1): for i in range(max_g+1):
curr = bin_count_ptr[i] curr = bin_count_ptr[i]
loop_idx1 = i * width loop_idx1 = i * width
if curr != 0:
for j in range(width): for j in range(width):
loop_idx2 = loop_idx1 + j loop_idx2 = loop_idx1 + j
running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof)) running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))
......
...@@ -14,14 +14,18 @@ from alphamind.aggregate import groupby ...@@ -14,14 +14,18 @@ from alphamind.aggregate import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False) \ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False) \
-> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: -> Tuple[np.ndarray, Tuple[Union[np.ndarray, np.ndarray]]]:
if groups is not None: if groups is not None:
res = zeros(y.shape) res = zeros(y.shape)
if y.ndim == 2 and output_explained: if y.ndim == 2:
if output_explained:
explained = zeros(x.shape + (y.shape[1],)) explained = zeros(x.shape + (y.shape[1],))
exposure = zeros(x.shape + (y.shape[1],))
else: else:
explained = zeros(x.shape) explained = zeros(x.shape)
exposure = zeros(x.shape)
groups_ids = groupby(groups) groups_ids = groupby(groups)
for curr_idx in groups_ids: for curr_idx in groups_ids:
...@@ -29,18 +33,24 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp ...@@ -29,18 +33,24 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
curr_y = y[curr_idx] curr_y = y[curr_idx]
b = ls_fit(x[curr_idx], y[curr_idx]) b = ls_fit(x[curr_idx], y[curr_idx])
res[curr_idx] = ls_res(curr_x, curr_y, b) res[curr_idx] = ls_res(curr_x, curr_y, b)
if exposure.ndim == 3:
for i in range(exposure.shape[2]):
exposure[curr_idx, :, i] = b[:, i]
else:
exposure[curr_idx] = b
if output_explained: if output_explained:
explained[curr_idx] = ls_explain(curr_x, b) explained[curr_idx] = ls_explain(curr_x, b)
if output_explained: if output_explained:
return res, explained return res, (exposure, explained)
else: else:
return res return res, (exposure,)
else: else:
b = ls_fit(x, y) b = ls_fit(x, y)
if output_explained: if output_explained:
return ls_res(x, y, b), ls_explain(x, b) return ls_res(x, y, b), (b, ls_explain(x, b))
else: else:
return ls_res(x, y, b) return ls_res(x, y, b), (b,)
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray: def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
......
...@@ -18,7 +18,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -18,7 +18,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000, 4) y = np.random.randn(3000, 4)
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
calc_res = neutralize(x, y) calc_res, _ = neutralize(x, y)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
model.fit(x, y) model.fit(x, y)
...@@ -46,7 +46,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -46,7 +46,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000) y = np.random.randn(3000)
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
calc_res, calc_explained = neutralize(x, y, output_explained=True) calc_res, (b, calc_explained) = neutralize(x, y, output_explained=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
model.fit(x, y) model.fit(x, y)
...@@ -60,7 +60,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -60,7 +60,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000, 4) y = np.random.randn(3000, 4)
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
calc_res, calc_explained = neutralize(x, y, output_explained=True) calc_res, (b, calc_explained) = neutralize(x, y, output_explained=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
model.fit(x, y) model.fit(x, y)
...@@ -77,7 +77,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -77,7 +77,7 @@ class TestNeutralize(unittest.TestCase):
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
groups = np.random.randint(30, size=3000) groups = np.random.randint(30, size=3000)
calc_res, calc_explained = neutralize(x, y, groups, output_explained=True) calc_res, (b, calc_explained) = neutralize(x, y, groups, output_explained=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
for i in range(30): for i in range(30):
...@@ -92,7 +92,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -92,7 +92,7 @@ class TestNeutralize(unittest.TestCase):
y = np.random.randn(3000, 4) y = np.random.randn(3000, 4)
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
calc_res, calc_explained = neutralize(x, y, groups, output_explained=True) calc_res, (b, calc_explained) = neutralize(x, y, groups, output_explained=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
for i in range(30): for i in range(30):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment