Commit 54edf78f authored by Dr.李

update code base for simplicity

parent 9d3a00d0
...@@ -22,8 +22,7 @@ def risk_analysis(net_weight_series: pd.Series, ...@@ -22,8 +22,7 @@ def risk_analysis(net_weight_series: pd.Series,
idiosyncratic, other_stats = neutralize(risk_table.values, idiosyncratic, other_stats = neutralize(risk_table.values,
next_bar_return_series.values, next_bar_return_series.values,
group_idx, group_idx,
output_exposure=True, detail=True)
output_explained=True)
systematic = other_stats['explained'] systematic = other_stats['explained']
exposure = other_stats['exposure'] exposure = other_stats['exposure']
......
...@@ -16,8 +16,7 @@ import alphamind.utilities as utils ...@@ -16,8 +16,7 @@ import alphamind.utilities as utils
def neutralize(x: np.ndarray, def neutralize(x: np.ndarray,
y: np.ndarray, y: np.ndarray,
groups: np.ndarray=None, groups: np.ndarray=None,
output_explained: bool=False, detail: bool=False,
output_exposure: bool=False,
weights: np.ndarray=None) \ weights: np.ndarray=None) \
-> Union[np.ndarray, Tuple[np.ndarray, Dict]]: -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
...@@ -27,47 +26,37 @@ def neutralize(x: np.ndarray, ...@@ -27,47 +26,37 @@ def neutralize(x: np.ndarray,
if weights is None: if weights is None:
weights = np.ones(len(y), dtype=float) weights = np.ones(len(y), dtype=float)
if groups is not None: output_dict = {}
res = np.zeros(y.shape)
if y.ndim == 2: if detail:
if output_explained: exposure = np.zeros(x.shape + (y.shape[1],))
explained = np.zeros(x.shape + (y.shape[1],)) explained = np.zeros(x.shape + (y.shape[1],))
if output_exposure: output_dict['exposure'] = exposure
exposure = np.zeros(x.shape + (y.shape[1],)) output_dict['explained'] = explained
else:
if output_explained:
explained = np.zeros(x.shape + (1,))
if output_exposure:
exposure = np.zeros(x.shape + (1,))
if groups is not None:
res = np.zeros(y.shape)
index_diff, order = utils.groupby(groups) index_diff, order = utils.groupby(groups)
start = 0 start = 0
for diff_loc in index_diff: if detail:
curr_idx = order[start:diff_loc + 1] for diff_loc in index_diff:
curr_x, b = _sub_step(x, y, weights, curr_idx, res) curr_idx = order[start:diff_loc + 1]
if output_exposure: curr_x, b = _sub_step(x, y, weights, curr_idx, res)
for i in range(exposure.shape[2]): exposure[curr_idx, :, :] = b
exposure[curr_idx, :, i] = b[:, i] explained[curr_idx] = ls_explain(curr_x, b)
if output_explained: start = diff_loc + 1
for i in range(explained.shape[2]): else:
explained[curr_idx] = ls_explain(curr_x, b) for diff_loc in index_diff:
start = diff_loc + 1 curr_idx = order[start:diff_loc + 1]
_sub_step(x, y, weights, curr_idx, res)
start = diff_loc + 1
else: else:
b = ls_fit(x, y, weights) b = ls_fit(x, y, weights)
res = ls_res(x, y, b) res = ls_res(x, y, b)
if output_explained: if detail:
explained = ls_explain(x, b) explained[:, :, :] = ls_explain(x, b)
if output_exposure: exposure[:] = b
exposure = b
output_dict = {}
if output_explained:
output_dict['explained'] = explained
if output_exposure:
output_dict['exposure'] = exposure
if output_dict: if output_dict:
return res, output_dict return res, output_dict
...@@ -76,10 +65,8 @@ def neutralize(x: np.ndarray, ...@@ -76,10 +65,8 @@ def neutralize(x: np.ndarray,
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
def _sub_step(x, y, w, curr_idx, res): def _sub_step(x, y, w, curr_idx, res) -> Tuple[np.ndarray, np.ndarray]:
curr_x = x[curr_idx] curr_x, curr_y, curr_w = x[curr_idx], y[curr_idx], w[curr_idx]
curr_y = y[curr_idx]
curr_w = w[curr_idx]
b = ls_fit(curr_x, curr_y, curr_w) b = ls_fit(curr_x, curr_y, curr_w)
res[curr_idx] = ls_res(curr_x, curr_y, b) res[curr_idx] = ls_res(curr_x, curr_y, b)
return curr_x, b return curr_x, b
...@@ -99,21 +86,18 @@ def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray: ...@@ -99,21 +86,18 @@ def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray: def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
n = b.shape[1] m, n = b.shape
explained = np.zeros(x.shape + (n,)) return b.reshape((1, m, n)) * x.reshape((-1, m, 1))
for i in range(n):
explained[:, :, i] = b[:, i] * x
return explained
if __name__ == '__main__': if __name__ == '__main__':
x = np.random.randn(3000) x = np.random.randn(50000, 10)
y = np.random.randn(3000) y = np.random.randn(50000, 1)
w = np.ones(3000) w = np.ones(50000)
import datetime as dt import datetime as dt
start = dt.datetime.now() start = dt.datetime.now()
for i in range(1000): for _ in range(1000):
ls_fit(x, y, w) ls_fit(x, y, w)
print(dt.datetime.now() - start) print(dt.datetime.now() - start)
...@@ -43,7 +43,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -43,7 +43,7 @@ class TestNeutralize(unittest.TestCase):
def test_neutralize_explain_output(self): def test_neutralize_explain_output(self):
y = self.y[:, 0].flatten() y = self.y[:, 0].flatten()
calc_res, other_stats = neutralize(self.x, y, output_explained=True) calc_res, other_stats = neutralize(self.x, y, detail=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
model.fit(self.x, y) model.fit(self.x, y)
...@@ -54,7 +54,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -54,7 +54,7 @@ class TestNeutralize(unittest.TestCase):
np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1)) np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1))
np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained) np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained)
calc_res, other_stats = neutralize(self.x, self.y, output_explained=True) calc_res, other_stats = neutralize(self.x, self.y, detail=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
model.fit(self.x, self.y) model.fit(self.x, self.y)
...@@ -69,7 +69,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -69,7 +69,7 @@ class TestNeutralize(unittest.TestCase):
def test_neutralize_explain_output_with_group(self): def test_neutralize_explain_output_with_group(self):
y = self.y[:, 0].flatten() y = self.y[:, 0].flatten()
calc_res, other_stats = neutralize(self.x, y, self.groups, output_explained=True) calc_res, other_stats = neutralize(self.x, y, self.groups, detail=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
for i in range(30): for i in range(30):
...@@ -81,7 +81,7 @@ class TestNeutralize(unittest.TestCase): ...@@ -81,7 +81,7 @@ class TestNeutralize(unittest.TestCase):
np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1)) np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1))
np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0], exp_explained) np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0], exp_explained)
calc_res, other_stats = neutralize(self.x, self.y, self.groups, output_explained=True) calc_res, other_stats = neutralize(self.x, self.y, self.groups, detail=True)
model = LinearRegression(fit_intercept=False) model = LinearRegression(fit_intercept=False)
for i in range(30): for i in range(30):
......
...@@ -67,6 +67,8 @@ def simple_sum(x, axis=0): ...@@ -67,6 +67,8 @@ def simple_sum(x, axis=0):
for i in range(length): for i in range(length):
for j in range(width): for j in range(width):
res[i] += x[i, j] res[i] += x[i, j]
else:
raise ValueError("axis value is not supported")
return res return res
...@@ -86,6 +88,8 @@ def simple_abssum(x, axis=0): ...@@ -86,6 +88,8 @@ def simple_abssum(x, axis=0):
for i in range(length): for i in range(length):
for j in range(width): for j in range(width):
res[i] += abs(x[i, j]) res[i] += abs(x[i, j])
else:
raise ValueError("axis value is not supported")
return res return res
...@@ -106,6 +110,8 @@ def simple_mean(x, axis=0): ...@@ -106,6 +110,8 @@ def simple_mean(x, axis=0):
for j in range(width): for j in range(width):
res[i] += x[i, j] res[i] += x[i, j]
res[i] /= width res[i] /= width
else:
raise ValueError("axis value is not supported")
return res return res
...@@ -129,6 +135,8 @@ def simple_std(x, axis=0, ddof=1): ...@@ -129,6 +135,8 @@ def simple_std(x, axis=0, ddof=1):
res[i] += x[i, j] * x[i, j] res[i] += x[i, j] * x[i, j]
sum_mat[i] += x[i, j] sum_mat[i] += x[i, j]
res[i] = math.sqrt((res[i] - sum_mat[i] * sum_mat[i] / width) / (width - ddof)) res[i] = math.sqrt((res[i] - sum_mat[i] * sum_mat[i] / width) / (width - ddof))
else:
raise ValueError("axis value is not supported")
return res return res
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment