Commit beb0dcd3 authored by Dr.李

small enhancements

parent 104437fd
...@@ -38,10 +38,7 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp ...@@ -38,10 +38,7 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
groups_ids = groupby(groups) groups_ids = groupby(groups)
for curr_idx in groups_ids.values(): for curr_idx in groups_ids.values():
curr_x = x[curr_idx] curr_x, b = _sub_step(x, y, curr_idx, res)
curr_y = y[curr_idx]
b = ls_fit(curr_x, curr_y)
res[curr_idx] = ls_res(curr_x, curr_y, b)
if output_exposure: if output_exposure:
for i in range(exposure.shape[2]): for i in range(exposure.shape[2]):
exposure[curr_idx, :, i] = b[:, i] exposure[curr_idx, :, i] = b[:, i]
...@@ -69,6 +66,15 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp ...@@ -69,6 +66,15 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return res return res
@nb.njit(nogil=True, cache=True)
def _sub_step(x, y, curr_idx, res):
    """Fit one group and store its residuals into ``res`` in place.

    Selects the entries of ``x`` and ``y`` addressed by ``curr_idx``, fits
    coefficients with ``ls_fit``, and writes the output of ``ls_res`` for
    that selection into ``res[curr_idx]``.

    Returns the selected slice of ``x`` together with the fitted
    coefficients so the caller can reuse them.
    """
    group_x = x[curr_idx]
    group_y = y[curr_idx]
    beta = ls_fit(group_x, group_y)
    # ``res`` is mutated in place; only this group's entries are touched.
    res[curr_idx] = ls_res(group_x, group_y, beta)
    return group_x, beta
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray: def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
x_bar = x.T x_bar = x.T
......
...@@ -7,6 +7,7 @@ Created on 2017-5-10 ...@@ -7,6 +7,7 @@ Created on 2017-5-10
from typing import Union from typing import Union
import numpy as np import numpy as np
import numba as nb
from alphamind.cyimpl import groupby from alphamind.cyimpl import groupby
from alphamind.data.neutralize import ls_fit from alphamind.data.neutralize import ls_fit
...@@ -20,7 +21,7 @@ class LinearModel(object): ...@@ -20,7 +21,7 @@ class LinearModel(object):
self.model_parameter = _train(x, y, groups) self.model_parameter = _train(x, y, groups)
def predict(self, x, groups=None): def predict(self, x, groups=None):
if groups is not None and isinstance(self.model_parameter, dict): if groups is not None and self.model_parameter.ndim == 2:
names = np.unique(groups) names = np.unique(groups)
return multiple_prediction(names, self.model_parameter, x, groups) return multiple_prediction(names, self.model_parameter, x, groups)
elif self.model_parameter is None: elif self.model_parameter is None:
...@@ -31,6 +32,7 @@ class LinearModel(object): ...@@ -31,6 +32,7 @@ class LinearModel(object):
raise ValueError("grouped x value can't be used for vanilla linear model") raise ValueError("grouped x value can't be used for vanilla linear model")
@nb.njit(nogil=True, cache=True)
def multiple_prediction(names, model_parames, x, groups): def multiple_prediction(names, model_parames, x, groups):
pred_v = np.zeros(x.shape[0]) pred_v = np.zeros(x.shape[0])
for name in names: for name in names:
...@@ -40,22 +42,28 @@ def multiple_prediction(names, model_parames, x, groups): ...@@ -40,22 +42,28 @@ def multiple_prediction(names, model_parames, x, groups):
return pred_v return pred_v
def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> Union[np.ndarray, dict]: def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
if groups is None: if groups is None:
return ls_fit(x, y) return ls_fit(x, y)
else: else:
groups_ids = groupby(groups) groups_ids = groupby(groups)
res_beta = {} res_beta = np.zeros((max(groups_ids.keys())+1, x.shape[1]))
for k, curr_idx in groups_ids.items(): for k, curr_idx in groups_ids.items():
curr_x = x[curr_idx] _train_sub_group(x, y, k, curr_idx, res_beta)
curr_y = y[curr_idx]
res_beta[k] = ls_fit(curr_x, curr_y)
return res_beta return res_beta
@nb.njit(nogil=True, cache=True)
def _train_sub_group(x, y, k, curr_idx, res):
    """Fit one group and store its coefficients into ``res[k]`` in place.

    The entries of ``x`` and ``y`` addressed by ``curr_idx`` are passed to
    ``ls_fit``; the result is written to position ``k`` of ``res``.
    Nothing is returned.
    """
    res[k] = ls_fit(x[curr_idx], y[curr_idx])
if __name__ == '__main__': if __name__ == '__main__':
import datetime as dt
x = np.random.randn(3000, 10) x = np.random.randn(3000, 10)
y = np.random.randn(3000) y = np.random.randn(3000)
groups = np.random.randint(30, size=3000) groups = np.random.randint(30, size=3000)
...@@ -65,5 +73,12 @@ if __name__ == '__main__': ...@@ -65,5 +73,12 @@ if __name__ == '__main__':
model = LinearModel() model = LinearModel()
model.calibrate(x, y, groups) start = dt.datetime.now()
model.predict(to_x, to_groups) for i in range(5000):
\ No newline at end of file model.calibrate(x, y, groups)
print(dt.datetime.now() - start)
start = dt.datetime.now()
for i in range(50000):
model.predict(to_x, to_groups)
print(dt.datetime.now() - start)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment