Commit beb0dcd3 authored by Dr.李

small enhancements

parent 104437fd
......@@ -38,10 +38,7 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
groups_ids = groupby(groups)
for curr_idx in groups_ids.values():
curr_x = x[curr_idx]
curr_y = y[curr_idx]
b = ls_fit(curr_x, curr_y)
res[curr_idx] = ls_res(curr_x, curr_y, b)
curr_x, b = _sub_step(x, y, curr_idx, res)
if output_exposure:
for i in range(exposure.shape[2]):
exposure[curr_idx, :, i] = b[:, i]
......@@ -69,6 +66,15 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
return res
@nb.njit(nogil=True, cache=True)
def _sub_step(x, y, curr_idx, res):
    """Process one group of rows and store its ``ls_res`` output in ``res``.

    The rows selected by ``curr_idx`` are taken from ``x`` and ``y``, passed
    to ``ls_fit`` to obtain coefficients, and the value returned by
    ``ls_res`` for those rows is written into ``res[curr_idx]`` in place.

    Returns:
        A tuple ``(selected rows of x, coefficients from ls_fit)``.
    """
    group_x = x[curr_idx]
    group_y = y[curr_idx]
    beta = ls_fit(group_x, group_y)
    # ``res`` is mutated in place; the caller keeps ownership of the buffer.
    res[curr_idx] = ls_res(group_x, group_y, beta)
    return group_x, beta
@nb.njit(nogil=True, cache=True)
def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
x_bar = x.T
......
......@@ -7,6 +7,7 @@ Created on 2017-5-10
from typing import Union
import numpy as np
import numba as nb
from alphamind.cyimpl import groupby
from alphamind.data.neutralize import ls_fit
......@@ -20,7 +21,7 @@ class LinearModel(object):
self.model_parameter = _train(x, y, groups)
def predict(self, x, groups=None):
if groups is not None and isinstance(self.model_parameter, dict):
if groups is not None and self.model_parameter.ndim == 2:
names = np.unique(groups)
return multiple_prediction(names, self.model_parameter, x, groups)
elif self.model_parameter is None:
......@@ -31,6 +32,7 @@ class LinearModel(object):
raise ValueError("grouped x value can't be used for vanilla linear model")
@nb.njit(nogil=True, cache=True)
def multiple_prediction(names, model_parames, x, groups):
pred_v = np.zeros(x.shape[0])
for name in names:
......@@ -40,22 +42,28 @@ def multiple_prediction(names, model_parames, x, groups):
return pred_v
def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
    """Fit coefficients with ``ls_fit``, optionally one fit per group.

    Args:
        x: Feature matrix; ``x.shape[1]`` sets the width of the grouped
            result.
        y: Target values, row-aligned with ``x``.
        groups: Optional group label for each row. Labels are used directly
            as row indices into the result, so they are assumed to be
            non-negative integers -- TODO confirm against ``groupby``.

    Returns:
        ``ls_fit(x, y)`` when ``groups`` is None. Otherwise a 2-D array with
        ``max(label) + 1`` rows in which row ``k`` is filled by
        ``_train_sub_group`` for group ``k``; rows whose label never occurs
        stay zero.
    """
    if groups is None:
        return ls_fit(x, y)

    groups_ids = groupby(groups)
    # ``default=-1`` yields an empty (0, n_features) result when there are no
    # groups, instead of ``max()`` raising on an empty sequence.
    n_rows = max(groups_ids.keys(), default=-1) + 1
    res_beta = np.zeros((n_rows, x.shape[1]))
    for k, curr_idx in groups_ids.items():
        _train_sub_group(x, y, k, curr_idx, res_beta)
    return res_beta
@nb.njit(nogil=True, cache=True)
def _train_sub_group(x, y, k, curr_idx, res):
    """Store the ``ls_fit`` result for one group into ``res[k]``.

    The rows selected by ``curr_idx`` are taken from ``x`` and ``y`` and
    handed to ``ls_fit``; ``res`` is updated in place and nothing is
    returned.
    """
    res[k] = ls_fit(x[curr_idx], y[curr_idx])
if __name__ == '__main__':
import datetime as dt
x = np.random.randn(3000, 10)
y = np.random.randn(3000)
groups = np.random.randint(30, size=3000)
......@@ -65,5 +73,12 @@ if __name__ == '__main__':
model = LinearModel()
start = dt.datetime.now()
for i in range(5000):
model.calibrate(x, y, groups)
print(dt.datetime.now() - start)
start = dt.datetime.now()
for i in range(50000):
model.predict(to_x, to_groups)
print(dt.datetime.now() - start)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment