Commit 46c0dc18 authored by Dr.李

updated composer to reuse data

parent efa8a3dc
...@@ -8,6 +8,7 @@ Created on 2017-9-27 ...@@ -8,6 +8,7 @@ Created on 2017-9-27
import copy import copy
import bisect import bisect
from typing import Iterable from typing import Iterable
from typing import Tuple
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from simpleutils.miscellaneous import list_eq from simpleutils.miscellaneous import list_eq
...@@ -152,7 +153,7 @@ def train_model(ref_date: str, ...@@ -152,7 +153,7 @@ def train_model(ref_date: str,
x_values = train_data['train']['x'] x_values = train_data['train']['x']
y_values = train_data['train']['y'] y_values = train_data['train']['y']
base_model.fit(x_values, y_values) base_model.fit(x_values, y_values)
return base_model return base_model, x_values, y_values
def predict_by_model(ref_date: str, def predict_by_model(ref_date: str,
...@@ -164,10 +165,10 @@ def predict_by_model(ref_date: str, ...@@ -164,10 +165,10 @@ def predict_by_model(ref_date: str,
predict_data = data_meta.fetch_predict_data(ref_date, alpha_model) predict_data = data_meta.fetch_predict_data(ref_date, alpha_model)
codes, x_values = predict_data['predict']['code'], predict_data['predict']['x'] codes, x_values = predict_data['predict']['code'], predict_data['predict']['x']
return pd.DataFrame(alpha_model.predict(x_values).flatten(), index=codes) return pd.DataFrame(alpha_model.predict(x_values).flatten(), index=codes), x_values
class Composer(object): class Composer:
def __init__(self, def __init__(self,
alpha_model: ModelBase, alpha_model: ModelBase,
data_meta: DataMeta): data_meta: DataMeta):
...@@ -178,20 +179,23 @@ class Composer(object): ...@@ -178,20 +179,23 @@ class Composer(object):
self.is_updated = False self.is_updated = False
self.sorted_keys = None self.sorted_keys = None
def train(self, ref_date: str): def train(self, ref_date: str) -> Tuple[ModelBase, pd.DataFrame, pd.DataFrame]:
self.models[ref_date] = train_model(ref_date, self.alpha_model, self.data_meta) model, x, y = train_model(ref_date, self.alpha_model, self.data_meta)
self.models[ref_date] = model
self.is_updated = False self.is_updated = False
return model, x, y
def predict(self, ref_date: str, x: pd.DataFrame = None) -> pd.DataFrame: def predict(self, ref_date: str, x: pd.DataFrame = None) -> Tuple[pd.DataFrame, pd.DataFrame]:
model = self._fetch_latest_model(ref_date) model = self._fetch_latest_model(ref_date)
if x is None: if x is None:
return predict_by_model(ref_date, model, self.data_meta) return predict_by_model(ref_date, model, self.data_meta)
else: else:
x_values = x.values x_values = x.values
codes = x.index codes = x.index
return pd.DataFrame(model.predict(x_values).flatten(), index=codes) return pd.DataFrame(model.predict(x_values).flatten(), index=codes), x
def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None, d_type: str = 'test') -> float: def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None, d_type: str = 'test') \
-> Tuple[float, pd.DataFrame, pd.DataFrame]:
model = self._fetch_latest_model(ref_date) model = self._fetch_latest_model(ref_date)
if x is None: if x is None:
if d_type == 'test': if d_type == 'test':
...@@ -204,14 +208,14 @@ class Composer(object): ...@@ -204,14 +208,14 @@ class Composer(object):
x = test_data['train']['x'] x = test_data['train']['x']
if y is None: if y is None:
y = test_data['train']['y'] y = test_data['train']['y']
return model.score(x, y) return model.score(x, y), x, y
def ic(self, ref_date) -> float: def ic(self, ref_date) -> Tuple[float, pd.DataFrame, pd.DataFrame]:
model = self._fetch_latest_model(ref_date) model = self._fetch_latest_model(ref_date)
test_data = self.data_meta.fetch_predict_data(ref_date, model) test_data = self.data_meta.fetch_predict_data(ref_date, model)
x = test_data['predict']['x'] x = test_data['predict']['x']
y = test_data['predict']['y'] y = test_data['predict']['y']
return model.ic(x, y) return model.ic(x, y), x, y
def _fetch_latest_model(self, ref_date) -> ModelBase: def _fetch_latest_model(self, ref_date) -> ModelBase:
if self.is_updated: if self.is_updated:
...@@ -292,7 +296,6 @@ if __name__ == '__main__': ...@@ -292,7 +296,6 @@ if __name__ == '__main__':
regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept, fit_target=fit_target) regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept, fit_target=fit_target)
regression_composer = Composer(alpha_model=regression_model, data_meta=data_meta) regression_composer = Composer(alpha_model=regression_model, data_meta=data_meta)
#regression_composer.train('2010-07-07')
data_package1 = fetch_data_package(engine, data_package1 = fetch_data_package(engine,
alpha_factors=[kernal_feature], alpha_factors=[kernal_feature],
...@@ -307,5 +310,3 @@ if __name__ == '__main__': ...@@ -307,5 +310,3 @@ if __name__ == '__main__':
pre_process=pre_process, pre_process=pre_process,
post_process=post_process, post_process=post_process,
fit_target=fit_target) fit_target=fit_target)
pass
\ No newline at end of file
...@@ -63,7 +63,7 @@ class TestFactorAnalysis(unittest.TestCase): ...@@ -63,7 +63,7 @@ class TestFactorAnalysis(unittest.TestCase):
weight = weight_table.weight weight = weight_table.weight
self.assertEqual(analysis_table['er'].sum() / analysis_table['er'][-1], 2.0) self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
np.testing.assert_array_almost_equal(weight @ self.risk_factor, benchmark @ self.risk_factor) np.testing.assert_array_almost_equal(weight @ self.risk_factor, benchmark @ self.risk_factor)
self.assertTrue(weight @ factor_df.values > benchmark @ factor_df.values) self.assertTrue(weight @ factor_df.values > benchmark @ factor_df.values)
...@@ -91,7 +91,7 @@ class TestFactorAnalysis(unittest.TestCase): ...@@ -91,7 +91,7 @@ class TestFactorAnalysis(unittest.TestCase):
constraints=constraints) constraints=constraints)
weight = weight_table.weight weight = weight_table.weight
self.assertEqual(analysis_table['er'].sum() / analysis_table['er'][-1], 2.0) self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
np.testing.assert_array_almost_equal(weight @ self.risk_factor, benchmark @ self.risk_factor) np.testing.assert_array_almost_equal(weight @ self.risk_factor, benchmark @ self.risk_factor)
......
...@@ -45,7 +45,7 @@ class TestQuantileAnalysis(unittest.TestCase): ...@@ -45,7 +45,7 @@ class TestQuantileAnalysis(unittest.TestCase):
for i, value in enumerate(expected_res): for i, value in enumerate(expected_res):
expected_res[i] = (1. + res_weight) * value - res_weight * grouped_return.sum() expected_res[i] = (1. + res_weight) * value - res_weight * grouped_return.sum()
calculated_res = er_quantile_analysis(x, n_bins, self.r) calculated_res = er_quantile_analysis(x, n_bins, self.r, de_trend=True)
np.testing.assert_array_almost_equal(expected_res, calculated_res) np.testing.assert_array_almost_equal(expected_res, calculated_res)
...@@ -55,7 +55,6 @@ class TestQuantileAnalysis(unittest.TestCase): ...@@ -55,7 +55,6 @@ class TestQuantileAnalysis(unittest.TestCase):
self.x_w, self.x_w,
self.r, self.r,
n_bins=self.n_bins, n_bins=self.n_bins,
do_neutralize=False,
pre_process=[], pre_process=[],
post_process=[]) post_process=[])
...@@ -69,7 +68,6 @@ class TestQuantileAnalysis(unittest.TestCase): ...@@ -69,7 +68,6 @@ class TestQuantileAnalysis(unittest.TestCase):
self.x_w, self.x_w,
self.r, self.r,
n_bins=self.n_bins, n_bins=self.n_bins,
do_neutralize=True,
risk_exp=self.risk_exp, risk_exp=self.risk_exp,
pre_process=[winsorize_normal, standardize], pre_process=[winsorize_normal, standardize],
post_process=[standardize]) post_process=[standardize])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment