Commit 794b50ea authored by Dr.李

update model and composer

parent 30aa9db2
...@@ -21,7 +21,7 @@ Back test parameter settings ...@@ -21,7 +21,7 @@ Back test parameter settings
""" """
start_date = '2010-01-01' start_date = '2010-01-01'
end_date = '2018-01-26' end_date = '2018-01-29'
frequency = '10b' frequency = '10b'
method = 'risk_neutral' method = 'risk_neutral'
...@@ -216,7 +216,7 @@ def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True ...@@ -216,7 +216,7 @@ def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True
def worker_func_positive(factor_name): def worker_func_positive(factor_name):
from alphamind.api import SqlEngine, Universe from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q'] neutralize_factors = ['roe_q', 'ep_q']
engine = SqlEngine() engine = SqlEngine()
benchmark_code = 905 benchmark_code = 905
universe_name = ['zz500'] universe_name = ['zz500']
...@@ -226,7 +226,7 @@ def worker_func_positive(factor_name): ...@@ -226,7 +226,7 @@ def worker_func_positive(factor_name):
def worker_func_negative(factor_name): def worker_func_negative(factor_name):
from alphamind.api import SqlEngine, Universe from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q'] neutralize_factors = ['roe_q', 'ep_q']
engine = SqlEngine() engine = SqlEngine()
benchmark_code = 905 benchmark_code = 905
universe_name = ['zz500'] universe_name = ['zz500']
...@@ -235,34 +235,34 @@ def worker_func_negative(factor_name): ...@@ -235,34 +235,34 @@ def worker_func_negative(factor_name):
if __name__ == '__main__': if __name__ == '__main__':
# from dask.distributed import Client from dask.distributed import Client
#
# client = Client('10.63.6.176:8786')
#
# engine = SqlEngine()
# df = engine.fetch_factor_coverage()
# df = df[df.universe == 'zz800'].groupby('factor').mean()
# df = df[df.coverage >= 0.98]
#
# tasks = client.map(worker_func_positive, df.index.tolist())
# res1 = client.gather(tasks)
#
# tasks = client.map(worker_func_negative, df.index.tolist())
# res2 = client.gather(tasks)
#
# factor_df = pd.DataFrame()
#
# for f_name, df in res1:
# factor_df[f_name] = df['returns']
#
# for f_name, df in res2:
# factor_df[f_name] = df['returns']
factor_name = LAST('ep_q') # LAST('EBITDA') / LAST('ev') client = Client('192.168.0.102:8786')
f_name, ret_df = worker_func_positive(factor_name)
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6), engine = SqlEngine()
title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format( df = engine.fetch_factor_coverage()
frequency, factor_name, 905), df = df[df.universe == 'zz800'].groupby('factor').mean()
secondary_y='tc_cost') df = df[df.coverage >= 0.98]
plt.show()
tasks = client.map(worker_func_positive, df.index.tolist())
res1 = client.gather(tasks)
tasks = client.map(worker_func_negative, df.index.tolist())
res2 = client.gather(tasks)
factor_df = pd.DataFrame()
for f_name, df in res1:
factor_df[f_name] = df['returns']
for f_name, df in res2:
factor_df[f_name] = df['returns']
# factor_name = LAST('EBITDA') / LAST('ev')
# f_name, ret_df = worker_func_positive(factor_name)
#
# ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
# title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
# frequency, factor_name, 905),
# secondary_y='tc_cost')
# plt.show()
...@@ -31,7 +31,7 @@ class ConstLinearModelImpl(object): ...@@ -31,7 +31,7 @@ class ConstLinearModelImpl(object):
class ConstLinearModel(ModelBase): class ConstLinearModel(ModelBase):
def __init__(self, def __init__(self,
features: list = None, features=None,
weights: np.ndarray = None): weights: np.ndarray = None):
super().__init__(features) super().__init__(features)
if features is not None and weights is not None: if features is not None and weights is not None:
...@@ -59,7 +59,6 @@ class LinearRegression(ModelBase): ...@@ -59,7 +59,6 @@ class LinearRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs): def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features) super().__init__(features)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs) self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
...@@ -87,7 +86,6 @@ class LassoRegression(ModelBase): ...@@ -87,7 +86,6 @@ class LassoRegression(ModelBase):
def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs): def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features) super().__init__(features)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs) self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
......
...@@ -8,6 +8,7 @@ Created on 2017-9-4 ...@@ -8,6 +8,7 @@ Created on 2017-9-4
import abc import abc
import arrow import arrow
import numpy as np import numpy as np
from simpleutils.miscellaneous import list_eq
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import encode from alphamind.utilities import encode
from alphamind.utilities import decode from alphamind.utilities import decode
...@@ -16,7 +17,7 @@ from alphamind.data.transformer import Transformer ...@@ -16,7 +17,7 @@ from alphamind.data.transformer import Transformer
class ModelBase(metaclass=abc.ABCMeta): class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None): def __init__(self, features=None):
if features is not None: if features is not None:
self.formulas = Transformer(features) self.formulas = Transformer(features)
self.features = self.formulas.names self.features = self.formulas.names
...@@ -25,6 +26,12 @@ class ModelBase(metaclass=abc.ABCMeta): ...@@ -25,6 +26,12 @@ class ModelBase(metaclass=abc.ABCMeta):
self.impl = None self.impl = None
self.trained_time = None self.trained_time = None
def __eq__(self, rhs):
return encode(self.impl) == encode(rhs.impl) \
and self.trained_time == rhs.trained_time \
and list_eq(self.features, rhs.features) \
and encode(self.formulas) == encode(rhs.formulas)
def fit(self, x: np.ndarray, y: np.ndarray): def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y.flatten()) self.impl.fit(x, y.flatten())
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
......
...@@ -32,12 +32,10 @@ class RandomForestRegressor(ModelBase): ...@@ -32,12 +32,10 @@ class RandomForestRegressor(ModelBase):
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators, self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features, max_features=max_features,
**kwargs) **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -66,12 +64,10 @@ class RandomForestClassifier(ModelBase): ...@@ -66,12 +64,10 @@ class RandomForestClassifier(ModelBase):
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators, self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features, max_features=max_features,
**kwargs) **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -108,7 +104,6 @@ class XGBRegressor(ModelBase): ...@@ -108,7 +104,6 @@ class XGBRegressor(ModelBase):
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -132,7 +127,7 @@ class XGBClassifier(ModelBase): ...@@ -132,7 +127,7 @@ class XGBClassifier(ModelBase):
n_estimators: int=100, n_estimators: int=100,
learning_rate: float=0.1, learning_rate: float=0.1,
max_depth: int=3, max_depth: int=3,
features: List = None, features=None,
n_jobs: int=1, n_jobs: int=1,
**kwargs): **kwargs):
super().__init__(features) super().__init__(features)
...@@ -145,7 +140,6 @@ class XGBClassifier(ModelBase): ...@@ -145,7 +140,6 @@ class XGBClassifier(ModelBase):
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -230,7 +224,6 @@ class XGBTrainer(ModelBase): ...@@ -230,7 +224,6 @@ class XGBTrainer(ModelBase):
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
......
...@@ -9,10 +9,15 @@ import unittest ...@@ -9,10 +9,15 @@ import unittest
from alphamind.data.engines.universe import Universe from alphamind.data.engines.universe import Universe
from alphamind.model.composer import DataMeta from alphamind.model.composer import DataMeta
from alphamind.model.composer import Composer from alphamind.model.composer import Composer
from alphamind.model.treemodel import XGBClassifier
class TestComposer(unittest.TestCase): class TestComposer(unittest.TestCase):
def _assert_composer_equal(self, lhs: Composer, rhs: Composer):
self.assertEqual(lhs.alpha_model, rhs.alpha_model)
self.assertEqual(lhs.data_meta, rhs.data_meta)
def test_data_meta_persistence(self): def test_data_meta_persistence(self):
freq = '5b' freq = '5b'
...@@ -49,7 +54,36 @@ class TestComposer(unittest.TestCase): ...@@ -49,7 +54,36 @@ class TestComposer(unittest.TestCase):
self.assertEqual(data_meta.data_source, loaded_data.data_source) self.assertEqual(data_meta.data_source, loaded_data.data_source)
def test_composer_persistence(self): def test_composer_persistence(self):
pass freq = '5b'
universe = Universe('custom', ['zz800'])
batch = 4
neutralized_risk = ['SIZE']
risk_model = 'long'
pre_process = ['standardize', 'winsorize_normal']
post_process = ['standardize', 'winsorize_normal']
warm_start = 2
data_source = 'postgresql://user:pwd@server/dummy'
data_meta = DataMeta(freq=freq,
universe=universe,
batch=batch,
neutralized_risk=neutralized_risk,
risk_model=risk_model,
pre_process=pre_process,
post_process=post_process,
warm_start=warm_start,
data_source=data_source)
features = {'f1': 'closePrice', 'f2': 'openPrice'}
alpha_model = XGBClassifier(features=features)
composer = Composer(alpha_model=alpha_model,
data_meta=data_meta)
comp_desc = composer.save()
loaded_comp = Composer.load(comp_desc)
self._assert_composer_equal(composer, loaded_comp)
...@@ -32,6 +32,7 @@ from alphamind.tests.model.test_modelbase import TestModelBase ...@@ -32,6 +32,7 @@ from alphamind.tests.model.test_modelbase import TestModelBase
from alphamind.tests.model.test_linearmodel import TestLinearModel from alphamind.tests.model.test_linearmodel import TestLinearModel
from alphamind.tests.model.test_treemodel import TestTreeModel from alphamind.tests.model.test_treemodel import TestTreeModel
from alphamind.tests.model.test_loader import TestLoader from alphamind.tests.model.test_loader import TestLoader
from alphamind.tests.model.test_composer import TestComposer
from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor
from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor
from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor
...@@ -61,6 +62,7 @@ if __name__ == '__main__': ...@@ -61,6 +62,7 @@ if __name__ == '__main__':
TestLinearModel, TestLinearModel,
TestTreeModel, TestTreeModel,
TestLoader, TestLoader,
TestComposer,
TestNaiveExecutor, TestNaiveExecutor,
TestThresholdExecutor, TestThresholdExecutor,
TestTargetVolExecutor, TestTargetVolExecutor,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment