Commit b19af2d0 authored by Yucheng's avatar Yucheng

Merge branch 'master' of https://github.com/lion-sing/alpha-mind

parents 058357c0 4622cdf5
[submodule "alphamind/pfopt"] [submodule "alphamind/pfopt"]
path = alphamind/pfopt path = alphamind/pfopt
url = https://github.com/alpha-miner/portfolio-optimizer.git url = https://github.com/alpha-miner/portfolio-optimizer.git
[submodule "xgboost"]
path = xgboost
url = https://github.com/dmlc/xgboost.git
...@@ -42,16 +42,16 @@ alpha - mind 提供了多因子研究中常用的工具链,包括: ...@@ -42,16 +42,16 @@ alpha - mind 提供了多因子研究中常用的工具链,包括:
在Windows上完整安装,需要有C++编译器(例如msvc): 在Windows上完整安装,需要有C++编译器(例如msvc):
```bash ```bash
build_windows_dependencies.bat build_windows_dependencies.bat
``` ```
* Linux * Linux
在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran) 在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran)
```bash ```bash
build_linux_dependencies.sh build_linux_dependencies.sh
``` ```
## 安装 ## 安装
......
...@@ -4,3 +4,6 @@ Created on 2017-4-25 ...@@ -4,3 +4,6 @@ Created on 2017-4-25
@author: cheng.li @author: cheng.li
""" """
__version__ = "0.1.0"
...@@ -25,11 +25,15 @@ from alphamind.data.standardize import projection ...@@ -25,11 +25,15 @@ from alphamind.data.standardize import projection
from alphamind.data.neutralize import neutralize from alphamind.data.neutralize import neutralize
from alphamind.data.engines.sqlengine import factor_tables from alphamind.data.engines.sqlengine import factor_tables
from alphamind.model.linearmodel import LinearRegression from alphamind.model import LinearRegression
from alphamind.model.linearmodel import LassoRegression from alphamind.model import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model import ConstLinearModel
from alphamind.model.treemodel import RandomForestRegressor from alphamind.model import LogisticRegression
from alphamind.model.loader import load_model from alphamind.model import RandomForestRegressor
from alphamind.model import RandomForestClassifier
from alphamind.model import XGBRegressor
from alphamind.model import XGBClassifier
from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase from alphamind.model.data_preparing import fetch_train_phase
...@@ -39,27 +43,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor ...@@ -39,27 +43,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor
from alphamind.execution.pipeline import ExecutionPipeline from alphamind.execution.pipeline import ExecutionPipeline
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
def map_freq(freq):
    """Translate a frequency code into its integer horizon.

    Fixed codes map as follows: ``'1d'`` -> 0, ``'1w'`` -> 4, ``'2w'`` -> 9,
    ``'3w'`` -> 14, ``'4w'`` -> 19 and ``'1m'`` -> 21.  Any other code that
    ends in ``'b'`` is read as ``'<n>b'`` and maps to ``n - 1``.

    :param freq: frequency code string, e.g. ``'1w'`` or ``'5b'``.
    :return: the horizon as an ``int``.
    :raises ValueError: if ``freq`` is not one of the fixed codes and is not
        a valid ``'<n>b'`` code (this includes the empty string and a
        non-integer prefix such as ``'xb'``).
    """
    fixed_horizons = {
        '1m': 21,
        '1w': 4,
        '2w': 9,
        '3w': 14,
        '4w': 19,
        '1d': 0,
    }
    if freq in fixed_horizons:
        return fixed_horizons[freq]

    # ``endswith`` is safe on an empty string, whereas ``freq[-1]`` raised
    # IndexError instead of the ValueError callers are told to expect.
    if freq.endswith("b"):
        try:
            return int(freq[:-1]) - 1
        except ValueError:
            # Report a malformed prefix with the same message as any other
            # unrecognized code instead of int()'s "invalid literal" text.
            raise ValueError("Unrecognized freq: {0}".format(freq)) from None

    raise ValueError("Unrecognized freq: {0}".format(freq))
__all__ = [ __all__ = [
...@@ -85,7 +69,11 @@ __all__ = [ ...@@ -85,7 +69,11 @@ __all__ = [
'LinearRegression', 'LinearRegression',
'LassoRegression', 'LassoRegression',
'ConstLinearModel', 'ConstLinearModel',
'LogisticRegression',
'RandomForestRegressor', 'RandomForestRegressor',
'RandomForestClassifier',
'XGBRegressor',
'XGBClassifier',
'load_model', 'load_model',
'NaiveExecutor', 'NaiveExecutor',
'ThresholdExecutor', 'ThresholdExecutor',
......
...@@ -664,63 +664,6 @@ class Experimental(Base): ...@@ -664,63 +664,6 @@ class Experimental(Base):
DROEAfterNonRecurring = Column(Float(53)) DROEAfterNonRecurring = Column(Float(53))
CFinc1 = Column(Float(53)) CFinc1 = Column(Float(53))
xueqiu_hotness = Column(Float(53)) xueqiu_hotness = Column(Float(53))
con_eps = Column(Float(53))
con_pb = Column(Float(53))
con_pb_order = Column(Float(53))
con_pb_rolling = Column(Float(53))
con_pb_rolling_order = Column(Float(53))
con_pe = Column(Float(53))
con_pe_order = Column(Float(53))
con_pe_rolling = Column(Float(53))
con_pe_rolling_order = Column(Float(53))
con_peg = Column(Float(53))
con_peg_order = Column(Float(53))
con_peg_rolling = Column(Float(53))
con_peg_rolling_order = Column(Float(53))
con_ps = Column(Float(53))
con_ps_order = Column(Float(53))
con_ps_rolling = Column(Float(53))
con_ps_rolling_order = Column(Float(53))
con_target_price = Column(Float(53))
market_confidence_10d = Column(Float(53))
market_confidence_15d = Column(Float(53))
market_confidence_25d = Column(Float(53))
market_confidence_5d = Column(Float(53))
market_confidence_75d = Column(Float(53))
optimism_confidence_10d = Column(Float(53))
optimism_confidence_15d = Column(Float(53))
optimism_confidence_25d = Column(Float(53))
optimism_confidence_5d = Column(Float(53))
optimism_confidence_75d = Column(Float(53))
pessimism_confidence_10d = Column(Float(53))
pessimism_confidence_15d = Column(Float(53))
pessimism_confidence_25d = Column(Float(53))
pessimism_confidence_5d = Column(Float(53))
pessimism_confidence_75d = Column(Float(53))
con_na_yoy = Column(Float(53))
con_np_yoy = Column(Float(53))
con_npcgrate_13w = Column(Float(53))
con_npcgrate_1w = Column(Float(53))
con_npcgrate_26w = Column(Float(53))
con_npcgrate_2y = Column(Float(53))
con_npcgrate_4w = Column(Float(53))
con_npcgrate_52w = Column(Float(53))
con_or_yoy = Column(Float(53))
con_roe_yoy1 = Column(Float(53))
con_roe_yoy2 = Column(Float(53))
con_roe_yoy3 = Column(Float(53))
con_eps_rolling = Column(Float(53))
con_np = Column(Float(53))
con_np_rolling = Column(Float(53))
con_or = Column(Float(53))
con_or_rolling = Column(Float(53))
con_roe = Column(Float(53))
con_na = Column(Float(53))
con_na_rolling = Column(Float(53))
mcap = Column(Float(53))
tcap = Column(Float(53))
ta = Column(Float(53))
na = Column(Float(53))
eps_q = Column(Float(53)) eps_q = Column(Float(53))
roe_q = Column(Float(53)) roe_q = Column(Float(53))
cfinc1_q = Column(Float(53)) cfinc1_q = Column(Float(53))
......
...@@ -8,11 +8,22 @@ Created on 2017-5-2 ...@@ -8,11 +8,22 @@ Created on 2017-5-2
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBClassifier
from alphamind.model.loader import load_model
__all__ = ['LinearRegression', __all__ = ['LinearRegression',
'LassoRegression', 'LassoRegression',
'ConstLinearModel', 'ConstLinearModel',
'RandomForestRegressor'] 'LogisticRegression',
\ No newline at end of file 'RandomForestRegressor',
'RandomForestClassifier',
'XGBRegressor',
'XGBClassifier',
'load_model']
\ No newline at end of file
...@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions ...@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar from PyFin.api import advanceDateByCalendar
from PyFin.DateUtilities import Period from PyFin.DateUtilities import Period
from PyFin.Enums import TimeUnits
from alphamind.data.transformer import Transformer from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing from alphamind.data.processing import factor_processing
from alphamind.data.engines.sqlengine import total_risk_factors from alphamind.data.engines.sqlengine import total_risk_factors
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
def _map_horizon(frequency: str) -> int:
    """Convert a period string into a horizon count.

    The string is parsed with ``Period``; the result is
    ``multiplier * length - 1`` where the multiplier is 1 for day or
    business-day units, 5 for weeks and 22 for months.

    :param frequency: period string accepted by ``Period``.
    :return: the computed horizon.
    :raises ValueError: if the parsed unit is none of the units above.
    """
    period = Period(frequency)
    period_unit = period.units()
    period_length = period.length()

    if period_unit in (TimeUnits.BDays, TimeUnits.Days):
        per_unit = 1
    elif period_unit == TimeUnits.Weeks:
        per_unit = 5
    elif period_unit == TimeUnits.Months:
        per_unit = 22
    else:
        raise ValueError('{0} is an unrecognized frequency rule'.format(frequency))

    return per_unit * period_length - 1
def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk): def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk):
...@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine, ...@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine,
dates = [d.strftime('%Y-%m-%d') for d in dates] dates = [d.strftime('%Y-%m-%d') for d in dates]
horizon = _map_horizon(frequency) horizon = map_freq(frequency)
if isinstance(factors, Transformer): if isinstance(factors, Transformer):
transformer = factors transformer = factors
...@@ -119,8 +105,10 @@ def batch_processing(x_values, ...@@ -119,8 +105,10 @@ def batch_processing(x_values,
post_process): post_process):
train_x_buckets = {} train_x_buckets = {}
train_y_buckets = {} train_y_buckets = {}
train_risk_buckets = {}
predict_x_buckets = {} predict_x_buckets = {}
predict_y_buckets = {} predict_y_buckets = {}
predict_risk_buckets = {}
for i, start in enumerate(groups[:-batch]): for i, start in enumerate(groups[:-batch]):
end = groups[i + batch] end = groups[i + batch]
...@@ -146,6 +134,8 @@ def batch_processing(x_values, ...@@ -146,6 +134,8 @@ def batch_processing(x_values,
risk_factors=this_risk_exp, risk_factors=this_risk_exp,
post_process=post_process) post_process=post_process)
train_risk_buckets[end] = this_risk_exp
left_index = bisect.bisect_right(group_label, start) left_index = bisect.bisect_right(group_label, start)
right_index = bisect.bisect_right(group_label, end) right_index = bisect.bisect_right(group_label, end)
...@@ -165,6 +155,7 @@ def batch_processing(x_values, ...@@ -165,6 +155,7 @@ def batch_processing(x_values,
inner_left_index = bisect.bisect_left(sub_dates, end) inner_left_index = bisect.bisect_left(sub_dates, end)
inner_right_index = bisect.bisect_right(sub_dates, end) inner_right_index = bisect.bisect_right(sub_dates, end)
predict_x_buckets[end] = ne_x[inner_left_index:inner_right_index] predict_x_buckets[end] = ne_x[inner_left_index:inner_right_index]
predict_risk_buckets[end] = this_risk_exp[inner_left_index:inner_right_index]
this_raw_y = y_values[left_index:right_index] this_raw_y = y_values[left_index:right_index]
if len(this_raw_y) > 0: if len(this_raw_y) > 0:
...@@ -174,7 +165,7 @@ def batch_processing(x_values, ...@@ -174,7 +165,7 @@ def batch_processing(x_values,
post_process=post_process) post_process=post_process)
predict_y_buckets[end] = ne_y[inner_left_index:inner_right_index] predict_y_buckets[end] = ne_y[inner_left_index:inner_right_index]
return train_x_buckets, train_y_buckets, predict_x_buckets, predict_y_buckets return train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets
def fetch_data_package(engine: SqlEngine, def fetch_data_package(engine: SqlEngine,
...@@ -216,7 +207,7 @@ def fetch_data_package(engine: SqlEngine, ...@@ -216,7 +207,7 @@ def fetch_data_package(engine: SqlEngine,
alpha_logger.info("Loading data is finished") alpha_logger.info("Loading data is finished")
train_x_buckets, train_y_buckets, predict_x_buckets, predict_y_buckets = batch_processing( train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets = batch_processing(
x_values, x_values,
y_values, y_values,
dates, dates,
...@@ -231,8 +222,8 @@ def fetch_data_package(engine: SqlEngine, ...@@ -231,8 +222,8 @@ def fetch_data_package(engine: SqlEngine,
ret = dict() ret = dict()
ret['x_names'] = transformer.names ret['x_names'] = transformer.names
ret['settlement'] = return_df ret['settlement'] = return_df
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets} ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets} ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets}
return ret return ret
...@@ -260,7 +251,7 @@ def fetch_train_phase(engine, ...@@ -260,7 +251,7 @@ def fetch_train_phase(engine,
dateRule=BizDayConventions.Following, dateRule=BizDayConventions.Following,
dateGenerationRule=DateGeneration.Backward) dateGenerationRule=DateGeneration.Backward)
horizon = _map_horizon(frequency) horizon = map_freq(frequency)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates) factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon) return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
...@@ -339,10 +330,10 @@ def fetch_predict_phase(engine, ...@@ -339,10 +330,10 @@ def fetch_predict_phase(engine,
risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna() risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code']) train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
risk_exp = train_x[neutralized_risk].values.astype(float) risk_exp = train_x[neutralized_risk].values.astype(float)
x_values = train_x[names].values.astype(float)
else: else:
train_x = factor_df.copy() train_x = factor_df.copy()
risk_exp = None risk_exp = None
x_values = train_x[names].values.astype(float)
date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime() date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
dates = np.unique(date_label) dates = np.unique(date_label)
......
...@@ -6,87 +6,75 @@ Created on 2017-5-10 ...@@ -6,87 +6,75 @@ Created on 2017-5-10
""" """
import numpy as np import numpy as np
import arrow
from distutils.version import LooseVersion from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version from sklearn import __version__ as sklearn_version
from sklearn.linear_model import LinearRegression as LinearRegressionImpl from sklearn.linear_model import LinearRegression as LinearRegressionImpl
from sklearn.linear_model import Lasso from sklearn.linear_model import Lasso
from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl
from PyFin.api import pyFinAssert from PyFin.api import pyFinAssert
from alphamind.model.modelbase import ModelBase from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class ConstLinearModelImpl(object):
    """Linear predictor with a fixed weight vector that is never fitted."""

    def __init__(self, weights: np.ndarray = None):
        # Copy the input into a fresh array, then collapse it to one dimension.
        weight_array = np.array(weights)
        self.weights = weight_array.ravel()

    def fit(self, x: np.ndarray, y: np.ndarray):
        """Do nothing; the weights are constant, so training is a no-op."""
        return None

    def predict(self, x: np.ndarray):
        """Return the matrix product of ``x`` and the stored weights."""
        prediction = x @ self.weights
        return prediction
class ConstLinearModel(ModelBase): class ConstLinearModel(ModelBase):
def __init__(self, def __init__(self,
features: list=None, features: list = None,
weights: np.ndarray=None): weights: np.ndarray = None):
super().__init__(features) super().__init__(features)
if features is not None and weights is not None: if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights), pyFinAssert(len(features) == len(weights),
ValueError, ValueError,
"length of features is not equal to length of weights") "length of features is not equal to length of weights")
self.weights = np.array(weights).flatten() self.impl = ConstLinearModelImpl(weights)
def fit(self, x: np.ndarray, y: np.ndarray):
pass
def predict(self, x):
return x @ self.weights
def save(self): def save(self):
model_desc = super().save() model_desc = super().save()
model_desc['weight'] = list(self.weights) model_desc['weight'] = list(self.impl.weights)
return model_desc return model_desc
@classmethod @classmethod
def load(cls, model_desc: dict): def load(cls, model_desc: dict):
obj_layout = cls() return super().load(model_desc)
obj_layout.features = model_desc['features']
obj_layout.weights = np.array(model_desc['weight']) @property
return obj_layout def weights(self):
return self.impl.weights.tolist()
class LinearRegression(ModelBase): class LinearRegression(ModelBase):
def __init__(self, features: list=None, fit_intercept: bool=False): def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features) super().__init__(features)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept) self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
model_desc['weight'] = self.impl.coef_.tolist() model_desc['weight'] = self.impl.coef_.tolist()
return model_desc return model_desc
def score(self) -> float:
return self.impl.score()
@classmethod @classmethod
def load(cls, model_desc: dict): def load(cls, model_desc: dict):
obj_layout = cls() obj_layout = super().load(model_desc)
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']): if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. ' alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format( 'Loaded model may work incorrectly.'.format(sklearn_version,
sklearn_version, model_desc['sklearn_version'])) model_desc['sklearn_version']))
obj_layout.impl = decode(model_desc['desc'])
return obj_layout return obj_layout
@property @property
...@@ -96,42 +84,52 @@ class LinearRegression(ModelBase): ...@@ -96,42 +84,52 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase): class LassoRegression(ModelBase):
def __init__(self, alpha, features: list=None, fit_intercept: bool=False): def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features) super().__init__(features)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept) self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray): def save(self) -> dict:
self.impl.fit(x, y) model_desc = super().save()
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") model_desc['sklearn_version'] = sklearn_version
model_desc['weight'] = self.impl.coef_.tolist()
return model_desc
@classmethod
def load(cls, model_desc: dict):
obj_layout = super().load(model_desc)
def predict(self, x: np.ndarray) -> np.ndarray: if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
return self.impl.predict(x) alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc['sklearn_version']))
return obj_layout
@property
def weights(self):
return self.impl.coef_.tolist()
class LogisticRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
model_desc['weight'] = self.impl.coef_.tolist() model_desc['weight'] = self.impl.coef_.tolist()
return model_desc return model_desc
def score(self) -> float:
return self.impl.score()
@classmethod @classmethod
def load(cls, model_desc: dict): def load(cls, model_desc: dict):
obj_layout = cls(alpha=0.) obj_layout = super().load(model_desc)
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']): if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. ' alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format( 'Loaded model may work incorrectly.'.format(sklearn_version,
sklearn_version, model_desc['sklearn_version'])) model_desc['sklearn_version']))
obj_layout.impl = decode(model_desc['desc'])
return obj_layout return obj_layout
@property @property
...@@ -140,8 +138,8 @@ class LassoRegression(ModelBase): ...@@ -140,8 +138,8 @@ class LassoRegression(ModelBase):
if __name__ == '__main__': if __name__ == '__main__':
import pprint import pprint
ls = ConstLinearModel(['a', 'b'], np.array([0.5, 0.5])) ls = ConstLinearModel(['a', 'b'], np.array([0.5, 0.5]))
x = np.array([[0.2, 0.2], x = np.array([[0.2, 0.2],
......
...@@ -9,6 +9,11 @@ from alphamind.model.modelbase import ModelBase ...@@ -9,6 +9,11 @@ from alphamind.model.modelbase import ModelBase
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBClassifier
def load_model(model_desc: dict) -> ModelBase: def load_model(model_desc: dict) -> ModelBase:
...@@ -22,5 +27,15 @@ def load_model(model_desc: dict) -> ModelBase: ...@@ -22,5 +27,15 @@ def load_model(model_desc: dict) -> ModelBase:
return LinearRegression.load(model_desc) return LinearRegression.load(model_desc)
elif 'LassoRegression' in model_name_parts: elif 'LassoRegression' in model_name_parts:
return LassoRegression.load(model_desc) return LassoRegression.load(model_desc)
elif 'LogisticRegression' in model_name_parts:
return LogisticRegression.load(model_desc)
elif 'RandomForestRegressor' in model_name_parts:
return RandomForestRegressor.load(model_desc)
elif 'RandomForestClassifier' in model_name_parts:
return RandomForestClassifier.load(model_desc)
elif 'XGBRegressor' in model_name_parts:
return XGBRegressor.load(model_desc)
elif 'XGBClassifier' in model_name_parts:
return XGBClassifier.load(model_desc)
else: else:
raise ValueError('{0} is not currently supported in model loader.'.format(model_name)) raise ValueError('{0} is not currently supported in model loader.'.format(model_name))
...@@ -9,6 +9,8 @@ import abc ...@@ -9,6 +9,8 @@ import abc
import arrow import arrow
import numpy as np import numpy as np
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class ModelBase(metaclass=abc.ABCMeta): class ModelBase(metaclass=abc.ABCMeta):
...@@ -16,14 +18,18 @@ class ModelBase(metaclass=abc.ABCMeta): ...@@ -16,14 +18,18 @@ class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None): def __init__(self, features: list=None):
if features is not None: if features is not None:
self.features = list(features) self.features = list(features)
self.impl = None
self.trained_time = None
@abc.abstractmethod
def fit(self, x, y): def fit(self, x, y):
pass self.impl.fit(x, y.flatten())
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
@abc.abstractmethod def predict(self, x: np.ndarray) -> np.ndarray:
def predict(self, x) -> np.ndarray: return self.impl.predict(x)
pass
def score(self, x: np.ndarray, y: np.ndarray) -> float:
return self.impl.score(x, y)
@abc.abstractmethod @abc.abstractmethod
def save(self) -> dict: def save(self) -> dict:
...@@ -34,10 +40,17 @@ class ModelBase(metaclass=abc.ABCMeta): ...@@ -34,10 +40,17 @@ class ModelBase(metaclass=abc.ABCMeta):
model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__, model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__,
language='python', language='python',
saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"), saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"),
features=list(self.features)) features=list(self.features),
trained_time=self.trained_time,
desc=encode(self.impl),
internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
return model_desc return model_desc
@abc.abstractclassmethod @abc.abstractclassmethod
def load(cls, model_desc: dict): def load(cls, model_desc: dict):
pass obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
...@@ -5,47 +5,154 @@ Created on 2017-12-4 ...@@ -5,47 +5,154 @@ Created on 2017-12-4
@author: cheng.li @author: cheng.li
""" """
import arrow from typing import List
import numpy as np
from distutils.version import LooseVersion from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version from sklearn import __version__ as sklearn_version
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
from xgboost import __version__ as xgbboot_version
from xgboost import XGBRegressor as XGBRegressorImpl
from xgboost import XGBClassifier as XGBClassifierImpl
from alphamind.model.modelbase import ModelBase from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class RandomForestRegressor(ModelBase): class RandomForestRegressor(ModelBase):
def __init__(self, n_estimators, features=None, *args, **kwargs): def __init__(self,
n_estimators: int=100,
max_features: str='auto',
features: List=None,
**kwargs):
super().__init__(features) super().__init__(features)
self.impl = RandomForestRegressorImpl(n_estimators, *args, **kwargs) self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray): def save(self) -> dict:
self.impl.fit(x, y) model_desc = super().save()
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc
@classmethod
def load(cls, model_desc: dict):
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc['sklearn_version']))
return obj_layout
@property
def importances(self):
return self.impl.feature_importances_.tolist()
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x) class RandomForestClassifier(ModelBase):
def __init__(self,
n_estimators: int=100,
max_features: str='auto',
features: List = None,
**kwargs):
super().__init__(features)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time model_desc['importances'] = self.importances
return model_desc
@classmethod @classmethod
def load(cls, model_desc: dict): def load(cls, model_desc: dict):
obj_layout = cls() obj_layout = super().load(model_desc)
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']): if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. ' alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format( 'Loaded model may work incorrectly.'.format(sklearn_version,
sklearn_version, model_desc['sklearn_version'])) model_desc['sklearn_version']))
return obj_layout
@property
def importances(self):
return self.impl.feature_importances_.tolist()
obj_layout.impl = decode(model_desc['desc'])
class XGBRegressor(ModelBase):
    """``ModelBase`` wrapper around xgboost's ``XGBRegressor`` estimator."""

    def __init__(self,
                 n_estimators: int=100,
                 learning_rate: float=0.1,
                 max_depth: int=3,
                 features: List=None,
                 **kwargs):
        """Build the underlying estimator.

        :param n_estimators: forwarded to the xgboost estimator.
        :param learning_rate: forwarded to the xgboost estimator.
        :param max_depth: forwarded to the xgboost estimator.
        :param features: feature names handed to ``ModelBase``.
        :param kwargs: any further keyword arguments for the estimator.
        """
        super().__init__(features)
        self.impl = XGBRegressorImpl(n_estimators=n_estimators,
                                     learning_rate=learning_rate,
                                     max_depth=max_depth,
                                     **kwargs)

    def save(self) -> dict:
        """Return the base model description extended with the xgboost
        version in use and the feature importances of the estimator."""
        model_desc = super().save()
        model_desc['xgbboot_version'] = xgbboot_version
        model_desc['importances'] = self.importances
        return model_desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from a description produced by ``save``.

        Logs a warning when the installed xgboost is older than the version
        recorded in ``model_desc``.
        """
        obj_layout = super().load(model_desc)

        # Compare xgboost against xgboost: the previous code tested the
        # installed *sklearn* version against the stored xgboost version,
        # so the warning fired (or stayed silent) for the wrong reason.
        if LooseVersion(xgbboot_version) < LooseVersion(model_desc['xgbboot_version']):
            alpha_logger.warning('Current xgboost version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(xgbboot_version,
                                                                             model_desc['xgbboot_version']))
        return obj_layout

    @property
    def importances(self):
        """``feature_importances_`` of the underlying estimator, as a list."""
        return self.impl.feature_importances_.tolist()
class XGBClassifier(ModelBase):
    """``ModelBase`` wrapper around xgboost's ``XGBClassifier`` estimator."""

    def __init__(self,
                 n_estimators: int=100,
                 learning_rate: float=0.1,
                 max_depth: int=3,
                 features: List = None,
                 **kwargs):
        """Build the underlying estimator.

        :param n_estimators: forwarded to the xgboost estimator.
        :param learning_rate: forwarded to the xgboost estimator.
        :param max_depth: forwarded to the xgboost estimator.
        :param features: feature names handed to ``ModelBase``.
        :param kwargs: any further keyword arguments for the estimator.
        """
        super().__init__(features)
        self.impl = XGBClassifierImpl(n_estimators=n_estimators,
                                      learning_rate=learning_rate,
                                      max_depth=max_depth,
                                      **kwargs)

    def save(self) -> dict:
        """Return the base model description extended with the xgboost
        version in use and the feature importances of the estimator."""
        model_desc = super().save()
        model_desc['xgbboot_version'] = xgbboot_version
        model_desc['importances'] = self.importances
        return model_desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from a description produced by ``save``.

        Logs a warning when the installed xgboost is older than the version
        recorded in ``model_desc``.
        """
        obj_layout = super().load(model_desc)

        # Compare xgboost against xgboost: the previous code tested the
        # installed *sklearn* version against the stored xgboost version,
        # so the warning fired (or stayed silent) for the wrong reason.
        if LooseVersion(xgbboot_version) < LooseVersion(model_desc['xgbboot_version']):
            alpha_logger.warning('Current xgboost version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(xgbboot_version,
                                                                             model_desc['xgbboot_version']))
        return obj_layout

    @property
    def importances(self):
        """``feature_importances_`` of the underlying estimator, as a list."""
        return self.impl.feature_importances_.tolist()
...@@ -8,8 +8,11 @@ Created on 2017-9-4 ...@@ -8,8 +8,11 @@ Created on 2017-9-4
import unittest import unittest
import numpy as np import numpy as np
from sklearn.linear_model import LinearRegression as LinearRegression2 from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from sklearn.linear_model import LogisticRegression as LogisticRegression2
from alphamind.model.linearmodel import LogisticRegression
class TestLinearModel(unittest.TestCase): class TestLinearModel(unittest.TestCase):
...@@ -17,7 +20,8 @@ class TestLinearModel(unittest.TestCase): ...@@ -17,7 +20,8 @@ class TestLinearModel(unittest.TestCase):
def setUp(self):
    """Draw fresh random fixtures: training inputs, a continuous target,
    a 0/1 label derived from the target's sign, and prediction inputs."""
    n_features = 3
    self.n = n_features
    self.train_x = np.random.randn(1000, n_features)
    target = np.random.randn(1000)
    self.train_y = target
    # Label is 1 where the continuous target is strictly positive, else 0.
    self.train_y_label = np.where(target > 0., 1, 0)
    self.predict_x = np.random.randn(10, n_features)
def test_const_linear_model(self): def test_const_linear_model(self):
...@@ -36,7 +40,7 @@ class TestLinearModel(unittest.TestCase): ...@@ -36,7 +40,7 @@ class TestLinearModel(unittest.TestCase):
weights=weights) weights=weights)
desc = model.save() desc = model.save()
new_model = ConstLinearModel.load(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
np.testing.assert_array_almost_equal(model.weights, new_model.weights) np.testing.assert_array_almost_equal(model.weights, new_model.weights)
...@@ -52,15 +56,44 @@ class TestLinearModel(unittest.TestCase): ...@@ -52,15 +56,44 @@ class TestLinearModel(unittest.TestCase):
expected_y = expected_model.predict(self.predict_x) expected_y = expected_model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y) np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(expected_model.coef_, model.weights)
def test_linear_regression_persistence(self):
    """A fitted LinearRegression must survive a save / load_model round
    trip with identical predictions and weights."""
    original = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
    original.fit(self.train_x, self.train_y)

    restored = load_model(original.save())

    np.testing.assert_array_almost_equal(restored.predict(self.predict_x),
                                         original.predict(self.predict_x))
    np.testing.assert_array_almost_equal(restored.weights, original.weights)
def test_logistic_regression(self):
    """The LogisticRegression wrapper must agree with the reference
    implementation imported as LogisticRegression2."""
    wrapped = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
    wrapped.fit(self.train_x, self.train_y_label)
    calculated_y = wrapped.predict(self.predict_x)

    reference = LogisticRegression2(fit_intercept=False)
    reference.fit(self.train_x, self.train_y_label)
    expected_y = reference.predict(self.predict_x)

    # Predicted labels are compared exactly; coefficients up to tolerance.
    np.testing.assert_array_equal(calculated_y, expected_y)
    np.testing.assert_array_almost_equal(reference.coef_, wrapped.weights)
def test_logistic_regression_persistence(self):
    """A fitted LogisticRegression must survive a save / load_model round
    trip with identical predictions and weights."""
    # Build the logistic model here; constructing LinearRegression would
    # only repeat the linear persistence test on label data.
    model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
    model.fit(self.train_x, self.train_y_label)

    desc = model.save()
    new_model = load_model(desc)

    calculated_y = new_model.predict(self.predict_x)
    expected_y = model.predict(self.predict_x)

    np.testing.assert_array_almost_equal(calculated_y, expected_y)
    np.testing.assert_array_almost_equal(new_model.weights, model.weights)
# -*- coding: utf-8 -*-
"""
Created on 2018-1-5
@author: cheng.li
"""
import unittest
import numpy as np
from alphamind.model.loader import load_model
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBClassifier
class TestTreeModel(unittest.TestCase):
    """Save / load_model round-trip tests for the tree-based model wrappers."""

    def _check_round_trip(self, model, classify):
        """Fit ``model`` on random data, persist and reload it, then require
        equal features and matching predictions.

        When ``classify`` is true the random target is turned into 0/1 labels
        (1 where the drawn value is positive).
        """
        x = np.random.randn(1000, 10)
        y = np.random.randn(1000)
        if classify:
            y = np.where(y > 0, 1, 0)
        model.fit(x, y)

        restored = load_model(model.save())
        self.assertEqual(model.features, restored.features)

        sample_x = np.random.randn(100, 10)
        np.testing.assert_array_almost_equal(model.predict(sample_x),
                                             restored.predict(sample_x))

    def test_random_forest_regress_persistence(self):
        self._check_round_trip(RandomForestRegressor(features=list(range(10))),
                               classify=False)

    def test_random_forest_classify_persistence(self):
        self._check_round_trip(RandomForestClassifier(features=list(range(10))),
                               classify=True)

    def test_xgb_regress_persistence(self):
        self._check_round_trip(XGBRegressor(features=list(range(10))),
                               classify=False)

    def test_xgb_classify_persistence(self):
        self._check_round_trip(XGBClassifier(features=list(range(10))),
                               classify=True)
...@@ -28,6 +28,7 @@ from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis ...@@ -28,6 +28,7 @@ from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis
from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis
from alphamind.tests.model.test_linearmodel import TestLinearModel from alphamind.tests.model.test_linearmodel import TestLinearModel
from alphamind.tests.model.test_treemodel import TestTreeModel
from alphamind.tests.model.test_loader import TestLoader from alphamind.tests.model.test_loader import TestLoader
from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor
from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor
...@@ -54,6 +55,7 @@ if __name__ == '__main__': ...@@ -54,6 +55,7 @@ if __name__ == '__main__':
TestFactorAnalysis, TestFactorAnalysis,
TestQuantileAnalysis, TestQuantileAnalysis,
TestLinearModel, TestLinearModel,
TestTreeModel,
TestLoader, TestLoader,
TestNaiveExecutor, TestNaiveExecutor,
TestThresholdExecutor, TestThresholdExecutor,
......
...@@ -16,6 +16,27 @@ import numba as nb ...@@ -16,6 +16,27 @@ import numba as nb
alpha_logger = CustomLogger('ALPHA_MIND', 'info') alpha_logger = CustomLogger('ALPHA_MIND', 'info')
# Horizons for the fixed frequency codes accepted by map_freq.
_FIXED_FREQ_HORIZONS = {
    '1m': 21,
    '1w': 4,
    '2w': 9,
    '3w': 14,
    '4w': 19,
    '1d': 0,
}


def map_freq(freq):
    """Translate a frequency code into its integer horizon.

    Fixed codes: '1m' -> 21, '1w' -> 4, '2w' -> 9, '3w' -> 14, '4w' -> 19,
    '1d' -> 0.  A code of the form '<N>b' maps to ``N - 1``.

    :param freq: frequency code string.
    :return: the horizon as an ``int``.
    :raises ValueError: if ``freq`` is not a recognized code, including the
        empty string and '<N>b' codes whose prefix is not an integer.
    """
    if isinstance(freq, str):
        if freq in _FIXED_FREQ_HORIZONS:
            return _FIXED_FREQ_HORIZONS[freq]
        # endswith is safe on '' where indexing freq[-1] would raise IndexError.
        if freq.endswith("b"):
            try:
                return int(freq[:-1]) - 1
            except ValueError:
                # Fall through so the caller gets the uniform error below
                # instead of int()'s "invalid literal" message.
                pass
    raise ValueError("Unrecognized freq: {0}".format(freq))
def groupby(groups): def groupby(groups):
order = groups.argsort() order = groups.argsort()
t = groups[order] t = groups[order]
......
#!/bin/sh #!/bin/sh
cd alphamind/pfopt cd xgboost
git submodule init
git submodule update
./build_linux.sh make -j4
cd python-package
python setup.py install
if [ $? -ne 0 ] ; then
cd ../..
exit 1
fi
cd ../..
cd alphamind/pfopt
./build_linux.sh
if [ $? -ne 0 ] ; then if [ $? -ne 0 ] ; then
cd ../.. cd ../..
exit 1 exit 1
......
@echo off @echo off
cd xgboost
git submodule init
git submodule update
mkdir build
cd build
cmake .. -G "Visual Studio 14 2015 Win64"
msbuild xgboost.sln /m /p:Configuration=Release /p:Platform=x64
if %errorlevel% neq 0 exit /b 1
cd ../python-package
python setup.py install
if %errorlevel% neq 0 exit /b 1
cd ../..
cd alphamind\pfopt cd alphamind\pfopt
call build_windows.bat call build_windows.bat
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment