Unverified Commit 4622cdf5 authored by lion-sing's avatar lion-sing Committed by GitHub

Merge pull request #1 from alpha-miner/master

update repo
parents 575c79ca b899c787
[submodule "alphamind/pfopt"]
path = alphamind/pfopt
url = https://github.com/alpha-miner/portfolio-optimizer.git
[submodule "xgboost"]
path = xgboost
url = https://github.com/dmlc/xgboost.git
......@@ -42,16 +42,16 @@ alpha - mind 提供了多因子研究中常用的工具链,包括:
在Windows上完整安装,需要有C++编译器(例如msvc):
```bash
build_windows_dependencies.bat
```
```bash
build_windows_dependencies.bat
```
* Linux
在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran)
在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran)
```bash
build_linux_dependencies.sh
```bash
build_linux_dependencies.sh
```
## 安装
......
......@@ -4,3 +4,6 @@ Created on 2017-4-25
@author: cheng.li
"""
__version__ = "0.1.0"
......@@ -25,11 +25,15 @@ from alphamind.data.standardize import projection
from alphamind.data.neutralize import neutralize
from alphamind.data.engines.sqlengine import factor_tables
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.loader import load_model
from alphamind.model import LinearRegression
from alphamind.model import LassoRegression
from alphamind.model import ConstLinearModel
from alphamind.model import LogisticRegression
from alphamind.model import RandomForestRegressor
from alphamind.model import RandomForestClassifier
from alphamind.model import XGBRegressor
from alphamind.model import XGBClassifier
from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase
......@@ -39,27 +43,7 @@ from alphamind.execution.targetvolexecutor import TargetVolExecutor
from alphamind.execution.pipeline import ExecutionPipeline
from alphamind.utilities import alpha_logger
def map_freq(freq):
    """Translate a frequency label into an integer horizon.

    The labels ``'1d'``, ``'1w'``, ``'2w'``, ``'3w'``, ``'4w'`` and ``'1m'``
    map to fixed horizons. Any other label ending in ``'b'`` is read as
    ``'<N>b'`` and maps to ``N - 1``.

    :param freq: frequency label, e.g. ``'1w'`` or ``'10b'``
    :return: the horizon as an ``int``
    :raises ValueError: if the label is empty, is not one of the fixed
        labels, or its ``'<N>b'`` prefix is not an integer
    """
    fixed_horizons = {
        '1m': 21,
        '1w': 4,
        '2w': 9,
        '3w': 14,
        '4w': 19,
        '1d': 0,
    }
    if freq in fixed_horizons:
        return fixed_horizons[freq]

    # Slicing (rather than indexing) keeps an empty label from raising
    # IndexError; it falls through to the ValueError below instead.
    if freq[-1:] == "b":
        try:
            return int(freq[:-1]) - 1
        except ValueError:
            # Report a malformed prefix with the same message as any other
            # unknown label instead of leaking int()'s own message.
            raise ValueError("Unrecognized freq: {0}".format(freq)) from None

    raise ValueError("Unrecognized freq: {0}".format(freq))
from alphamind.utilities import map_freq
__all__ = [
......@@ -85,7 +69,11 @@ __all__ = [
'LinearRegression',
'LassoRegression',
'ConstLinearModel',
'LogisticRegression',
'RandomForestRegressor',
'RandomForestClassifier',
'XGBRegressor',
'XGBClassifier',
'load_model',
'NaiveExecutor',
'ThresholdExecutor',
......
......@@ -664,63 +664,6 @@ class Experimental(Base):
DROEAfterNonRecurring = Column(Float(53))
CFinc1 = Column(Float(53))
xueqiu_hotness = Column(Float(53))
con_eps = Column(Float(53))
con_pb = Column(Float(53))
con_pb_order = Column(Float(53))
con_pb_rolling = Column(Float(53))
con_pb_rolling_order = Column(Float(53))
con_pe = Column(Float(53))
con_pe_order = Column(Float(53))
con_pe_rolling = Column(Float(53))
con_pe_rolling_order = Column(Float(53))
con_peg = Column(Float(53))
con_peg_order = Column(Float(53))
con_peg_rolling = Column(Float(53))
con_peg_rolling_order = Column(Float(53))
con_ps = Column(Float(53))
con_ps_order = Column(Float(53))
con_ps_rolling = Column(Float(53))
con_ps_rolling_order = Column(Float(53))
con_target_price = Column(Float(53))
market_confidence_10d = Column(Float(53))
market_confidence_15d = Column(Float(53))
market_confidence_25d = Column(Float(53))
market_confidence_5d = Column(Float(53))
market_confidence_75d = Column(Float(53))
optimism_confidence_10d = Column(Float(53))
optimism_confidence_15d = Column(Float(53))
optimism_confidence_25d = Column(Float(53))
optimism_confidence_5d = Column(Float(53))
optimism_confidence_75d = Column(Float(53))
pessimism_confidence_10d = Column(Float(53))
pessimism_confidence_15d = Column(Float(53))
pessimism_confidence_25d = Column(Float(53))
pessimism_confidence_5d = Column(Float(53))
pessimism_confidence_75d = Column(Float(53))
con_na_yoy = Column(Float(53))
con_np_yoy = Column(Float(53))
con_npcgrate_13w = Column(Float(53))
con_npcgrate_1w = Column(Float(53))
con_npcgrate_26w = Column(Float(53))
con_npcgrate_2y = Column(Float(53))
con_npcgrate_4w = Column(Float(53))
con_npcgrate_52w = Column(Float(53))
con_or_yoy = Column(Float(53))
con_roe_yoy1 = Column(Float(53))
con_roe_yoy2 = Column(Float(53))
con_roe_yoy3 = Column(Float(53))
con_eps_rolling = Column(Float(53))
con_np = Column(Float(53))
con_np_rolling = Column(Float(53))
con_or = Column(Float(53))
con_or_rolling = Column(Float(53))
con_roe = Column(Float(53))
con_na = Column(Float(53))
con_na_rolling = Column(Float(53))
mcap = Column(Float(53))
tcap = Column(Float(53))
ta = Column(Float(53))
na = Column(Float(53))
eps_q = Column(Float(53))
roe_q = Column(Float(53))
cfinc1_q = Column(Float(53))
......
......@@ -8,11 +8,22 @@ Created on 2017-5-2
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBClassifier
from alphamind.model.loader import load_model
__all__ = ['LinearRegression',
'LassoRegression',
'ConstLinearModel',
'RandomForestRegressor']
\ No newline at end of file
'LogisticRegression',
'RandomForestRegressor',
'RandomForestClassifier',
'XGBRegressor',
'XGBClassifier',
'load_model']
\ No newline at end of file
......@@ -16,27 +16,13 @@ from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar
from PyFin.DateUtilities import Period
from PyFin.Enums import TimeUnits
from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.data.engines.sqlengine import total_risk_factors
from alphamind.utilities import alpha_logger
def _map_horizon(frequency: str) -> int:
    """Convert a PyFin period string into a horizon.

    The string is parsed with ``Period``. The result is
    ``multiplier * length - 1``, where the multiplier is 1 for day and
    business-day units, 5 for weeks and 22 for months (presumably trading
    days per week / month -- confirm with the data provider's calendar).

    :param frequency: period string understood by ``PyFin``'s ``Period``
    :return: the horizon as an ``int``
    :raises ValueError: if the period's unit is none of the above
    """
    period = Period(frequency)
    unit = period.units()
    length = period.length()

    if unit == TimeUnits.BDays or unit == TimeUnits.Days:
        per_unit = 1
    elif unit == TimeUnits.Weeks:
        per_unit = 5
    elif unit == TimeUnits.Months:
        per_unit = 22
    else:
        raise ValueError('{0} is an unrecognized frequency rule'.format(frequency))

    return per_unit * length - 1
from alphamind.utilities import map_freq
def _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk):
......@@ -86,7 +72,7 @@ def prepare_data(engine: SqlEngine,
dates = [d.strftime('%Y-%m-%d') for d in dates]
horizon = _map_horizon(frequency)
horizon = map_freq(frequency)
if isinstance(factors, Transformer):
transformer = factors
......@@ -119,8 +105,10 @@ def batch_processing(x_values,
post_process):
train_x_buckets = {}
train_y_buckets = {}
train_risk_buckets = {}
predict_x_buckets = {}
predict_y_buckets = {}
predict_risk_buckets = {}
for i, start in enumerate(groups[:-batch]):
end = groups[i + batch]
......@@ -146,6 +134,8 @@ def batch_processing(x_values,
risk_factors=this_risk_exp,
post_process=post_process)
train_risk_buckets[end] = this_risk_exp
left_index = bisect.bisect_right(group_label, start)
right_index = bisect.bisect_right(group_label, end)
......@@ -165,6 +155,7 @@ def batch_processing(x_values,
inner_left_index = bisect.bisect_left(sub_dates, end)
inner_right_index = bisect.bisect_right(sub_dates, end)
predict_x_buckets[end] = ne_x[inner_left_index:inner_right_index]
predict_risk_buckets[end] = this_risk_exp[inner_left_index:inner_right_index]
this_raw_y = y_values[left_index:right_index]
if len(this_raw_y) > 0:
......@@ -174,7 +165,7 @@ def batch_processing(x_values,
post_process=post_process)
predict_y_buckets[end] = ne_y[inner_left_index:inner_right_index]
return train_x_buckets, train_y_buckets, predict_x_buckets, predict_y_buckets
return train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets
def fetch_data_package(engine: SqlEngine,
......@@ -216,7 +207,7 @@ def fetch_data_package(engine: SqlEngine,
alpha_logger.info("Loading data is finished")
train_x_buckets, train_y_buckets, predict_x_buckets, predict_y_buckets = batch_processing(
train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets = batch_processing(
x_values,
y_values,
dates,
......@@ -231,8 +222,8 @@ def fetch_data_package(engine: SqlEngine,
ret = dict()
ret['x_names'] = transformer.names
ret['settlement'] = return_df
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets}
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets}
return ret
......@@ -260,7 +251,7 @@ def fetch_train_phase(engine,
dateRule=BizDayConventions.Following,
dateGenerationRule=DateGeneration.Backward)
horizon = _map_horizon(frequency)
horizon = map_freq(frequency)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
......@@ -339,10 +330,10 @@ def fetch_predict_phase(engine,
risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
risk_exp = train_x[neutralized_risk].values.astype(float)
x_values = train_x[names].values.astype(float)
else:
train_x = factor_df.copy()
risk_exp = None
x_values = train_x[names].values.astype(float)
date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
dates = np.unique(date_label)
......
......@@ -6,87 +6,75 @@ Created on 2017-5-10
"""
import numpy as np
import arrow
from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version
from sklearn.linear_model import LinearRegression as LinearRegressionImpl
from sklearn.linear_model import Lasso
from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl
from PyFin.api import pyFinAssert
from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class ConstLinearModelImpl(object):
    """Linear predictor with fixed, user-supplied weights.

    It exposes the ``fit`` / ``predict`` pair so that it can be stored as
    the ``impl`` of a model wrapper, but nothing is ever learned from data.
    """

    def __init__(self, weights: np.ndarray = None):
        """Store ``weights`` flattened to one dimension.

        :param weights: array-like of coefficients. When omitted, an empty
            float array is stored. ``np.array(None)`` would otherwise yield
            a one-element object array holding ``None``, which serialises
            as ``[None]`` and breaks ``predict`` with an opaque error.
        """
        if weights is None:
            self.weights = np.array([], dtype=float)
        else:
            self.weights = np.array(weights).flatten()

    def fit(self, x: np.ndarray, y: np.ndarray):
        """Do nothing: the weights are constant by construction."""
        pass

    def predict(self, x: np.ndarray):
        """Return the matrix product ``x @ weights``."""
        return x @ self.weights
class ConstLinearModel(ModelBase):
def __init__(self,
features: list=None,
weights: np.ndarray=None):
features: list = None,
weights: np.ndarray = None):
super().__init__(features)
if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights),
ValueError,
"length of features is not equal to length of weights")
self.weights = np.array(weights).flatten()
def fit(self, x: np.ndarray, y: np.ndarray):
pass
def predict(self, x):
return x @ self.weights
self.impl = ConstLinearModelImpl(weights)
def save(self):
model_desc = super().save()
model_desc['weight'] = list(self.weights)
model_desc['weight'] = list(self.impl.weights)
return model_desc
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.weights = np.array(model_desc['weight'])
return obj_layout
return super().load(model_desc)
@property
def weights(self):
return self.impl.weights.tolist()
class LinearRegression(ModelBase):
def __init__(self, features: list=None, fit_intercept: bool=False):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
def save(self) -> dict:
model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
model_desc['weight'] = self.impl.coef_.tolist()
return model_desc
def score(self) -> float:
return self.impl.score()
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(
sklearn_version, model_desc['sklearn_version']))
obj_layout.impl = decode(model_desc['desc'])
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc['sklearn_version']))
return obj_layout
@property
......@@ -96,42 +84,52 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase):
def __init__(self, alpha, features: list=None, fit_intercept: bool=False):
def __init__(self, alpha=0.01, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def save(self) -> dict:
model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version
model_desc['weight'] = self.impl.coef_.tolist()
return model_desc
@classmethod
def load(cls, model_desc: dict):
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc['sklearn_version']))
return obj_layout
@property
def weights(self):
return self.impl.coef_.tolist()
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
class LogisticRegression(ModelBase):
def __init__(self, features: list = None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
model_desc['weight'] = self.impl.coef_.tolist()
return model_desc
def score(self) -> float:
return self.impl.score()
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls(alpha=0.)
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(
sklearn_version, model_desc['sklearn_version']))
obj_layout.impl = decode(model_desc['desc'])
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc['sklearn_version']))
return obj_layout
@property
......@@ -140,8 +138,8 @@ class LassoRegression(ModelBase):
if __name__ == '__main__':
import pprint
ls = ConstLinearModel(['a', 'b'], np.array([0.5, 0.5]))
x = np.array([[0.2, 0.2],
......
......@@ -9,6 +9,11 @@ from alphamind.model.modelbase import ModelBase
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBClassifier
def load_model(model_desc: dict) -> ModelBase:
......@@ -22,5 +27,15 @@ def load_model(model_desc: dict) -> ModelBase:
return LinearRegression.load(model_desc)
elif 'LassoRegression' in model_name_parts:
return LassoRegression.load(model_desc)
elif 'LogisticRegression' in model_name_parts:
return LogisticRegression.load(model_desc)
elif 'RandomForestRegressor' in model_name_parts:
return RandomForestRegressor.load(model_desc)
elif 'RandomForestClassifier' in model_name_parts:
return RandomForestClassifier.load(model_desc)
elif 'XGBRegressor' in model_name_parts:
return XGBRegressor.load(model_desc)
elif 'XGBClassifier' in model_name_parts:
return XGBClassifier.load(model_desc)
else:
raise ValueError('{0} is not currently supported in model loader.'.format(model_name))
......@@ -9,6 +9,8 @@ import abc
import arrow
import numpy as np
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class ModelBase(metaclass=abc.ABCMeta):
......@@ -16,14 +18,18 @@ class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None):
if features is not None:
self.features = list(features)
self.impl = None
self.trained_time = None
@abc.abstractmethod
def fit(self, x, y):
pass
self.impl.fit(x, y.flatten())
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
@abc.abstractmethod
def predict(self, x) -> np.ndarray:
pass
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
def score(self, x: np.ndarray, y: np.ndarray) -> float:
return self.impl.score(x, y)
@abc.abstractmethod
def save(self) -> dict:
......@@ -34,10 +40,17 @@ class ModelBase(metaclass=abc.ABCMeta):
model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__,
language='python',
saved_time=arrow.now().format("YYYY-MM-DD HH:mm:ss"),
features=list(self.features))
features=list(self.features),
trained_time=self.trained_time,
desc=encode(self.impl),
internal_model=self.impl.__class__.__module__ + "." + self.impl.__class__.__name__)
return model_desc
@abc.abstractclassmethod
def load(cls, model_desc: dict):
pass
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout.impl = decode(model_desc['desc'])
return obj_layout
......@@ -5,47 +5,154 @@ Created on 2017-12-4
@author: cheng.li
"""
import arrow
import numpy as np
from typing import List
from distutils.version import LooseVersion
from sklearn import __version__ as sklearn_version
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
from xgboost import __version__ as xgbboot_version
from xgboost import XGBRegressor as XGBRegressorImpl
from xgboost import XGBClassifier as XGBClassifierImpl
from alphamind.model.modelbase import ModelBase
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
class RandomForestRegressor(ModelBase):
def __init__(self, n_estimators, features=None, *args, **kwargs):
def __init__(self,
n_estimators: int=100,
max_features: str='auto',
features: List=None,
**kwargs):
super().__init__(features)
self.impl = RandomForestRegressorImpl(n_estimators, *args, **kwargs)
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y)
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def save(self) -> dict:
model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc
@classmethod
def load(cls, model_desc: dict):
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc['sklearn_version']))
return obj_layout
@property
def importances(self):
return self.impl.feature_importances_.tolist()
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
class RandomForestClassifier(ModelBase):
def __init__(self,
n_estimators: int=100,
max_features: str='auto',
features: List = None,
**kwargs):
super().__init__(features)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
self.trained_time = None
def save(self) -> dict:
model_desc = super().save()
model_desc['internal_model'] = self.impl.__class__.__module__ + "." + self.impl.__class__.__name__
model_desc['desc'] = encode(self.impl)
model_desc['sklearn_version'] = sklearn_version
model_desc['trained_time'] = self.trained_time
model_desc['importances'] = self.importances
return model_desc
@classmethod
def load(cls, model_desc: dict):
obj_layout = cls()
obj_layout.features = model_desc['features']
obj_layout.trained_time = model_desc['trained_time']
obj_layout = super().load(model_desc)
if LooseVersion(sklearn_version) < LooseVersion(model_desc['sklearn_version']):
alpha_logger.warning('Current sklearn version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(
sklearn_version, model_desc['sklearn_version']))
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc['sklearn_version']))
return obj_layout
@property
def importances(self):
return self.impl.feature_importances_.tolist()
obj_layout.impl = decode(model_desc['desc'])
class XGBRegressor(ModelBase):
    """``ModelBase`` wrapper around xgboost's ``XGBRegressor``."""

    def __init__(self,
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth: int = 3,
                 features: List = None,
                 **kwargs):
        """Build the underlying xgboost regressor.

        :param n_estimators: forwarded to the xgboost estimator
        :param learning_rate: forwarded to the xgboost estimator
        :param max_depth: forwarded to the xgboost estimator
        :param features: feature names handed to ``ModelBase``
        :param kwargs: any further keyword arguments for the xgboost estimator
        """
        super().__init__(features)
        self.impl = XGBRegressorImpl(n_estimators=n_estimators,
                                     learning_rate=learning_rate,
                                     max_depth=max_depth,
                                     **kwargs)

    def save(self) -> dict:
        """Return the base description plus xgboost version and importances."""
        model_desc = super().save()
        # The key spelling ('xgbboot') is kept as is: `load` and any model
        # description saved earlier rely on it.
        model_desc['xgbboot_version'] = xgbboot_version
        model_desc['importances'] = self.importances
        return model_desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from ``model_desc``.

        A warning is logged when the installed xgboost is older than the
        version recorded in the description.
        """
        obj_layout = super().load(model_desc)

        # Compare xgboost against xgboost; the sklearn version is unrelated
        # to the version stored under 'xgbboot_version'.
        if LooseVersion(xgbboot_version) < LooseVersion(model_desc['xgbboot_version']):
            alpha_logger.warning('Current xgboost version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(xgbboot_version,
                                                                             model_desc['xgbboot_version']))
        return obj_layout

    @property
    def importances(self):
        """Feature importances of the fitted estimator as a plain list."""
        return self.impl.feature_importances_.tolist()
class XGBClassifier(ModelBase):
    """``ModelBase`` wrapper around xgboost's ``XGBClassifier``."""

    def __init__(self,
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth: int = 3,
                 features: List = None,
                 **kwargs):
        """Build the underlying xgboost classifier.

        :param n_estimators: forwarded to the xgboost estimator
        :param learning_rate: forwarded to the xgboost estimator
        :param max_depth: forwarded to the xgboost estimator
        :param features: feature names handed to ``ModelBase``
        :param kwargs: any further keyword arguments for the xgboost estimator
        """
        super().__init__(features)
        self.impl = XGBClassifierImpl(n_estimators=n_estimators,
                                      learning_rate=learning_rate,
                                      max_depth=max_depth,
                                      **kwargs)

    def save(self) -> dict:
        """Return the base description plus xgboost version and importances."""
        model_desc = super().save()
        # The key spelling ('xgbboot') is kept as is: `load` and any model
        # description saved earlier rely on it.
        model_desc['xgbboot_version'] = xgbboot_version
        model_desc['importances'] = self.importances
        return model_desc

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from ``model_desc``.

        A warning is logged when the installed xgboost is older than the
        version recorded in the description.
        """
        obj_layout = super().load(model_desc)

        # Compare xgboost against xgboost; the sklearn version is unrelated
        # to the version stored under 'xgbboot_version'.
        if LooseVersion(xgbboot_version) < LooseVersion(model_desc['xgbboot_version']):
            alpha_logger.warning('Current xgboost version {0} is lower than the model version {1}. '
                                 'Loaded model may work incorrectly.'.format(xgbboot_version,
                                                                             model_desc['xgbboot_version']))
        return obj_layout

    @property
    def importances(self):
        """Feature importances of the fitted estimator as a plain list."""
        return self.impl.feature_importances_.tolist()
......@@ -8,8 +8,11 @@ Created on 2017-9-4
import unittest
import numpy as np
from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression
from sklearn.linear_model import LogisticRegression as LogisticRegression2
from alphamind.model.linearmodel import LogisticRegression
class TestLinearModel(unittest.TestCase):
......@@ -17,7 +20,8 @@ class TestLinearModel(unittest.TestCase):
def setUp(self):
self.n = 3
self.train_x = np.random.randn(1000, self.n)
self.train_y = np.random.randn(1000, 1)
self.train_y = np.random.randn(1000)
self.train_y_label = np.where(self.train_y > 0., 1, 0)
self.predict_x = np.random.randn(10, self.n)
def test_const_linear_model(self):
......@@ -36,7 +40,7 @@ class TestLinearModel(unittest.TestCase):
weights=weights)
desc = model.save()
new_model = ConstLinearModel.load(desc)
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
np.testing.assert_array_almost_equal(model.weights, new_model.weights)
......@@ -52,15 +56,44 @@ class TestLinearModel(unittest.TestCase):
expected_y = expected_model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(expected_model.coef_, model.weights)
def test_linear_regression_persistence(self):
model = LinearRegression(['a', 'b', 'c'], fit_intercept=False)
model.fit(self.train_x, self.train_y)
desc = model.save()
new_model = LinearRegression.load(desc)
new_model = load_model(desc)
calculated_y = new_model.predict(self.predict_x)
expected_y = model.predict(self.predict_x)
np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(new_model.weights, model.weights)
def test_logistic_regression(self):
    """The wrapper must match a plain sklearn LogisticRegression fit on the same data."""
    wrapped = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
    wrapped.fit(self.train_x, self.train_y_label)
    wrapped_labels = wrapped.predict(self.predict_x)

    reference = LogisticRegression2(fit_intercept=False)
    reference.fit(self.train_x, self.train_y_label)
    reference_labels = reference.predict(self.predict_x)

    # Predicted class labels are compared exactly, coefficients approximately.
    np.testing.assert_array_equal(wrapped_labels, reference_labels)
    np.testing.assert_array_almost_equal(reference.coef_, wrapped.weights)
def test_logistic_regression_persistence(self):
    """A saved LogisticRegression must survive a save / load_model round trip."""
    # Build the logistic model under test; the previous version built a
    # LinearRegression here, so logistic persistence was never exercised.
    model = LogisticRegression(['a', 'b', 'c'], fit_intercept=False)
    model.fit(self.train_x, self.train_y_label)

    desc = model.save()
    new_model = load_model(desc)

    calculated_y = new_model.predict(self.predict_x)
    expected_y = model.predict(self.predict_x)

    np.testing.assert_array_almost_equal(calculated_y, expected_y)
    np.testing.assert_array_almost_equal(new_model.weights, model.weights)
# -*- coding: utf-8 -*-
"""
Created on 2018-1-5
@author: cheng.li
"""
import unittest
import numpy as np
from alphamind.model.loader import load_model
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBClassifier
class TestTreeModel(unittest.TestCase):
    """Save / load_model round-trip checks for the tree based models."""

    def _check_persistence(self, model, classify):
        """Fit ``model`` on random data, reload it and compare both copies.

        :param model: an unfitted model built with ten features
        :param classify: when true, the random target is turned into 0/1
            labels (1 where the drawn value is positive)
        """
        x = np.random.randn(1000, 10)
        y = np.random.randn(1000)
        if classify:
            y = np.where(y > 0, 1, 0)
        model.fit(x, y)

        desc = model.save()
        new_model = load_model(desc)
        self.assertEqual(model.features, new_model.features)

        # The reloaded model must predict the same values on unseen inputs.
        sample_x = np.random.randn(100, 10)
        np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))

    def test_random_forest_regress_persistence(self):
        self._check_persistence(RandomForestRegressor(features=list(range(10))), classify=False)

    def test_random_forest_classify_persistence(self):
        self._check_persistence(RandomForestClassifier(features=list(range(10))), classify=True)

    def test_xgb_regress_persistence(self):
        self._check_persistence(XGBRegressor(features=list(range(10))), classify=False)

    def test_xgb_classify_persistence(self):
        self._check_persistence(XGBClassifier(features=list(range(10))), classify=True)
......@@ -28,6 +28,7 @@ from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis
from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis
from alphamind.tests.model.test_linearmodel import TestLinearModel
from alphamind.tests.model.test_treemodel import TestTreeModel
from alphamind.tests.model.test_loader import TestLoader
from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor
from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor
......@@ -54,6 +55,7 @@ if __name__ == '__main__':
TestFactorAnalysis,
TestQuantileAnalysis,
TestLinearModel,
TestTreeModel,
TestLoader,
TestNaiveExecutor,
TestThresholdExecutor,
......
......@@ -16,6 +16,27 @@ import numba as nb
alpha_logger = CustomLogger('ALPHA_MIND', 'info')
def map_freq(freq):
    """Translate a frequency label into an integer horizon.

    The labels ``'1d'``, ``'1w'``, ``'2w'``, ``'3w'``, ``'4w'`` and ``'1m'``
    map to fixed horizons. Any other label ending in ``'b'`` is read as
    ``'<N>b'`` and maps to ``N - 1``.

    :param freq: frequency label, e.g. ``'1w'`` or ``'10b'``
    :return: the horizon as an ``int``
    :raises ValueError: if the label is empty, is not one of the fixed
        labels, or its ``'<N>b'`` prefix is not an integer
    """
    fixed_horizons = {
        '1m': 21,
        '1w': 4,
        '2w': 9,
        '3w': 14,
        '4w': 19,
        '1d': 0,
    }
    if freq in fixed_horizons:
        return fixed_horizons[freq]

    # Slicing (rather than indexing) keeps an empty label from raising
    # IndexError; it falls through to the ValueError below instead.
    if freq[-1:] == "b":
        try:
            return int(freq[:-1]) - 1
        except ValueError:
            # Report a malformed prefix with the same message as any other
            # unknown label instead of leaking int()'s own message.
            raise ValueError("Unrecognized freq: {0}".format(freq)) from None

    raise ValueError("Unrecognized freq: {0}".format(freq))
def groupby(groups):
order = groups.argsort()
t = groups[order]
......
#!/bin/sh
cd alphamind/pfopt
cd xgboost
git submodule init
git submodule update
./build_linux.sh
make -j4
cd python-package
python setup.py install
if [ $? -ne 0 ] ; then
cd ../..
......@@ -10,3 +14,12 @@ if [ $? -ne 0 ] ; then
fi
cd ../..
cd alphamind/pfopt
./build_linux.sh
if [ $? -ne 0 ] ; then
cd ../..
exit 1
fi
cd ../..
\ No newline at end of file
@echo off
cd xgboost
git submodule init
git submodule update
mkdir build
cd build
cmake .. -G "Visual Studio 14 2015 Win64"
msbuild xgboost.sln /m /p:Configuration=Release /p:Platform=x64
if %errorlevel% neq 0 exit /b 1
cd ../python-package
python setup.py install
if %errorlevel% neq 0 exit /b 1
cd ../..
cd alphamind\pfopt
call build_windows.bat
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment