Commit 4b49b7bb authored by Yucheng

Merge branch 'master' of https://github.com/lion-sing/alpha-mind

parents 6c4fb657 2c6b4668
......@@ -33,6 +33,8 @@ install:
- conda install pandas
- conda install scikit-learn
- conda install cython
- conda install sqlalchemy
- conda install psycopg2
- conda install -c conda-forge arrow
- pip install simpleutils
- pip install coveralls
......
......@@ -48,8 +48,8 @@ alpha-mind provides the tool chain commonly used in multi-factor research, including:
* Linux
   On Linux, a C++ compiler (e.g. g++) and a Fortran compiler (e.g. gfortran) are required:
On Linux, a C++ compiler (e.g. g++) and a Fortran compiler (e.g. gfortran) are required:
```bash
build_linux_dependencies.sh
```
......
......@@ -43,6 +43,10 @@ from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.model.composer import Composer
from alphamind.model.composer import DataMeta
from alphamind.model.composer import train_model
from alphamind.model.composer import predict_by_model
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor
......@@ -79,6 +83,10 @@ __all__ = [
'fetch_data_package',
'fetch_train_phase',
'fetch_predict_phase',
'Composer',
'DataMeta',
'train_model',
'predict_by_model',
'LinearRegression',
'LassoRegression',
'ConstLinearModel',
......
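For reference, a minimal sketch of how these newly exported composer objects fit together, using only the constructor and save/load signatures exercised by the unit tests added in this commit (the connection string is a dummy):

```python
from alphamind.data.engines.universe import Universe
from alphamind.model.composer import Composer, DataMeta
from alphamind.model.treemodel import XGBClassifier

# DataMeta describes the data recipe the model was trained on
data_meta = DataMeta(freq='5b',
                     universe=Universe('custom', ['zz800']),
                     batch=4,
                     neutralized_risk=['SIZE'],
                     risk_model='long',
                     pre_process=['standardize', 'winsorize_normal'],
                     post_process=['standardize', 'winsorize_normal'],
                     warm_start=2,
                     data_source='postgresql://user:pwd@server/dummy')

alpha_model = XGBClassifier(features={'f1': 'closePrice', 'f2': 'openPrice'})
composer = Composer(alpha_model=alpha_model, data_meta=data_meta)

desc = composer.save()          # plain dict, storable in the models table
rebuilt = Composer.load(desc)   # round-trips model and data recipe together
```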
......@@ -5,8 +5,7 @@ Created on 2017-6-29
@author: cheng.li
"""
from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text, Boolean, text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text, Boolean, text, JSON
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
......@@ -635,7 +634,7 @@ class DailyPortfolios(Base):
industry = Column(String(50), nullable=False)
benchmark_weight = Column(Float(53), nullable=False)
is_tradable = Column(Boolean, nullable=False)
factor = Column(JSONB)
factor = Column(JSON)
class DailyPortfoliosSchedule(Base):
......@@ -857,7 +856,8 @@ class Models(Base):
model_type = Column(String(30), nullable=False)
model_version = Column(BigInteger, nullable=False)
update_time = Column(DateTime, nullable=False)
model_desc = Column(JSONB, nullable=False)
model_desc = Column(JSON, nullable=False)
data_meta = Column(JSON, nullable=True)
is_primary = Column(Boolean)
model_id = Column(Integer, primary_key=True, autoincrement=True)
......@@ -915,7 +915,7 @@ class Positions(Base):
trade_date = Column(DateTime, primary_key=True, nullable=False)
portfolio = Column(String(50), primary_key=True, nullable=False)
type = Column(String(50), primary_key=True, nullable=False)
weight = Column(JSONB)
weight = Column(JSON)
class QuantileAnalysis(Base):
......@@ -1865,7 +1865,7 @@ class Formulas(Base):
__tablename__ = 'formulas'
formula = Column(String(50), primary_key=True)
formula_desc = Column(JSONB, nullable=False)
formula_desc = Column(JSON, nullable=False)
comment = Column(Text)
......
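The changes above swap the PostgreSQL-only JSONB type for SQLAlchemy's dialect-neutral JSON, so the model declarations no longer bind the schema to one backend. A minimal sketch of the resulting pattern (the table below is hypothetical, not part of this diff):

```python
from sqlalchemy import Column, Integer, JSON
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class ModelRecord(Base):        # hypothetical table, mirroring the pattern above
    __tablename__ = 'model_record'
    model_id = Column(Integer, primary_key=True, autoincrement=True)
    model_desc = Column(JSON, nullable=False)   # portable across SQL dialects
```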
......@@ -155,6 +155,13 @@ class SqlEngine(object):
dates: Iterable[str] = None) -> pd.DataFrame:
return universe.query(self, start_date, end_date, dates)
def _create_stats(self, table, horizon, offset, code_attr='code'):
stats = func.sum(self.ln_func(1. + table.chgPct)).over(
partition_by=getattr(table, code_attr),
order_by=table.trade_date,
rows=(1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
return stats
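`_create_stats` centralizes the window expression that the four dx-return queries below previously duplicated: a per-code, date-ordered sum of log(1 + chgPct) over a forward row frame. A standalone sketch of the expression it builds (the `table` objects and `func.ln` are stand-ins for the engine's own objects, with `DAILY_RETURN_OFFSET = 0` assumed):

```python
from sqlalchemy import func, select
from sqlalchemy.sql import table, column

market = table('market', column('trade_date'), column('code'), column('chgPct'))

horizon, offset, DAILY_RETURN_OFFSET = 5, 0, 0
dx = func.sum(func.ln(1. + market.c.chgPct)).over(
    partition_by=market.c.code,
    order_by=market.c.trade_date,
    # frame runs from the next row to `horizon` rows ahead, i.e. the
    # cumulative log return over the coming `horizon` trading days
    rows=(1 + DAILY_RETURN_OFFSET + offset,
          1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')

print(select([market.c.trade_date, market.c.code, dx]))
```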
def fetch_dx_return(self,
ref_date: str,
codes: Iterable[int],
......@@ -169,10 +176,7 @@ class SqlEngine(object):
else:
end_date = expiry_date
stats = func.sum(self.ln_func(1. + Market.chgPct)).over(
partition_by=Market.code,
order_by=Market.trade_date,
rows=(1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
stats = self._create_stats(Market, horizon, offset)
query = select([Market.trade_date, Market.code, stats]).where(
and_(
......@@ -200,24 +204,22 @@ class SqlEngine(object):
end_date = advanceDateByCalendar('china.sse', end_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
stats = func.sum(self.ln_func(1. + Market.chgPct)).over(
partition_by=Market.code,
order_by=Market.trade_date,
rows=(1 + offset + DAILY_RETURN_OFFSET, 1 + horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
stats = self._create_stats(Market, horizon, offset)
cond = universe._query_statements(start_date, end_date, None)
big_table = join(Market, UniverseTable,
t = select([Market.trade_date, Market.code, stats]).where(
Market.trade_date.between(start_date, end_date)
).alias('t')
big_table = join(t, UniverseTable,
and_(
Market.trade_date == UniverseTable.trade_date,
Market.code == UniverseTable.code,
t.columns['trade_date'] == UniverseTable.trade_date,
t.columns['code'] == UniverseTable.code,
cond
)
)
query = select([Market.trade_date, Market.code, stats]) \
.select_from(big_table)
query = select([t]).select_from(big_table)
df = pd.read_sql(query, self.session.bind).dropna()
if universe.is_filtered:
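The rewrite above computes the window sum in a subquery over the full market table and only then joins the universe; joining first would let membership filtering truncate the window frames and bias the forward returns. A minimal sketch of the compute-then-join pattern (table objects are hypothetical stand-ins):

```python
from sqlalchemy import and_, func, join, select
from sqlalchemy.sql import table, column

market = table('market', column('trade_date'), column('code'), column('chgPct'))
univ = table('universe', column('trade_date'), column('code'))

dx = func.sum(func.ln(1. + market.c.chgPct)).over(
    partition_by=market.c.code,
    order_by=market.c.trade_date,
    rows=(1, 6)).label('dx')

# 1) compute the window over the *full* market table ...
t = select([market.c.trade_date, market.c.code, dx]).alias('t')

# 2) ... then join the universe, so membership filtering cannot truncate
#    the row frames the forward-return sum depends on
big_table = join(t, univ, and_(t.c.trade_date == univ.c.trade_date,
                               t.c.code == univ.c.code))
print(select([t]).select_from(big_table))
```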
......@@ -242,10 +244,7 @@ class SqlEngine(object):
else:
end_date = expiry_date
stats = func.sum(self.ln_func(1. + IndexMarket.chgPct)).over(
partition_by=IndexMarket.indexCode,
order_by=IndexMarket.trade_date,
rows=(1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
stats = self._create_stats(IndexMarket, horizon, offset, code_attr='indexCode')
query = select([IndexMarket.trade_date, IndexMarket.indexCode.label('code'), stats]).where(
and_(
......@@ -273,10 +272,7 @@ class SqlEngine(object):
end_date = advanceDateByCalendar('china.sse', end_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
stats = func.sum(self.ln_func(1. + IndexMarket.chgPct)).over(
partition_by=IndexMarket.indexCode,
order_by=IndexMarket.trade_date,
rows=(1 + offset + DAILY_RETURN_OFFSET, 1 + horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
stats = self._create_stats(IndexMarket, horizon, offset, code_attr='indexCode')
query = select([IndexMarket.trade_date, IndexMarket.indexCode.label('code'), stats]) \
.where(
......@@ -360,9 +356,11 @@ class SqlEngine(object):
factor_cols = _map_factors(dependency, factor_tables)
big_table = FullFactor
joined_tables = set()
joined_tables.add(FullFactor.__table__.name)
for t in set(factor_cols.values()):
if t.__table__.name != FullFactor.__table__.name:
if t.__table__.name not in joined_tables:
if dates is not None:
big_table = outerjoin(big_table, t, and_(FullFactor.trade_date == t.trade_date,
FullFactor.code == t.code,
......@@ -371,20 +369,18 @@ class SqlEngine(object):
big_table = outerjoin(big_table, t, and_(FullFactor.trade_date == t.trade_date,
FullFactor.code == t.code,
FullFactor.trade_date.between(start_date, end_date)))
joined_tables.add(t.__table__.name)
cond = universe._query_statements(start_date, end_date, dates)
big_table = join(big_table, UniverseTable,
and_(
FullFactor.trade_date == UniverseTable.trade_date,
FullFactor.code == UniverseTable.code,
cond
)
)
universe_df = universe.query(self, start_date, end_date, dates)
query = select(
[FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
.select_from(big_table).distinct()
.select_from(big_table).where(
and_(
FullFactor.code.in_(universe_df.code.unique().tolist()),
FullFactor.trade_date.in_(dates) if dates is not None else FullFactor.trade_date.between(start_date, end_date)
)
).distinct()
df = pd.read_sql(query, self.engine)
if universe.is_filtered:
......@@ -395,7 +391,6 @@ class SqlEngine(object):
df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()
df.sort_values(['trade_date', 'code'], inplace=True)
df.set_index('trade_date', inplace=True)
res = transformer.transform('code', df)
......@@ -404,11 +399,13 @@ class SqlEngine(object):
df[col] = res[col].values
df['isOpen'] = df.isOpen.astype(bool)
return df.reset_index()
df = df.reset_index()
return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')
def fetch_benchmark(self,
ref_date: str,
benchmark: int) -> pd.DataFrame:
benchmark: int,
codes: Iterable[int]=None) -> pd.DataFrame:
query = select([IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
and_(
IndexComponent.trade_date == ref_date,
......@@ -416,7 +413,13 @@ class SqlEngine(object):
)
)
return pd.read_sql(query, self.engine)
df = pd.read_sql(query, self.engine)
if codes:
df.set_index(['code'], inplace=True)
df = df.reindex(codes).fillna(0.)
df.reset_index(inplace=True)
return df
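With the new optional `codes` argument, benchmark weights are aligned to the caller's code list and missing constituents get zero weight. The same pandas pattern in isolation (data made up):

```python
import pandas as pd

df = pd.DataFrame({'code': [1, 3], 'weight': [0.6, 0.4]})
codes = [1, 2, 3, 4]          # the caller's universe, a superset of index members

df = df.set_index('code').reindex(codes).fillna(0.).reset_index()
print(df)
#    code  weight
# 0     1     0.6
# 1     2     0.0
# 2     3     0.4
# 3     4     0.0
```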
def fetch_benchmark_range(self,
benchmark: int,
......@@ -613,7 +616,7 @@ class SqlEngine(object):
res = df[['trade_date', 'code', 'industry_code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0]*len(out_s))))
res = res.assign(**dict(zip(out_s, [0] * len(out_s))))
return res
def fetch_trade_status(self,
......@@ -747,6 +750,7 @@ class SqlEngine(object):
model_version=None,
is_primary=True,
model_id=None) -> pd.DataFrame:
from alphamind.model.composer import DataMeta
conditions = []
......@@ -768,8 +772,10 @@ class SqlEngine(object):
model_df = pd.read_sql(query, self.engine)
for i, model_desc in enumerate(model_df.model_desc):
for i, data in enumerate(zip(model_df.model_desc, model_df.data_meta)):
model_desc, data_desc = data
model_df.loc[i, 'model'] = load_model(model_desc)
model_df.loc[i, 'data_meta'] = DataMeta.load(data_desc)
del model_df['model_desc']
return model_df
......@@ -923,10 +929,11 @@ class SqlEngine(object):
if __name__ == '__main__':
universe = Universe('ss', ['hs300'])
from PyFin.api import *
engine = SqlEngine()
ref_date = '2017-12-28'
codes = universe.query(engine, dates=[ref_date])
df = engine.fetch_trade_status(ref_date, codes.code.tolist())
print(df)
\ No newline at end of file
ref_date = '2017-06-29'
universe = Universe('', ['zz800'])
dates = makeSchedule('2010-01-01', '2018-02-01', '10b', 'china.sse')
df = engine.fetch_factor_range(universe, DIFF('roe_q'), dates=dates)
......@@ -7,17 +7,19 @@ Created on 2017-7-7
from typing import Iterable
import pandas as pd
from simpleutils.miscellaneous import list_eq
from sqlalchemy import and_
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import join
from sqlalchemy import outerjoin
from PyFin.api import pyFinAssert
from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import FullFactor
from alphamind.data.engines.utilities import _map_factors
from alphamind.data.engines.utilities import factor_tables
from alphamind.data.transformer import Transformer
from alphamind.utilities import encode
from alphamind.utilities import decode
class Universe(object):
......@@ -25,15 +27,22 @@ class Universe(object):
def __init__(self,
name: str,
base_universe: Iterable,
exclude_universe: Iterable=None,
special_codes: Iterable=None,
exclude_universe: Iterable = None,
special_codes: Iterable = None,
filter_cond=None):
self.name = name
self.base_universe = base_universe
self.exclude_universe = exclude_universe
self.special_codes = special_codes
self.base_universe = sorted(base_universe) if base_universe else None
self.exclude_universe = sorted(exclude_universe) if exclude_universe else None
self.special_codes = sorted(special_codes) if special_codes else None
self.filter_cond = filter_cond
def __eq__(self, rhs):
return self.name == rhs.name \
and list_eq(self.base_universe, rhs.base_universe) \
and list_eq(self.exclude_universe, rhs.exclude_universe) \
and list_eq(self.special_codes, rhs.special_codes) \
and str(self.filter_cond) == str(rhs.filter_cond)
@property
def is_filtered(self):
return True if self.filter_cond is not None else False
......@@ -59,7 +68,7 @@ class Universe(object):
*and_conditions
)
def query(self, engine, start_date: str=None, end_date: str=None, dates=None) -> pd.DataFrame:
def query(self, engine, start_date: str = None, end_date: str = None, dates=None) -> pd.DataFrame:
universe_cond = self._query_statements(start_date, end_date, dates)
......@@ -103,6 +112,29 @@ class Universe(object):
df = df[df[filter_fields[0]] == 1].reset_index()[['trade_date', 'code']]
return df
def save(self):
return dict(
name=self.name,
base_universe=self.base_universe,
exclude_universe=self.exclude_universe,
special_codes=self.special_codes,
filter_cond=encode(self.filter_cond)
)
@classmethod
def load(cls, universe_desc: dict):
name = universe_desc['name']
base_universe = universe_desc['base_universe']
exclude_universe = universe_desc['exclude_universe']
special_codes = universe_desc['special_codes']
filter_cond = decode(universe_desc['filter_cond'])
return cls(name=name,
base_universe=base_universe,
exclude_universe=exclude_universe,
special_codes=special_codes,
filter_cond=filter_cond)
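`save` emits a plain dict whose filter condition is serialized via `encode`, so a universe can be persisted next to a model and rebuilt later. A round-trip sketch mirroring the new unit test:

```python
from PyFin.api import LAST
from alphamind.data.engines.universe import Universe

universe = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
desc = universe.save()              # plain dict; filter_cond serialized via encode()
loaded = Universe.load(desc)

assert loaded.name == universe.name
assert loaded.base_universe == universe.base_universe
assert str(loaded.filter_cond) == str(universe.filter_cond)
```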
if __name__ == '__main__':
from PyFin.api import *
......
......@@ -12,9 +12,6 @@ from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
from PyFin.api import transform as transform_impl
DEFAULT_FACTOR_NAME = 'user_factor'
def factor_translator(factor_pool):
if not factor_pool:
......@@ -23,7 +20,7 @@ def factor_translator(factor_pool):
if isinstance(factor_pool, str):
return {factor_pool: factor_pool}, [factor_pool]
elif isinstance(factor_pool, SecurityValueHolder):
return {DEFAULT_FACTOR_NAME: factor_pool}, sorted(factor_pool.fields)
return {str(factor_pool): factor_pool}, sorted(factor_pool.fields)
elif isinstance(factor_pool, dict):
dependency = set()
for k, v in factor_pool.items():
......@@ -46,7 +43,7 @@ def factor_translator(factor_pool):
factor_dict[f] = f
dependency = dependency.union([f])
elif isinstance(f, SecurityValueHolder):
factor_dict[DEFAULT_FACTOR_NAME + '_' + str(k).zfill(3)] = f
factor_dict[str(f)] = f
dependency = dependency.union(f.fields)
k += 1
return factor_dict, sorted(dependency)
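Expressions are now keyed by their own string representation rather than a shared 'user_factor' name, so two different expressions can no longer collide. A short sketch of the new naming (assumes PyFin is installed):

```python
from PyFin.api import LAST
from alphamind.data.transformer import factor_translator

factor_dict, dependency = factor_translator(LAST('roe_q'))
# Previously: {'user_factor': <expression>}; now the key is str(expression),
# so distinct expressions get distinct, self-describing column names.
print(list(factor_dict.keys()), dependency)
```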
......@@ -80,3 +77,8 @@ class Transformer(object):
return transformed_data
else:
return pd.DataFrame()
if __name__ == '__main__':
transformer = Transformer(['c', 'a'])
......@@ -21,7 +21,7 @@ Back test parameter settings
"""
start_date = '2010-01-01'
end_date = '2018-01-26'
end_date = '2018-01-29'
frequency = '10b'
method = 'risk_neutral'
......@@ -216,7 +216,7 @@ def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True
def worker_func_positive(factor_name):
from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q']
neutralize_factors = ['roe_q', 'ep_q']
engine = SqlEngine()
benchmark_code = 905
universe_name = ['zz500']
......@@ -226,7 +226,7 @@ def worker_func_positive(factor_name):
def worker_func_negative(factor_name):
from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q']
neutralize_factors = ['roe_q', 'ep_q']
engine = SqlEngine()
benchmark_code = 905
universe_name = ['zz500']
......@@ -235,34 +235,34 @@ def worker_func_negative(factor_name):
if __name__ == '__main__':
# from dask.distributed import Client
#
# client = Client('10.63.6.176:8786')
#
# engine = SqlEngine()
# df = engine.fetch_factor_coverage()
# df = df[df.universe == 'zz800'].groupby('factor').mean()
# df = df[df.coverage >= 0.98]
#
# tasks = client.map(worker_func_positive, df.index.tolist())
# res1 = client.gather(tasks)
#
# tasks = client.map(worker_func_negative, df.index.tolist())
# res2 = client.gather(tasks)
#
# factor_df = pd.DataFrame()
#
# for f_name, df in res1:
# factor_df[f_name] = df['returns']
#
# for f_name, df in res2:
# factor_df[f_name] = df['returns']
from dask.distributed import Client
factor_name = LAST('ep_q') # LAST('EBITDA') / LAST('ev')
f_name, ret_df = worker_func_positive(factor_name)
client = Client('192.168.0.102:8786')
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
frequency, factor_name, 905),
secondary_y='tc_cost')
plt.show()
engine = SqlEngine()
df = engine.fetch_factor_coverage()
df = df[df.universe == 'zz800'].groupby('factor').mean()
df = df[df.coverage >= 0.98]
tasks = client.map(worker_func_positive, df.index.tolist())
res1 = client.gather(tasks)
tasks = client.map(worker_func_negative, df.index.tolist())
res2 = client.gather(tasks)
factor_df = pd.DataFrame()
for f_name, df in res1:
factor_df[f_name] = df['returns']
for f_name, df in res2:
factor_df[f_name] = df['returns']
# factor_name = LAST('EBITDA') / LAST('ev')
# f_name, ret_df = worker_func_positive(factor_name)
#
# ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
# title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
# frequency, factor_name, 905),
# secondary_y='tc_cost')
# plt.show()
......@@ -15,6 +15,7 @@ from PyFin.api import makeSchedule
from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar
from PyFin.api import pyFinAssert
from PyFin.DateUtilities import Period
from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine
......@@ -101,7 +102,8 @@ def prepare_data(engine: SqlEngine,
['trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'] + transformer.names]
def batch_processing(x_values,
def batch_processing(names,
x_values,
y_values,
groups,
group_label,
......@@ -132,10 +134,11 @@ def batch_processing(x_values,
else:
this_risk_exp = None
train_x_buckets[end] = factor_processing(this_raw_x,
pre_process=pre_process,
risk_factors=this_risk_exp,
post_process=post_process)
train_x_buckets[end] = pd.DataFrame(factor_processing(this_raw_x,
pre_process=pre_process,
risk_factors=this_risk_exp,
post_process=post_process),
columns=names)
train_y_buckets[end] = factor_processing(this_raw_y,
pre_process=pre_process,
......@@ -163,7 +166,7 @@ def batch_processing(x_values,
inner_left_index = bisect.bisect_left(sub_dates, end)
inner_right_index = bisect.bisect_right(sub_dates, end)
predict_x_buckets[end] = ne_x[inner_left_index:inner_right_index]
predict_x_buckets[end] = pd.DataFrame(ne_x[inner_left_index:inner_right_index], columns=names)
predict_risk_buckets[end] = this_risk_exp[inner_left_index:inner_right_index]
predict_codes_bucket[end] = this_codes[inner_left_index:inner_right_index]
......@@ -198,8 +201,8 @@ def fetch_data_package(engine: SqlEngine,
pre_process: Iterable[object] = None,
post_process: Iterable[object] = None) -> dict:
alpha_logger.info("Starting data package fetching ...")
transformer = Transformer(alpha_factors)
names = transformer.names
dates, return_df, factor_df = prepare_data(engine,
transformer,
start_date,
......@@ -210,7 +213,7 @@ def fetch_data_package(engine: SqlEngine,
warm_start)
return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
_merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)
_merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)
alpha_logger.info("data merging finished")
......@@ -226,7 +229,8 @@ def fetch_data_package(engine: SqlEngine,
alpha_logger.info("Loading data is finished")
train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets, predict_codes_bucket \
= batch_processing(x_values,
= batch_processing(names,
x_values,
y_values,
dates,
date_label,
......@@ -239,15 +243,16 @@ def fetch_data_package(engine: SqlEngine,
alpha_logger.info("Data processing is finished")
ret = dict()
ret['x_names'] = transformer.names
ret['x_names'] = names
ret['settlement'] = return_df
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets, 'code': predict_codes_bucket}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets,
'code': predict_codes_bucket}
return ret
def fetch_train_phase(engine,
alpha_factors: Iterable[object],
alpha_factors: Union[Transformer, Iterable[object]],
ref_date,
frequency,
universe,
......@@ -257,7 +262,10 @@ def fetch_train_phase(engine,
pre_process: Iterable[object] = None,
post_process: Iterable[object] = None,
warm_start: int = 0) -> dict:
transformer = Transformer(alpha_factors)
if isinstance(alpha_factors, Transformer):
transformer = alpha_factors
else:
transformer = Transformer(alpha_factors)
p = Period(frequency)
p = Period(length=-(warm_start + batch + 1) * p.length(), units=p.units())
......@@ -284,11 +292,12 @@ def fetch_train_phase(engine,
_merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)
if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
pyFinAssert(len(dates) >= 2, ValueError, "No previous data for training for the date {0}".format(ref_date))
end = dates[-2]
start = dates[-batch - 1]
start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
else:
end = dates[-1]
start = dates[-batch]
start = dates[-batch] if batch <= len(dates) else dates[0]
index = (date_label >= start) & (date_label <= end)
this_raw_x = x_values[index]
......@@ -311,13 +320,13 @@ def fetch_train_phase(engine,
ret = dict()
ret['x_names'] = transformer.names
ret['train'] = {'x': ne_x, 'y': ne_y, 'code': this_code}
ret['train'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'y': ne_y, 'code': this_code}
return ret
def fetch_predict_phase(engine,
alpha_factors: Iterable[object],
alpha_factors: Union[Transformer, Iterable[object]],
ref_date,
frequency,
universe,
......@@ -326,8 +335,12 @@ def fetch_predict_phase(engine,
risk_model: str = 'short',
pre_process: Iterable[object] = None,
post_process: Iterable[object] = None,
warm_start: int = 0):
transformer = Transformer(alpha_factors)
warm_start: int = 0,
fillna: str=None):
if isinstance(alpha_factors, Transformer):
transformer = alpha_factors
else:
transformer = Transformer(alpha_factors)
p = Period(frequency)
p = Period(length=-(warm_start + batch) * p.length(), units=p.units())
......@@ -340,7 +353,12 @@ def fetch_predict_phase(engine,
dateRule=BizDayConventions.Following,
dateGenerationRule=DateGeneration.Backward)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates).dropna()
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
if fillna:
factor_df = factor_df.groupby('trade_date').apply(lambda x: x.fillna(x.median())).reset_index(drop=True).dropna()
else:
factor_df = factor_df.dropna()
names = transformer.names
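The new `fillna` option imputes missing factor values with each trade date's cross-sectional median before dropping whatever is still missing. The same pandas idiom in isolation (`numeric_only` is spelled out here for newer pandas; the library code relies on the older default):

```python
import numpy as np
import pandas as pd

factor_df = pd.DataFrame({
    'trade_date': ['2018-01-02'] * 3 + ['2018-01-03'] * 3,
    'code': [1, 2, 3] * 2,
    'f1': [1.0, np.nan, 3.0, np.nan, 5.0, 7.0],
})

filled = (factor_df.groupby('trade_date')
                   .apply(lambda x: x.fillna(x.median(numeric_only=True)))
                   .reset_index(drop=True))
print(filled)   # NaNs become 2.0 and 6.0, each date's cross-sectional median
```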
......@@ -360,7 +378,7 @@ def fetch_predict_phase(engine,
if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
end = dates[-1]
start = dates[-batch]
start = dates[-batch] if batch <= len(dates) else dates[0]
left_index = bisect.bisect_left(date_label, start)
right_index = bisect.bisect_right(date_label, end)
......@@ -392,7 +410,7 @@ def fetch_predict_phase(engine,
ret = dict()
ret['x_names'] = transformer.names
ret['predict'] = {'x': ne_x, 'code': codes}
ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes}
return ret
......
......@@ -19,7 +19,7 @@ from alphamind.utilities import alpha_logger
class ConstLinearModelImpl(object):
def __init__(self, weights: np.ndarray = None):
self.weights = np.array(weights).flatten()
self.weights = weights.flatten()
def fit(self, x: np.ndarray, y: np.ndarray):
pass
......@@ -31,15 +31,15 @@ class ConstLinearModelImpl(object):
class ConstLinearModel(ModelBase):
def __init__(self,
features: list = None,
formulas: dict = None,
weights: np.ndarray = None):
super().__init__(features, formulas=formulas)
features=None,
weights: dict = None):
super().__init__(features)
if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights),
ValueError,
"length of features is not equal to length of weights")
self.impl = ConstLinearModelImpl(weights)
if weights:
self.impl = ConstLinearModelImpl(np.array([weights[name] for name in self.features]))
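Weights are now passed as a dict keyed by feature name and reordered internally to match `self.features`, so argument order no longer matters. A usage sketch mirroring the updated unit test:

```python
import numpy as np
import pandas as pd
from alphamind.model.linearmodel import ConstLinearModel

model = ConstLinearModel(features=['c', 'b', 'a'],
                         weights=dict(a=1., b=2., c=3.))

x = pd.DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c'])
y = model.predict(x)   # predict() aligns columns to model.features internally,
                       # so the dict keys, not any ordering, bind weight to factor
```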
def save(self):
model_desc = super().save()
......@@ -57,10 +57,9 @@ class ConstLinearModel(ModelBase):
class LinearRegression(ModelBase):
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def save(self) -> dict:
model_desc = super().save()
......@@ -85,10 +84,9 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase):
def __init__(self, alpha=0.01, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def save(self) -> dict:
model_desc = super().save()
......@@ -113,8 +111,8 @@ class LassoRegression(ModelBase):
class LogisticRegression(ModelBase):
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas)
def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict:
......
......@@ -6,34 +6,42 @@ Created on 2017-9-4
"""
import abc
import copy
import arrow
import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
from alphamind.data.transformer import Transformer
class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None, formulas: dict=None):
def __init__(self, features=None):
if features is not None:
self.features = list(features)
self.formulas = Transformer(features)
self.features = self.formulas.names
else:
self.features = None
self.impl = None
self.formulas = copy.deepcopy(formulas)
self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray):
self.impl.fit(x, y.flatten())
def __eq__(self, rhs):
return encode(self.impl) == encode(rhs.impl) \
and self.trained_time == rhs.trained_time \
and list_eq(self.features, rhs.features) \
and encode(self.formulas) == encode(rhs.formulas)
def fit(self, x: pd.DataFrame, y: np.ndarray):
self.impl.fit(x[self.features].values, y.flatten())
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
return self.impl.predict(x)
def predict(self, x: pd.DataFrame) -> np.ndarray:
return self.impl.predict(x[self.features].values)
def score(self, x: np.ndarray, y: np.ndarray) -> float:
return self.impl.score(x, y)
def score(self, x: pd.DataFrame, y: np.ndarray) -> float:
return self.impl.score(x[self.features].values, y)
@abc.abstractmethod
def save(self) -> dict:
......
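`ModelBase` now accepts a pandas DataFrame and selects the `self.features` columns itself, so callers may pass a wider frame with columns in any order. A usage sketch (`LinearRegression` stands in for any subclass):

```python
import numpy as np
import pandas as pd
from alphamind.model.linearmodel import LinearRegression

x = pd.DataFrame(np.random.randn(100, 4), columns=['a', 'b', 'c', 'extra'])
y = np.random.randn(100)

model = LinearRegression(features=['a', 'b', 'c'])
model.fit(x, y)           # uses x[model.features].values under the hood
pred = model.predict(x)   # the surplus 'extra' column is simply ignored
```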
......@@ -5,10 +5,10 @@ Created on 2017-12-4
@author: cheng.li
"""
from typing import List
from distutils.version import LooseVersion
import arrow
import numpy as np
import pandas as pd
from sklearn import __version__ as sklearn_version
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
......@@ -26,18 +26,16 @@ class RandomForestRegressor(ModelBase):
def __init__(self,
n_estimators: int=100,
max_features: str='auto',
features: List=None,
features=None,
**kwargs):
super().__init__(features, **kwargs)
super().__init__(features)
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
self.trained_time = None
def save(self) -> dict:
model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc
@classmethod
......@@ -60,19 +58,16 @@ class RandomForestClassifier(ModelBase):
def __init__(self,
n_estimators: int=100,
max_features: str='auto',
features: List = None,
formulas: dict = None,
features=None,
**kwargs):
super().__init__(features, formulas=formulas)
super().__init__(features)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features,
**kwargs)
self.trained_time = None
def save(self) -> dict:
model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc
@classmethod
......@@ -96,11 +91,10 @@ class XGBRegressor(ModelBase):
n_estimators: int=100,
learning_rate: float=0.1,
max_depth: int=3,
features: List=None,
formulas: dict = None,
features=None,
n_jobs: int=1,
**kwargs):
super().__init__(features, formulas=formulas)
super().__init__(features)
self.impl = XGBRegressorImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
......@@ -110,7 +104,6 @@ class XGBRegressor(ModelBase):
def save(self) -> dict:
model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc
@classmethod
......@@ -134,11 +127,10 @@ class XGBClassifier(ModelBase):
n_estimators: int=100,
learning_rate: float=0.1,
max_depth: int=3,
features: List = None,
formulas: dict = None,
features=None,
n_jobs: int=1,
**kwargs):
super().__init__(features, formulas=formulas)
super().__init__(features)
self.impl = XGBClassifierImpl(n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
......@@ -148,7 +140,6 @@ class XGBClassifier(ModelBase):
def save(self) -> dict:
model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc
@classmethod
......@@ -179,12 +170,11 @@ class XGBTrainer(ModelBase):
early_stopping_rounds=None,
subsample=1.,
colsample_bytree=1.,
features: List = None,
formulas: dict = None,
features=None,
random_state: int=0,
n_jobs: int=1,
**kwargs):
super().__init__(features, formulas=formulas)
super().__init__(features)
self.params = {
'silent': 1,
'objective': objective,
......@@ -204,9 +194,9 @@ class XGBTrainer(ModelBase):
self.impl = None
self.kwargs = kwargs
def fit(self, x, y):
def fit(self, x: pd.DataFrame, y: np.ndarray):
if self.eval_sample:
x_train, x_eval, y_train, y_eval = train_test_split(x,
x_train, x_eval, y_train, y_eval = train_test_split(x[self.features].values,
y,
test_size=self.eval_sample,
random_state=42)
......@@ -219,7 +209,7 @@ class XGBTrainer(ModelBase):
verbose_eval=False,
**self.kwargs)
else:
d_train = xgb.DMatrix(x, y)
d_train = xgb.DMatrix(x[self.features].values, y)
self.impl = xgb.train(params=self.params,
dtrain=d_train,
num_boost_round=self.num_boost_round,
......@@ -227,14 +217,13 @@ class XGBTrainer(ModelBase):
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray:
d_predict = xgb.DMatrix(x)
def predict(self, x: pd.DataFrame) -> np.ndarray:
d_predict = xgb.DMatrix(x[self.features].values)
return self.impl.predict(d_predict)
def save(self) -> dict:
model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc
@classmethod
......
......@@ -16,9 +16,9 @@ class Allocation(object):
def __init__(self,
code: int,
minimum: int=0,
maximum: int=inf,
current: int=0):
minimum: int = 0,
maximum: int = inf,
current: int = 0):
self.code = code
self.minimum = minimum
self.maximum = maximum
......@@ -62,39 +62,32 @@ class Portfolio(object):
class Execution(object):
def __init__(self,
name: str,
code: int,
qty: int,
comment: str=None):
cpty: str = 'external',
comment: str = None):
self.name = name
self.code = code
self.qty = qty
self.cpty = cpty
self.comment = comment
def __repr__(self):
return "Execution(code={0}, qty={1}, comment={2})".format(self.code,
self.qty,
self.comment)
class Executions(object):
def __init__(self,
name,
executions: List[Execution]=None):
self.name = name
self.executions = executions
def __repr__(self):
return "Executions(name={0}, executions={1})".format(self.name,
self.executions)
return "Execution(name={0}, code={1}, qty={2}, cpty={3}, comment={4})".format(self.name,
self.code,
self.qty,
self.cpty,
self.comment)
class Asset(object):
def __init__(self,
code: int,
name: str=None,
priority: List[str]=None,
forbidden: List[str]=None):
name: str = None,
priority: List[str] = None,
forbidden: List[str] = None):
self.code = code
self.name = name
if priority:
......@@ -119,11 +112,11 @@ class Asset(object):
self.forbidden)
class TargetPositions(object):
class Positions(object):
def __init__(self,
assets: List[Asset]=None,
qtys: List[int]=None):
assets: List[Asset] = None,
qtys: List[int] = None):
if assets:
self.targets = {asset.code: (asset, qty) for asset, qty in zip(assets, qtys)}
......@@ -133,9 +126,9 @@ class TargetPositions(object):
def add_asset(self,
asset: Asset,
qty: int):
if asset.code in self.targets:
raise ValueError()
self.targets[asset.code] = (asset, qty)
code = asset.code
pyFinAssert(code not in self.targets, ValueError, "code {0} is already in positions".format(code))
self.targets[code] = (asset, qty)
def __getitem__(self, code: int) -> Tuple[Asset, int]:
return self.targets[code]
......@@ -148,10 +141,10 @@ class TargetPositions(object):
return "TargetPositions(assets={0}, qtys={1})".format(*zip(*self.targets.values()))
def handle_one_asset(pre_allocation: Allocation,
def handle_one_asset(p_name: str,
pre_allocation: Allocation,
asset: Asset,
qty: int) -> Tuple[Execution, Allocation, int]:
minimum = pre_allocation.minimum
maximum = pre_allocation.maximum
current = pre_allocation.current
......@@ -161,14 +154,20 @@ def handle_one_asset(pre_allocation: Allocation,
raise ValueError("{0}'s target {1} is smaller than minimum amount {2}".format(asset.code, qty, minimum))
elif qty < maximum:
# need to buy / sell
ex = Execution(code, qty - current)
ex = Execution(name=p_name,
code=code,
qty=qty - current,
cpty='external')
allocation = Allocation(code,
minimum=minimum,
maximum=maximum,
current=qty)
qty = 0
else:
ex = Execution(code, maximum - current)
ex = Execution(name=p_name,
code=code,
qty=maximum - current,
cpty='external')
allocation = Allocation(code,
minimum=minimum,
maximum=maximum,
......@@ -177,41 +176,40 @@ def handle_one_asset(pre_allocation: Allocation,
return ex, allocation, qty
def pass_through(target_pos: TargetPositions,
portfolio: Portfolio) -> Tuple[Executions, Portfolio, TargetPositions]:
def pass_through(target_pos: Positions,
portfolio: Portfolio) -> Tuple[List[Execution], Portfolio, Positions]:
p_name = portfolio.name
new_target_pos = TargetPositions()
new_target_pos = Positions()
allocations = []
executions = []
for code in target_pos.codes:
asset, qty = target_pos[code]
if asset.priority:
raise ValueError("asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority))
pyFinAssert(not asset.priority,
ValueError,
"asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority))
if p_name in asset.forbidden:
ex = Execution(name=p_name, code=code, qty=0, comment="{0} is forbidden for {1}".format(code, p_name))
allocation = copy.deepcopy(portfolio[code])
new_target_pos.add_asset(asset, qty)
else:
prev_allocation = portfolio[code]
ex, allocation, qty = handle_one_asset(prev_allocation, asset, qty)
ex, allocation, qty = handle_one_asset(p_name, prev_allocation, asset, qty)
new_target_pos.add_asset(asset, qty)
if ex.qty != 0:
executions.append(ex)
allocations.append(allocation)
executions.append(ex)
return Executions(p_name, executions), Portfolio(p_name, allocations), new_target_pos
return executions, Portfolio(p_name, allocations), new_target_pos
if __name__ == '__main__':
asset1 = Asset(1, 'a')
asset2 = Asset(2, 'b')
asset3 = Asset(3, 'b')
target_pos = TargetPositions([asset1, asset2, asset3], [200, 300, 100])
target_pos = Positions([asset1, asset2, asset3], [200, 300, 100])
allc1 = Allocation(1, 0, 100, 0)
allc2 = Allocation(2, 0, 400, 100)
......@@ -219,8 +217,3 @@ if __name__ == '__main__':
portfolio = Portfolio('test1', [allc1, allc2])
executions, portfolio, target_pos = pass_through(target_pos, portfolio)
# -*- coding: utf-8 -*-
"""
Created on 2018-2-9
@author: cheng.li
"""
import unittest
from PyFin.api import LAST
from alphamind.data.engines.universe import Universe
class TestUniverse(unittest.TestCase):
def test_universe_equal(self):
universe1 = Universe('custom', ['zz500'])
universe2 = Universe('custom', ['zz500'])
self.assertEqual(universe1, universe2)
universe1 = Universe('custom', ['zz500'])
universe2 = Universe('custom', ['zz800'])
self.assertNotEqual(universe1, universe2)
filter_cond = LAST('x') > 1.
universe1 = Universe('custom', ['zz500'], filter_cond=filter_cond)
universe2 = Universe('custom', ['zz500'], filter_cond=filter_cond)
self.assertEqual(universe1, universe2)
universe1 = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
universe2 = Universe('custom', ['zz500'], filter_cond=LAST('x') > 2.)
self.assertNotEqual(universe1, universe2)
def test_universe_persistence(self):
universe = Universe('custom', ['zz500'])
univ_desc = universe.save()
loaded_universe = Universe.load(univ_desc)
self.assertEqual(universe.name, loaded_universe.name)
self.assertListEqual(universe.base_universe, loaded_universe.base_universe)
universe = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
univ_desc = universe.save()
loaded_universe = Universe.load(univ_desc)
self.assertEqual(universe.name, loaded_universe.name)
self.assertListEqual(universe.base_universe, loaded_universe.base_universe)
self.assertEqual(str(universe.filter_cond), str(loaded_universe.filter_cond))
# -*- coding: utf-8 -*-
"""
Created on 2018-2-9
@author: cheng.li
"""
import unittest
from alphamind.data.engines.universe import Universe
from alphamind.model.composer import DataMeta
from alphamind.model.composer import Composer
from alphamind.model.treemodel import XGBClassifier
class TestComposer(unittest.TestCase):
def _assert_composer_equal(self, lhs: Composer, rhs: Composer):
self.assertEqual(lhs.alpha_model, rhs.alpha_model)
self.assertEqual(lhs.data_meta, rhs.data_meta)
def test_data_meta_persistence(self):
freq = '5b'
universe = Universe('custom', ['zz800'])
batch = 4
neutralized_risk = ['SIZE']
risk_model = 'long'
pre_process = ['standardize', 'winsorize_normal']
post_process = ['standardize', 'winsorize_normal']
warm_start = 2
data_source = 'postgresql://user:pwd@server/dummy'
data_meta = DataMeta(freq=freq,
universe=universe,
batch=batch,
neutralized_risk=neutralized_risk,
risk_model=risk_model,
pre_process=pre_process,
post_process=post_process,
warm_start=warm_start,
data_source=data_source)
data_desc = data_meta.save()
loaded_data = DataMeta.load(data_desc)
self.assertEqual(data_meta.freq, loaded_data.freq)
self.assertEqual(data_meta.universe, loaded_data.universe)
self.assertEqual(data_meta.batch, loaded_data.batch)
self.assertEqual(data_meta.neutralized_risk, loaded_data.neutralized_risk)
self.assertEqual(data_meta.risk_model, loaded_data.risk_model)
self.assertEqual(data_meta.pre_process, loaded_data.pre_process)
self.assertEqual(data_meta.post_process, loaded_data.post_process)
self.assertEqual(data_meta.warm_start, loaded_data.warm_start)
self.assertEqual(data_meta.data_source, loaded_data.data_source)
def test_composer_persistence(self):
freq = '5b'
universe = Universe('custom', ['zz800'])
batch = 4
neutralized_risk = ['SIZE']
risk_model = 'long'
pre_process = ['standardize', 'winsorize_normal']
post_process = ['standardize', 'winsorize_normal']
warm_start = 2
data_source = 'postgresql://user:pwd@server/dummy'
data_meta = DataMeta(freq=freq,
universe=universe,
batch=batch,
neutralized_risk=neutralized_risk,
risk_model=risk_model,
pre_process=pre_process,
post_process=post_process,
warm_start=warm_start,
data_source=data_source)
features = {'f1': 'closePrice', 'f2': 'openPrice'}
alpha_model = XGBClassifier(features=features)
composer = Composer(alpha_model=alpha_model,
data_meta=data_meta)
comp_desc = composer.save()
loaded_comp = Composer.load(comp_desc)
self._assert_composer_equal(composer, loaded_comp)
......@@ -7,6 +7,7 @@ Created on 2017-9-4
import unittest
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel
......@@ -19,23 +20,25 @@ class TestLinearModel(unittest.TestCase):
def setUp(self):
self.n = 3
self.train_x = np.random.randn(1000, self.n)
self.features = ['a', 'b', 'c']
self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.train_y = np.random.randn(1000)
self.train_y_label = np.where(self.train_y > 0., 1, 0)
self.predict_x = np.random.randn(10, self.n)
self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])
def test_const_linear_model(self):
weights = np.array([1., 2., 3.])
model = ConstLinearModel(features=['a', 'b', 'c'],
features = ['c', 'b', 'a']
weights = dict(c=3., b=2., a=1.)
model = ConstLinearModel(features=features,
weights=weights)
calculated_y = model.predict(self.predict_x)
expected_y = self.predict_x @ weights
expected_y = self.predict_x[features] @ np.array([weights[f] for f in features])
np.testing.assert_array_almost_equal(calculated_y, expected_y)
def test_const_linear_model_persistence(self):
weights = np.array([1., 2., 3.])
weights = dict(c=3., b=2., a=1.)
model = ConstLinearModel(features=['a', 'b', 'c'],
weights=weights)
......
......@@ -7,6 +7,7 @@ Created on 2017-9-5
import unittest
import numpy as np
import pandas as pd
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.loader import load_model
......@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase):
def setUp(self):
self.n = 3
self.trained_x = np.random.randn(1000, self.n)
self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.trained_y = np.random.randn(1000, 1)
self.predict_x = np.random.randn(100, self.n)
self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c'])
def test_load_model(self):
model = LinearRegression(['a', 'b', 'c'])
......
# -*- coding: utf-8 -*-
"""
Created on 2018-2-8
@author: cheng.li
"""
import unittest
from alphamind.model.linearmodel import ConstLinearModel
class TestModelBase(unittest.TestCase):
def test_simple_model_features(self):
model = ConstLinearModel(features=['c', 'b', 'a'])
self.assertListEqual(['a', 'b', 'c'], model.features)
\ No newline at end of file
......@@ -7,6 +7,7 @@ Created on 2018-1-5
import unittest
import numpy as np
import pandas as pd
from alphamind.model.loader import load_model
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
......@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer
class TestTreeModel(unittest.TestCase):
def setUp(self):
self.x = np.random.randn(1000, 10)
self.features = list('0123456789')
self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features)
self.y = np.random.randn(1000)
self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features)
def test_random_forest_regress_persistence(self):
model = RandomForestRegressor(features=list(range(10)))
model = RandomForestRegressor(features=self.features)
model.fit(self.x, self.y)
desc = model.save()
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_random_forest_classify_persistence(self):
model = RandomForestClassifier(features=list(range(10)))
model = RandomForestClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y)
......@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_regress_persistence(self):
model = XGBRegressor(features=list(range(10)))
model = XGBRegressor(features=self.features)
model.fit(self.x, self.y)
desc = model.save()
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_classify_persistence(self):
model = XGBClassifier(features=list(range(10)))
model = XGBClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y)
......@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_trainer_equal_classifier(self):
sample_x = np.random.randn(100, 10)
model1 = XGBClassifier(n_estimators=100,
learning_rate=0.1,
max_depth=3,
features=list(range(10)),
features=self.features,
random_state=42)
model2 = XGBTrainer(features=list(range(10)),
model2 = XGBTrainer(features=self.features,
objective='reg:logistic',
booster='gbtree',
tree_method='exact',
......@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase):
model1.fit(self.x, y)
model2.fit(self.x, y)
predict1 = model1.predict(sample_x)
predict2 = model2.predict(sample_x)
predict1 = model1.predict(self.sample_x)
predict2 = model2.predict(self.sample_x)
predict2 = np.where(predict2 > 0.5, 1., 0.)
np.testing.assert_array_almost_equal(predict1, predict2)
def test_xgb_trainer_persistence(self):
model = XGBTrainer(features=list(range(10)),
model = XGBTrainer(features=self.features,
objective='binary:logistic',
booster='gbtree',
tree_method='hist',
......@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10)
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
# -*- coding: utf-8 -*-
"""
Created on 2018-2-7
@author: cheng.li
"""
import unittest
from alphamind.portfolio.allocations import Asset
from alphamind.portfolio.allocations import Portfolio
from alphamind.portfolio.allocations import Positions
from alphamind.portfolio.allocations import Execution
class TestAllocation(unittest.TestCase):
pass
\ No newline at end of file
......@@ -15,6 +15,7 @@ from alphamind.tests.data.test_neutralize import TestNeutralize
from alphamind.tests.data.test_standardize import TestStandardize
from alphamind.tests.data.test_winsorize import TestWinsorize
from alphamind.tests.data.test_quantile import TestQuantile
from alphamind.tests.data.engines.test_universe import TestUniverse
from alphamind.tests.portfolio.test_constraints import TestConstraints
from alphamind.tests.portfolio.test_evolver import TestEvolver
from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild
......@@ -27,9 +28,11 @@ from alphamind.tests.analysis.test_riskanalysis import TestRiskAnalysis
from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis
from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis
from alphamind.tests.model.test_modelbase import TestModelBase
from alphamind.tests.model.test_linearmodel import TestLinearModel
from alphamind.tests.model.test_treemodel import TestTreeModel
from alphamind.tests.model.test_loader import TestLoader
from alphamind.tests.model.test_composer import TestComposer
from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor
from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor
from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor
......@@ -42,6 +45,7 @@ if __name__ == '__main__':
TestStandardize,
TestWinsorize,
TestQuantile,
TestUniverse,
TestConstraints,
TestEvolver,
TestLongShortBuild,
......@@ -54,9 +58,11 @@ if __name__ == '__main__':
TestPerformanceAnalysis,
TestFactorAnalysis,
TestQuantileAnalysis,
TestModelBase,
TestLinearModel,
TestTreeModel,
TestLoader,
TestComposer,
TestNaiveExecutor,
TestThresholdExecutor,
TestTargetVolExecutor,
......
......@@ -6,7 +6,7 @@ pandas >= 0.19.2
scikit-learn >= 0.18.1
numba >= 0.33.0
scipy >= 0.19.0
simpleutils >= 0.1.0
simpleutils >= 0.1.2
sqlalchemy >= 1.1.14
psycopg2 >= 2.7.1
finance-python >= 0.5.7
\ No newline at end of file