Commit 4b49b7bb authored by Yucheng

Merge branch 'master' of https://github.com/lion-sing/alpha-mind

parents 6c4fb657 2c6b4668
...@@ -33,6 +33,8 @@ install: ...@@ -33,6 +33,8 @@ install:
- conda install pandas - conda install pandas
- conda install scikit-learn - conda install scikit-learn
- conda install cython - conda install cython
- conda install sqlalchemy
- conda install psycopg2
- conda install -c conda-forge arrow - conda install -c conda-forge arrow
- pip install simpleutils - pip install simpleutils
- pip install coveralls - pip install coveralls
......
...@@ -48,7 +48,7 @@ alpha - mind 提供了多因子研究中常用的工具链,包括: ...@@ -48,7 +48,7 @@ alpha - mind 提供了多因子研究中常用的工具链,包括:
* Linux * Linux
   在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran): 在linux上,需要c++编译器(例如g++)以及fortran编译器(例如gfortran):
```bash ```bash
build_linux_dependencies.sh build_linux_dependencies.sh
......
...@@ -43,6 +43,10 @@ from alphamind.model import load_model ...@@ -43,6 +43,10 @@ from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.data_preparing import fetch_predict_phase from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.model.composer import Composer
from alphamind.model.composer import DataMeta
from alphamind.model.composer import train_model
from alphamind.model.composer import predict_by_model
from alphamind.execution.naiveexecutor import NaiveExecutor from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor from alphamind.execution.thresholdexecutor import ThresholdExecutor
...@@ -79,6 +83,10 @@ __all__ = [ ...@@ -79,6 +83,10 @@ __all__ = [
'fetch_data_package', 'fetch_data_package',
'fetch_train_phase', 'fetch_train_phase',
'fetch_predict_phase', 'fetch_predict_phase',
'Composer',
'DataMeta',
'train_model',
'predict_by_model',
'LinearRegression', 'LinearRegression',
'LassoRegression', 'LassoRegression',
'ConstLinearModel', 'ConstLinearModel',
......
...@@ -5,8 +5,7 @@ Created on 2017-6-29 ...@@ -5,8 +5,7 @@ Created on 2017-6-29
@author: cheng.li @author: cheng.li
""" """
from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text, Boolean, text from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text, Boolean, text, JSON
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base() Base = declarative_base()
...@@ -635,7 +634,7 @@ class DailyPortfolios(Base): ...@@ -635,7 +634,7 @@ class DailyPortfolios(Base):
industry = Column(String(50), nullable=False) industry = Column(String(50), nullable=False)
benchmark_weight = Column(Float(53), nullable=False) benchmark_weight = Column(Float(53), nullable=False)
is_tradable = Column(Boolean, nullable=False) is_tradable = Column(Boolean, nullable=False)
factor = Column(JSONB) factor = Column(JSON)
class DailyPortfoliosSchedule(Base): class DailyPortfoliosSchedule(Base):
...@@ -857,7 +856,8 @@ class Models(Base): ...@@ -857,7 +856,8 @@ class Models(Base):
model_type = Column(String(30), nullable=False) model_type = Column(String(30), nullable=False)
model_version = Column(BigInteger, nullable=False) model_version = Column(BigInteger, nullable=False)
update_time = Column(DateTime, nullable=False) update_time = Column(DateTime, nullable=False)
model_desc = Column(JSONB, nullable=False) model_desc = Column(JSON, nullable=False)
data_meta = Column(JSON, nullable=True)
is_primary = Column(Boolean) is_primary = Column(Boolean)
model_id = Column(Integer, primary_key=True, autoincrement=True) model_id = Column(Integer, primary_key=True, autoincrement=True)
...@@ -915,7 +915,7 @@ class Positions(Base): ...@@ -915,7 +915,7 @@ class Positions(Base):
trade_date = Column(DateTime, primary_key=True, nullable=False) trade_date = Column(DateTime, primary_key=True, nullable=False)
portfolio = Column(String(50), primary_key=True, nullable=False) portfolio = Column(String(50), primary_key=True, nullable=False)
type = Column(String(50), primary_key=True, nullable=False) type = Column(String(50), primary_key=True, nullable=False)
weight = Column(JSONB) weight = Column(JSON)
class QuantileAnalysis(Base): class QuantileAnalysis(Base):
...@@ -1865,7 +1865,7 @@ class Formulas(Base): ...@@ -1865,7 +1865,7 @@ class Formulas(Base):
__tablename__ = 'formulas' __tablename__ = 'formulas'
formula = Column(String(50), primary_key=True) formula = Column(String(50), primary_key=True)
formula_desc = Column(JSONB, nullable=False) formula_desc = Column(JSON, nullable=False)
comment = Column(Text) comment = Column(Text)
......
...@@ -155,6 +155,13 @@ class SqlEngine(object): ...@@ -155,6 +155,13 @@ class SqlEngine(object):
dates: Iterable[str] = None) -> pd.DataFrame: dates: Iterable[str] = None) -> pd.DataFrame:
return universe.query(self, start_date, end_date, dates) return universe.query(self, start_date, end_date, dates)
def _create_stats(self, table, horizon, offset, code_attr='code'):
    """Build the windowed sum expression labelled ``dx`` for ``table``.

    The expression sums ``self.ln_func(1. + table.chgPct)`` over a row
    window that is partitioned by the ``code_attr`` column of ``table`` and
    ordered by ``table.trade_date``.

    :param table: mapped table exposing ``chgPct``, ``trade_date`` and the
        attribute named by ``code_attr``.
    :param horizon: added to the upper row bound of the window only.
    :param offset: added to both row bounds of the window.
    :param code_attr: name of the attribute on ``table`` to partition by.
    :return: the window expression, labelled ``'dx'``.
    """
    # Both bounds share the same shift; the upper bound is ``horizon`` rows further.
    first_row = 1 + DAILY_RETURN_OFFSET + offset
    last_row = 1 + horizon + DAILY_RETURN_OFFSET + offset

    log_change = self.ln_func(1. + table.chgPct)
    windowed_sum = func.sum(log_change).over(
        partition_by=getattr(table, code_attr),
        order_by=table.trade_date,
        rows=(first_row, last_row))
    return windowed_sum.label('dx')
def fetch_dx_return(self, def fetch_dx_return(self,
ref_date: str, ref_date: str,
codes: Iterable[int], codes: Iterable[int],
...@@ -169,10 +176,7 @@ class SqlEngine(object): ...@@ -169,10 +176,7 @@ class SqlEngine(object):
else: else:
end_date = expiry_date end_date = expiry_date
stats = func.sum(self.ln_func(1. + Market.chgPct)).over( stats = self._create_stats(Market, horizon, offset)
partition_by=Market.code,
order_by=Market.trade_date,
rows=(1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
query = select([Market.trade_date, Market.code, stats]).where( query = select([Market.trade_date, Market.code, stats]).where(
and_( and_(
...@@ -200,24 +204,22 @@ class SqlEngine(object): ...@@ -200,24 +204,22 @@ class SqlEngine(object):
end_date = advanceDateByCalendar('china.sse', end_date, end_date = advanceDateByCalendar('china.sse', end_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d') str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
stats = func.sum(self.ln_func(1. + Market.chgPct)).over( stats = self._create_stats(Market, horizon, offset)
partition_by=Market.code,
order_by=Market.trade_date,
rows=(1 + offset + DAILY_RETURN_OFFSET, 1 + horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
cond = universe._query_statements(start_date, end_date, None) cond = universe._query_statements(start_date, end_date, None)
big_table = join(Market, UniverseTable, t = select([Market.trade_date, Market.code, stats]).where(
Market.trade_date.between(start_date, end_date)
).alias('t')
big_table = join(t, UniverseTable,
and_( and_(
Market.trade_date == UniverseTable.trade_date, t.columns['trade_date'] == UniverseTable.trade_date,
Market.code == UniverseTable.code, t.columns['code'] == UniverseTable.code,
cond cond
) )
) )
query = select([Market.trade_date, Market.code, stats]) \ query = select([t]).select_from(big_table)
.select_from(big_table)
df = pd.read_sql(query, self.session.bind).dropna() df = pd.read_sql(query, self.session.bind).dropna()
if universe.is_filtered: if universe.is_filtered:
...@@ -242,10 +244,7 @@ class SqlEngine(object): ...@@ -242,10 +244,7 @@ class SqlEngine(object):
else: else:
end_date = expiry_date end_date = expiry_date
stats = func.sum(self.ln_func(1. + IndexMarket.chgPct)).over( stats = self._create_stats(IndexMarket, horizon, offset, code_attr='indexCode')
partition_by=IndexMarket.indexCode,
order_by=IndexMarket.trade_date,
rows=(1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
query = select([IndexMarket.trade_date, IndexMarket.indexCode.label('code'), stats]).where( query = select([IndexMarket.trade_date, IndexMarket.indexCode.label('code'), stats]).where(
and_( and_(
...@@ -273,10 +272,7 @@ class SqlEngine(object): ...@@ -273,10 +272,7 @@ class SqlEngine(object):
end_date = advanceDateByCalendar('china.sse', end_date, end_date = advanceDateByCalendar('china.sse', end_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d') str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
stats = func.sum(self.ln_func(1. + IndexMarket.chgPct)).over( stats = self._create_stats(IndexMarket, horizon, offset, code_attr='indexCode')
partition_by=IndexMarket.indexCode,
order_by=IndexMarket.trade_date,
rows=(1 + offset + DAILY_RETURN_OFFSET, 1 + horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
query = select([IndexMarket.trade_date, IndexMarket.indexCode.label('code'), stats]) \ query = select([IndexMarket.trade_date, IndexMarket.indexCode.label('code'), stats]) \
.where( .where(
...@@ -360,9 +356,11 @@ class SqlEngine(object): ...@@ -360,9 +356,11 @@ class SqlEngine(object):
factor_cols = _map_factors(dependency, factor_tables) factor_cols = _map_factors(dependency, factor_tables)
big_table = FullFactor big_table = FullFactor
joined_tables = set()
joined_tables.add(FullFactor.__table__.name)
for t in set(factor_cols.values()): for t in set(factor_cols.values()):
if t.__table__.name != FullFactor.__table__.name: if t.__table__.name not in joined_tables:
if dates is not None: if dates is not None:
big_table = outerjoin(big_table, t, and_(FullFactor.trade_date == t.trade_date, big_table = outerjoin(big_table, t, and_(FullFactor.trade_date == t.trade_date,
FullFactor.code == t.code, FullFactor.code == t.code,
...@@ -371,20 +369,18 @@ class SqlEngine(object): ...@@ -371,20 +369,18 @@ class SqlEngine(object):
big_table = outerjoin(big_table, t, and_(FullFactor.trade_date == t.trade_date, big_table = outerjoin(big_table, t, and_(FullFactor.trade_date == t.trade_date,
FullFactor.code == t.code, FullFactor.code == t.code,
FullFactor.trade_date.between(start_date, end_date))) FullFactor.trade_date.between(start_date, end_date)))
joined_tables.add(t.__table__.name)
cond = universe._query_statements(start_date, end_date, dates) universe_df = universe.query(self, start_date, end_date, dates)
big_table = join(big_table, UniverseTable,
and_(
FullFactor.trade_date == UniverseTable.trade_date,
FullFactor.code == UniverseTable.code,
cond
)
)
query = select( query = select(
[FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \ [FullFactor.trade_date, FullFactor.code, FullFactor.isOpen] + list(factor_cols.keys())) \
.select_from(big_table).distinct() .select_from(big_table).where(
and_(
FullFactor.code.in_(universe_df.code.unique().tolist()),
FullFactor.trade_date.in_(dates) if dates is not None else FullFactor.trade_date.between(start_date, end_date)
)
).distinct()
df = pd.read_sql(query, self.engine) df = pd.read_sql(query, self.engine)
if universe.is_filtered: if universe.is_filtered:
...@@ -395,7 +391,6 @@ class SqlEngine(object): ...@@ -395,7 +391,6 @@ class SqlEngine(object):
df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna() df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()
df.sort_values(['trade_date', 'code'], inplace=True) df.sort_values(['trade_date', 'code'], inplace=True)
df.set_index('trade_date', inplace=True) df.set_index('trade_date', inplace=True)
res = transformer.transform('code', df) res = transformer.transform('code', df)
...@@ -404,11 +399,13 @@ class SqlEngine(object): ...@@ -404,11 +399,13 @@ class SqlEngine(object):
df[col] = res[col].values df[col] = res[col].values
df['isOpen'] = df.isOpen.astype(bool) df['isOpen'] = df.isOpen.astype(bool)
return df.reset_index() df = df.reset_index()
return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')
def fetch_benchmark(self, def fetch_benchmark(self,
ref_date: str, ref_date: str,
benchmark: int) -> pd.DataFrame: benchmark: int,
codes: Iterable[int]=None) -> pd.DataFrame:
query = select([IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where( query = select([IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
and_( and_(
IndexComponent.trade_date == ref_date, IndexComponent.trade_date == ref_date,
...@@ -416,7 +413,13 @@ class SqlEngine(object): ...@@ -416,7 +413,13 @@ class SqlEngine(object):
) )
) )
return pd.read_sql(query, self.engine) df = pd.read_sql(query, self.engine)
if codes:
df.set_index(['code'], inplace=True)
df = df.reindex(codes).fillna(0.)
df.reset_index(inplace=True)
return df
def fetch_benchmark_range(self, def fetch_benchmark_range(self,
benchmark: int, benchmark: int,
...@@ -613,7 +616,7 @@ class SqlEngine(object): ...@@ -613,7 +616,7 @@ class SqlEngine(object):
res = df[['trade_date', 'code', 'industry_code', 'industry_name'] + in_s] res = df[['trade_date', 'code', 'industry_code', 'industry_name'] + in_s]
res = res.assign(**dict(zip(out_s, [0]*len(out_s)))) res = res.assign(**dict(zip(out_s, [0] * len(out_s))))
return res return res
def fetch_trade_status(self, def fetch_trade_status(self,
...@@ -747,6 +750,7 @@ class SqlEngine(object): ...@@ -747,6 +750,7 @@ class SqlEngine(object):
model_version=None, model_version=None,
is_primary=True, is_primary=True,
model_id=None) -> pd.DataFrame: model_id=None) -> pd.DataFrame:
from alphamind.model.composer import DataMeta
conditions = [] conditions = []
...@@ -768,8 +772,10 @@ class SqlEngine(object): ...@@ -768,8 +772,10 @@ class SqlEngine(object):
model_df = pd.read_sql(query, self.engine) model_df = pd.read_sql(query, self.engine)
for i, model_desc in enumerate(model_df.model_desc): for i, data in enumerate(zip(model_df.model_desc, model_df.data_meta)):
model_desc, data_desc = data
model_df.loc[i, 'model'] = load_model(model_desc) model_df.loc[i, 'model'] = load_model(model_desc)
model_df.loc[i, 'data_meta'] = DataMeta.load(data_desc)
del model_df['model_desc'] del model_df['model_desc']
return model_df return model_df
...@@ -923,10 +929,11 @@ class SqlEngine(object): ...@@ -923,10 +929,11 @@ class SqlEngine(object):
if __name__ == '__main__': if __name__ == '__main__':
universe = Universe('ss', ['hs300'])
from PyFin.api import *
engine = SqlEngine() engine = SqlEngine()
ref_date = '2017-12-28' ref_date = '2017-06-29'
codes = universe.query(engine, dates=[ref_date]) universe = Universe('', ['zz800'])
df = engine.fetch_trade_status(ref_date, codes.code.tolist())
print(df) dates = makeSchedule('2010-01-01', '2018-02-01', '10b', 'china.sse')
\ No newline at end of file df = engine.fetch_factor_range(universe, DIFF('roe_q'), dates=dates)
...@@ -7,17 +7,19 @@ Created on 2017-7-7 ...@@ -7,17 +7,19 @@ Created on 2017-7-7
from typing import Iterable from typing import Iterable
import pandas as pd import pandas as pd
from simpleutils.miscellaneous import list_eq
from sqlalchemy import and_ from sqlalchemy import and_
from sqlalchemy import or_ from sqlalchemy import or_
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy import join from sqlalchemy import join
from sqlalchemy import outerjoin from sqlalchemy import outerjoin
from PyFin.api import pyFinAssert
from alphamind.data.dbmodel.models import Universe as UniverseTable from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import FullFactor from alphamind.data.dbmodel.models import FullFactor
from alphamind.data.engines.utilities import _map_factors from alphamind.data.engines.utilities import _map_factors
from alphamind.data.engines.utilities import factor_tables from alphamind.data.engines.utilities import factor_tables
from alphamind.data.transformer import Transformer from alphamind.data.transformer import Transformer
from alphamind.utilities import encode
from alphamind.utilities import decode
class Universe(object): class Universe(object):
...@@ -25,15 +27,22 @@ class Universe(object): ...@@ -25,15 +27,22 @@ class Universe(object):
def __init__(self, def __init__(self,
name: str, name: str,
base_universe: Iterable, base_universe: Iterable,
exclude_universe: Iterable=None, exclude_universe: Iterable = None,
special_codes: Iterable=None, special_codes: Iterable = None,
filter_cond=None): filter_cond=None):
self.name = name self.name = name
self.base_universe = base_universe self.base_universe = sorted(base_universe) if base_universe else None
self.exclude_universe = exclude_universe self.exclude_universe = sorted(exclude_universe) if exclude_universe else None
self.special_codes = special_codes self.special_codes = sorted(special_codes) if special_codes else None
self.filter_cond = filter_cond self.filter_cond = filter_cond
def __eq__(self, rhs):
    """Return whether ``rhs`` describes the same universe as ``self``.

    Two universes are equal when their names match, their
    ``base_universe``, ``exclude_universe`` and ``special_codes`` compare
    equal through ``list_eq``, and the string forms of their
    ``filter_cond`` are identical.

    :param rhs: the object to compare against.
    :return: ``True``/``False`` for another ``Universe``; ``NotImplemented``
        for any other type, so that ``universe == None`` evaluates to
        ``False`` instead of raising ``AttributeError``.
    """
    # NOTE(review): a class that defines __eq__ without __hash__ is
    # unhashable in Python 3 -- confirm no caller relies on using instances
    # as dict keys or set members, or that __hash__ is defined elsewhere.
    if not isinstance(rhs, Universe):
        # Foreign types lack the attributes read below; defer to Python's
        # default comparison handling rather than failing on ``rhs.name``.
        return NotImplemented

    # filter_cond is compared through str() rather than ``==``.
    return self.name == rhs.name \
        and list_eq(self.base_universe, rhs.base_universe) \
        and list_eq(self.exclude_universe, rhs.exclude_universe) \
        and list_eq(self.special_codes, rhs.special_codes) \
        and str(self.filter_cond) == str(rhs.filter_cond)
@property @property
def is_filtered(self): def is_filtered(self):
return True if self.filter_cond is not None else False return True if self.filter_cond is not None else False
...@@ -59,7 +68,7 @@ class Universe(object): ...@@ -59,7 +68,7 @@ class Universe(object):
*and_conditions *and_conditions
) )
def query(self, engine, start_date: str=None, end_date: str=None, dates=None) -> pd.DataFrame: def query(self, engine, start_date: str = None, end_date: str = None, dates=None) -> pd.DataFrame:
universe_cond = self._query_statements(start_date, end_date, dates) universe_cond = self._query_statements(start_date, end_date, dates)
...@@ -103,6 +112,29 @@ class Universe(object): ...@@ -103,6 +112,29 @@ class Universe(object):
df = df[df[filter_fields[0]] == 1].reset_index()[['trade_date', 'code']] df = df[df[filter_fields[0]] == 1].reset_index()[['trade_date', 'code']]
return df return df
def save(self):
    """Return a dict description of this universe.

    The dict carries ``name``, ``base_universe``, ``exclude_universe`` and
    ``special_codes`` as stored on the instance, plus ``filter_cond`` passed
    through ``encode``. The keys match those read back by ``load``.

    :return: dict with the five keys listed above.
    """
    universe_desc = {
        'name': self.name,
        'base_universe': self.base_universe,
        'exclude_universe': self.exclude_universe,
        'special_codes': self.special_codes,
        # The filter condition is the only field that goes through ``encode``.
        'filter_cond': encode(self.filter_cond),
    }
    return universe_desc
@classmethod
def load(cls, universe_desc: dict):
    """Build an instance from a description dict such as ``save`` returns.

    :param universe_desc: dict holding the keys ``name``, ``base_universe``,
        ``exclude_universe``, ``special_codes`` and ``filter_cond``.
    :return: a new ``cls`` instance constructed from those values.
    :raises KeyError: if any of the five keys is missing.
    """
    # These fields are handed to the constructor exactly as stored.
    plain_fields = ('name', 'base_universe', 'exclude_universe', 'special_codes')
    kwargs = {}
    for field in plain_fields:
        kwargs[field] = universe_desc[field]

    # The filter condition is the only field that goes through ``decode``.
    kwargs['filter_cond'] = decode(universe_desc['filter_cond'])
    return cls(**kwargs)
if __name__ == '__main__': if __name__ == '__main__':
from PyFin.api import * from PyFin.api import *
......
...@@ -12,9 +12,6 @@ from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder ...@@ -12,9 +12,6 @@ from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
from PyFin.api import transform as transform_impl from PyFin.api import transform as transform_impl
DEFAULT_FACTOR_NAME = 'user_factor'
def factor_translator(factor_pool): def factor_translator(factor_pool):
if not factor_pool: if not factor_pool:
...@@ -23,7 +20,7 @@ def factor_translator(factor_pool): ...@@ -23,7 +20,7 @@ def factor_translator(factor_pool):
if isinstance(factor_pool, str): if isinstance(factor_pool, str):
return {factor_pool: factor_pool}, [factor_pool] return {factor_pool: factor_pool}, [factor_pool]
elif isinstance(factor_pool, SecurityValueHolder): elif isinstance(factor_pool, SecurityValueHolder):
return {DEFAULT_FACTOR_NAME: factor_pool}, sorted(factor_pool.fields) return {str(factor_pool): factor_pool}, sorted(factor_pool.fields)
elif isinstance(factor_pool, dict): elif isinstance(factor_pool, dict):
dependency = set() dependency = set()
for k, v in factor_pool.items(): for k, v in factor_pool.items():
...@@ -46,7 +43,7 @@ def factor_translator(factor_pool): ...@@ -46,7 +43,7 @@ def factor_translator(factor_pool):
factor_dict[f] = f factor_dict[f] = f
dependency = dependency.union([f]) dependency = dependency.union([f])
elif isinstance(f, SecurityValueHolder): elif isinstance(f, SecurityValueHolder):
factor_dict[DEFAULT_FACTOR_NAME + '_' + str(k).zfill(3)] = f factor_dict[str(f)] = f
dependency = dependency.union(f.fields) dependency = dependency.union(f.fields)
k += 1 k += 1
return factor_dict, sorted(dependency) return factor_dict, sorted(dependency)
...@@ -80,3 +77,8 @@ class Transformer(object): ...@@ -80,3 +77,8 @@ class Transformer(object):
return transformed_data return transformed_data
else: else:
return pd.DataFrame() return pd.DataFrame()
if __name__ == '__main__':
transformer = Transformer(['c', 'a'])
...@@ -21,7 +21,7 @@ Back test parameter settings ...@@ -21,7 +21,7 @@ Back test parameter settings
""" """
start_date = '2010-01-01' start_date = '2010-01-01'
end_date = '2018-01-26' end_date = '2018-01-29'
frequency = '10b' frequency = '10b'
method = 'risk_neutral' method = 'risk_neutral'
...@@ -216,7 +216,7 @@ def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True ...@@ -216,7 +216,7 @@ def factor_analysis(engine, factor_name, universe, benchmark_code, positive=True
def worker_func_positive(factor_name): def worker_func_positive(factor_name):
from alphamind.api import SqlEngine, Universe from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q'] neutralize_factors = ['roe_q', 'ep_q']
engine = SqlEngine() engine = SqlEngine()
benchmark_code = 905 benchmark_code = 905
universe_name = ['zz500'] universe_name = ['zz500']
...@@ -226,7 +226,7 @@ def worker_func_positive(factor_name): ...@@ -226,7 +226,7 @@ def worker_func_positive(factor_name):
def worker_func_negative(factor_name): def worker_func_negative(factor_name):
from alphamind.api import SqlEngine, Universe from alphamind.api import SqlEngine, Universe
neutralize_factors = None #['roe_q', 'ep_q'] neutralize_factors = ['roe_q', 'ep_q']
engine = SqlEngine() engine = SqlEngine()
benchmark_code = 905 benchmark_code = 905
universe_name = ['zz500'] universe_name = ['zz500']
...@@ -235,34 +235,34 @@ def worker_func_negative(factor_name): ...@@ -235,34 +235,34 @@ def worker_func_negative(factor_name):
if __name__ == '__main__': if __name__ == '__main__':
# from dask.distributed import Client from dask.distributed import Client
#
# client = Client('10.63.6.176:8786')
#
# engine = SqlEngine()
# df = engine.fetch_factor_coverage()
# df = df[df.universe == 'zz800'].groupby('factor').mean()
# df = df[df.coverage >= 0.98]
#
# tasks = client.map(worker_func_positive, df.index.tolist())
# res1 = client.gather(tasks)
#
# tasks = client.map(worker_func_negative, df.index.tolist())
# res2 = client.gather(tasks)
#
# factor_df = pd.DataFrame()
#
# for f_name, df in res1:
# factor_df[f_name] = df['returns']
#
# for f_name, df in res2:
# factor_df[f_name] = df['returns']
factor_name = LAST('ep_q') # LAST('EBITDA') / LAST('ev') client = Client('192.168.0.102:8786')
f_name, ret_df = worker_func_positive(factor_name)
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6), engine = SqlEngine()
title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format( df = engine.fetch_factor_coverage()
frequency, factor_name, 905), df = df[df.universe == 'zz800'].groupby('factor').mean()
secondary_y='tc_cost') df = df[df.coverage >= 0.98]
plt.show()
tasks = client.map(worker_func_positive, df.index.tolist())
res1 = client.gather(tasks)
tasks = client.map(worker_func_negative, df.index.tolist())
res2 = client.gather(tasks)
factor_df = pd.DataFrame()
for f_name, df in res1:
factor_df[f_name] = df['returns']
for f_name, df in res2:
factor_df[f_name] = df['returns']
# factor_name = LAST('EBITDA') / LAST('ev')
# f_name, ret_df = worker_func_positive(factor_name)
#
# ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
# title='Fixed frequency rebalanced: {0} for {1} with benchmark {2}'.format(
# frequency, factor_name, 905),
# secondary_y='tc_cost')
# plt.show()
This diff is collapsed.
...@@ -15,6 +15,7 @@ from PyFin.api import makeSchedule ...@@ -15,6 +15,7 @@ from PyFin.api import makeSchedule
from PyFin.api import BizDayConventions from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar from PyFin.api import advanceDateByCalendar
from PyFin.api import pyFinAssert
from PyFin.DateUtilities import Period from PyFin.DateUtilities import Period
from alphamind.data.transformer import Transformer from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine from alphamind.data.engines.sqlengine import SqlEngine
...@@ -101,7 +102,8 @@ def prepare_data(engine: SqlEngine, ...@@ -101,7 +102,8 @@ def prepare_data(engine: SqlEngine,
['trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'] + transformer.names] ['trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'] + transformer.names]
def batch_processing(x_values, def batch_processing(names,
x_values,
y_values, y_values,
groups, groups,
group_label, group_label,
...@@ -132,10 +134,11 @@ def batch_processing(x_values, ...@@ -132,10 +134,11 @@ def batch_processing(x_values,
else: else:
this_risk_exp = None this_risk_exp = None
train_x_buckets[end] = factor_processing(this_raw_x, train_x_buckets[end] = pd.DataFrame(factor_processing(this_raw_x,
pre_process=pre_process, pre_process=pre_process,
risk_factors=this_risk_exp, risk_factors=this_risk_exp,
post_process=post_process) post_process=post_process),
columns=names)
train_y_buckets[end] = factor_processing(this_raw_y, train_y_buckets[end] = factor_processing(this_raw_y,
pre_process=pre_process, pre_process=pre_process,
...@@ -163,7 +166,7 @@ def batch_processing(x_values, ...@@ -163,7 +166,7 @@ def batch_processing(x_values,
inner_left_index = bisect.bisect_left(sub_dates, end) inner_left_index = bisect.bisect_left(sub_dates, end)
inner_right_index = bisect.bisect_right(sub_dates, end) inner_right_index = bisect.bisect_right(sub_dates, end)
predict_x_buckets[end] = ne_x[inner_left_index:inner_right_index] predict_x_buckets[end] = pd.DataFrame(ne_x[inner_left_index:inner_right_index], columns=names)
predict_risk_buckets[end] = this_risk_exp[inner_left_index:inner_right_index] predict_risk_buckets[end] = this_risk_exp[inner_left_index:inner_right_index]
predict_codes_bucket[end] = this_codes[inner_left_index:inner_right_index] predict_codes_bucket[end] = this_codes[inner_left_index:inner_right_index]
...@@ -198,8 +201,8 @@ def fetch_data_package(engine: SqlEngine, ...@@ -198,8 +201,8 @@ def fetch_data_package(engine: SqlEngine,
pre_process: Iterable[object] = None, pre_process: Iterable[object] = None,
post_process: Iterable[object] = None) -> dict: post_process: Iterable[object] = None) -> dict:
alpha_logger.info("Starting data package fetching ...") alpha_logger.info("Starting data package fetching ...")
transformer = Transformer(alpha_factors) transformer = Transformer(alpha_factors)
names = transformer.names
dates, return_df, factor_df = prepare_data(engine, dates, return_df, factor_df = prepare_data(engine,
transformer, transformer,
start_date, start_date,
...@@ -210,7 +213,7 @@ def fetch_data_package(engine: SqlEngine, ...@@ -210,7 +213,7 @@ def fetch_data_package(engine: SqlEngine,
warm_start) warm_start)
return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \ return_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
_merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk) _merge_df(engine, names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)
alpha_logger.info("data merging finished") alpha_logger.info("data merging finished")
...@@ -226,7 +229,8 @@ def fetch_data_package(engine: SqlEngine, ...@@ -226,7 +229,8 @@ def fetch_data_package(engine: SqlEngine,
alpha_logger.info("Loading data is finished") alpha_logger.info("Loading data is finished")
train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets, predict_codes_bucket \ train_x_buckets, train_y_buckets, train_risk_buckets, predict_x_buckets, predict_y_buckets, predict_risk_buckets, predict_codes_bucket \
= batch_processing(x_values, = batch_processing(names,
x_values,
y_values, y_values,
dates, dates,
date_label, date_label,
...@@ -239,15 +243,16 @@ def fetch_data_package(engine: SqlEngine, ...@@ -239,15 +243,16 @@ def fetch_data_package(engine: SqlEngine,
alpha_logger.info("Data processing is finished") alpha_logger.info("Data processing is finished")
ret = dict() ret = dict()
ret['x_names'] = transformer.names ret['x_names'] = names
ret['settlement'] = return_df ret['settlement'] = return_df
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets} ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets, 'code': predict_codes_bucket} ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets,
'code': predict_codes_bucket}
return ret return ret
def fetch_train_phase(engine, def fetch_train_phase(engine,
alpha_factors: Iterable[object], alpha_factors: Union[Transformer, Iterable[object]],
ref_date, ref_date,
frequency, frequency,
universe, universe,
...@@ -257,6 +262,9 @@ def fetch_train_phase(engine, ...@@ -257,6 +262,9 @@ def fetch_train_phase(engine,
pre_process: Iterable[object] = None, pre_process: Iterable[object] = None,
post_process: Iterable[object] = None, post_process: Iterable[object] = None,
warm_start: int = 0) -> dict: warm_start: int = 0) -> dict:
if isinstance(alpha_factors, Transformer):
transformer = alpha_factors
else:
transformer = Transformer(alpha_factors) transformer = Transformer(alpha_factors)
p = Period(frequency) p = Period(frequency)
...@@ -284,11 +292,12 @@ def fetch_train_phase(engine, ...@@ -284,11 +292,12 @@ def fetch_train_phase(engine,
_merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk) _merge_df(engine, transformer.names, factor_df, return_df, universe, dates, risk_model, neutralized_risk)
if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'): if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
pyFinAssert(len(dates) >= 2, ValueError, "No previous data for training for the date {0}".format(ref_date))
end = dates[-2] end = dates[-2]
start = dates[-batch - 1] start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
else: else:
end = dates[-1] end = dates[-1]
start = dates[-batch] start = dates[-batch] if batch <= len(dates) else dates[0]
index = (date_label >= start) & (date_label <= end) index = (date_label >= start) & (date_label <= end)
this_raw_x = x_values[index] this_raw_x = x_values[index]
...@@ -311,13 +320,13 @@ def fetch_train_phase(engine, ...@@ -311,13 +320,13 @@ def fetch_train_phase(engine,
ret = dict() ret = dict()
ret['x_names'] = transformer.names ret['x_names'] = transformer.names
ret['train'] = {'x': ne_x, 'y': ne_y, 'code': this_code} ret['train'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'y': ne_y, 'code': this_code}
return ret return ret
def fetch_predict_phase(engine, def fetch_predict_phase(engine,
alpha_factors: Iterable[object], alpha_factors: Union[Transformer, Iterable[object]],
ref_date, ref_date,
frequency, frequency,
universe, universe,
...@@ -326,7 +335,11 @@ def fetch_predict_phase(engine, ...@@ -326,7 +335,11 @@ def fetch_predict_phase(engine,
risk_model: str = 'short', risk_model: str = 'short',
pre_process: Iterable[object] = None, pre_process: Iterable[object] = None,
post_process: Iterable[object] = None, post_process: Iterable[object] = None,
warm_start: int = 0): warm_start: int = 0,
fillna: str=None):
if isinstance(alpha_factors, Transformer):
transformer = alpha_factors
else:
transformer = Transformer(alpha_factors) transformer = Transformer(alpha_factors)
p = Period(frequency) p = Period(frequency)
...@@ -340,7 +353,12 @@ def fetch_predict_phase(engine, ...@@ -340,7 +353,12 @@ def fetch_predict_phase(engine,
dateRule=BizDayConventions.Following, dateRule=BizDayConventions.Following,
dateGenerationRule=DateGeneration.Backward) dateGenerationRule=DateGeneration.Backward)
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates).dropna() factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
if fillna:
factor_df = factor_df.groupby('trade_date').apply(lambda x: x.fillna(x.median())).reset_index(drop=True).dropna()
else:
factor_df = factor_df.dropna()
names = transformer.names names = transformer.names
...@@ -360,7 +378,7 @@ def fetch_predict_phase(engine, ...@@ -360,7 +378,7 @@ def fetch_predict_phase(engine,
if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'): if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
end = dates[-1] end = dates[-1]
start = dates[-batch] start = dates[-batch] if batch <= len(dates) else dates[0]
left_index = bisect.bisect_left(date_label, start) left_index = bisect.bisect_left(date_label, start)
right_index = bisect.bisect_right(date_label, end) right_index = bisect.bisect_right(date_label, end)
...@@ -392,7 +410,7 @@ def fetch_predict_phase(engine, ...@@ -392,7 +410,7 @@ def fetch_predict_phase(engine,
ret = dict() ret = dict()
ret['x_names'] = transformer.names ret['x_names'] = transformer.names
ret['predict'] = {'x': ne_x, 'code': codes} ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes}
return ret return ret
......
...@@ -19,7 +19,7 @@ from alphamind.utilities import alpha_logger ...@@ -19,7 +19,7 @@ from alphamind.utilities import alpha_logger
class ConstLinearModelImpl(object): class ConstLinearModelImpl(object):
def __init__(self, weights: np.ndarray = None): def __init__(self, weights: np.ndarray = None):
self.weights = np.array(weights).flatten() self.weights = weights.flatten()
def fit(self, x: np.ndarray, y: np.ndarray): def fit(self, x: np.ndarray, y: np.ndarray):
pass pass
...@@ -31,15 +31,15 @@ class ConstLinearModelImpl(object): ...@@ -31,15 +31,15 @@ class ConstLinearModelImpl(object):
class ConstLinearModel(ModelBase): class ConstLinearModel(ModelBase):
def __init__(self, def __init__(self,
features: list = None, features=None,
formulas: dict = None, weights: dict = None):
weights: np.ndarray = None): super().__init__(features)
super().__init__(features, formulas=formulas)
if features is not None and weights is not None: if features is not None and weights is not None:
pyFinAssert(len(features) == len(weights), pyFinAssert(len(features) == len(weights),
ValueError, ValueError,
"length of features is not equal to length of weights") "length of features is not equal to length of weights")
self.impl = ConstLinearModelImpl(weights) if weights:
self.impl = ConstLinearModelImpl(np.array([weights[name] for name in self.features]))
def save(self): def save(self):
model_desc = super().save() model_desc = super().save()
...@@ -57,10 +57,9 @@ class ConstLinearModel(ModelBase): ...@@ -57,10 +57,9 @@ class ConstLinearModel(ModelBase):
class LinearRegression(ModelBase): class LinearRegression(ModelBase):
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs): def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas) super().__init__(features)
self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs) self.impl = LinearRegressionImpl(fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
...@@ -85,10 +84,9 @@ class LinearRegression(ModelBase): ...@@ -85,10 +84,9 @@ class LinearRegression(ModelBase):
class LassoRegression(ModelBase): class LassoRegression(ModelBase):
def __init__(self, alpha=0.01, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs): def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas) super().__init__(features)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs) self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
...@@ -113,8 +111,8 @@ class LassoRegression(ModelBase): ...@@ -113,8 +111,8 @@ class LassoRegression(ModelBase):
class LogisticRegression(ModelBase): class LogisticRegression(ModelBase):
def __init__(self, features: list = None, formulas: dict = None, fit_intercept: bool = False, **kwargs): def __init__(self, features=None, fit_intercept: bool = False, **kwargs):
super().__init__(features, formulas=formulas) super().__init__(features)
self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs) self.impl = LogisticRegressionImpl(fit_intercept=fit_intercept, **kwargs)
def save(self) -> dict: def save(self) -> dict:
......
...@@ -6,34 +6,42 @@ Created on 2017-9-4 ...@@ -6,34 +6,42 @@ Created on 2017-9-4
""" """
import abc import abc
import copy
import arrow import arrow
import numpy as np import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
from alphamind.utilities import encode from alphamind.utilities import encode
from alphamind.utilities import decode from alphamind.utilities import decode
from alphamind.data.transformer import Transformer
class ModelBase(metaclass=abc.ABCMeta): class ModelBase(metaclass=abc.ABCMeta):
def __init__(self, features: list=None, formulas: dict=None): def __init__(self, features=None):
if features is not None: if features is not None:
self.features = list(features) self.formulas = Transformer(features)
self.features = self.formulas.names
else: else:
self.features = None self.features = None
self.impl = None self.impl = None
self.formulas = copy.deepcopy(formulas)
self.trained_time = None self.trained_time = None
def fit(self, x: np.ndarray, y: np.ndarray): def __eq__(self, rhs):
self.impl.fit(x, y.flatten()) return encode(self.impl) == encode(rhs.impl) \
and self.trained_time == rhs.trained_time \
and list_eq(self.features, rhs.features) \
and encode(self.formulas) == encode(rhs.formulas)
def fit(self, x: pd.DataFrame, y: np.ndarray):
self.impl.fit(x[self.features].values, y.flatten())
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray: def predict(self, x: pd.DataFrame) -> np.ndarray:
return self.impl.predict(x) return self.impl.predict(x[self.features].values)
def score(self, x: np.ndarray, y: np.ndarray) -> float: def score(self, x: pd.DataFrame, y: np.ndarray) -> float:
return self.impl.score(x, y) return self.impl.score(x[self.features].values, y)
@abc.abstractmethod @abc.abstractmethod
def save(self) -> dict: def save(self) -> dict:
......
...@@ -5,10 +5,10 @@ Created on 2017-12-4 ...@@ -5,10 +5,10 @@ Created on 2017-12-4
@author: cheng.li @author: cheng.li
""" """
from typing import List
from distutils.version import LooseVersion from distutils.version import LooseVersion
import arrow import arrow
import numpy as np import numpy as np
import pandas as pd
from sklearn import __version__ as sklearn_version from sklearn import __version__ as sklearn_version
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
...@@ -26,18 +26,16 @@ class RandomForestRegressor(ModelBase): ...@@ -26,18 +26,16 @@ class RandomForestRegressor(ModelBase):
def __init__(self, def __init__(self,
n_estimators: int=100, n_estimators: int=100,
max_features: str='auto', max_features: str='auto',
features: List=None, features=None,
**kwargs): **kwargs):
super().__init__(features, **kwargs) super().__init__(features)
self.impl = RandomForestRegressorImpl(n_estimators=n_estimators, self.impl = RandomForestRegressorImpl(n_estimators=n_estimators,
max_features=max_features, max_features=max_features,
**kwargs) **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -60,19 +58,16 @@ class RandomForestClassifier(ModelBase): ...@@ -60,19 +58,16 @@ class RandomForestClassifier(ModelBase):
def __init__(self, def __init__(self,
n_estimators: int=100, n_estimators: int=100,
max_features: str='auto', max_features: str='auto',
features: List = None, features=None,
formulas: dict = None,
**kwargs): **kwargs):
super().__init__(features, formulas=formulas) super().__init__(features)
self.impl = RandomForestClassifierImpl(n_estimators=n_estimators, self.impl = RandomForestClassifierImpl(n_estimators=n_estimators,
max_features=max_features, max_features=max_features,
**kwargs) **kwargs)
self.trained_time = None
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['sklearn_version'] = sklearn_version model_desc['sklearn_version'] = sklearn_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -96,11 +91,10 @@ class XGBRegressor(ModelBase): ...@@ -96,11 +91,10 @@ class XGBRegressor(ModelBase):
n_estimators: int=100, n_estimators: int=100,
learning_rate: float=0.1, learning_rate: float=0.1,
max_depth: int=3, max_depth: int=3,
features: List=None, features=None,
formulas: dict = None,
n_jobs: int=1, n_jobs: int=1,
**kwargs): **kwargs):
super().__init__(features, formulas=formulas) super().__init__(features)
self.impl = XGBRegressorImpl(n_estimators=n_estimators, self.impl = XGBRegressorImpl(n_estimators=n_estimators,
learning_rate=learning_rate, learning_rate=learning_rate,
max_depth=max_depth, max_depth=max_depth,
...@@ -110,7 +104,6 @@ class XGBRegressor(ModelBase): ...@@ -110,7 +104,6 @@ class XGBRegressor(ModelBase):
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -134,11 +127,10 @@ class XGBClassifier(ModelBase): ...@@ -134,11 +127,10 @@ class XGBClassifier(ModelBase):
n_estimators: int=100, n_estimators: int=100,
learning_rate: float=0.1, learning_rate: float=0.1,
max_depth: int=3, max_depth: int=3,
features: List = None, features=None,
formulas: dict = None,
n_jobs: int=1, n_jobs: int=1,
**kwargs): **kwargs):
super().__init__(features, formulas=formulas) super().__init__(features)
self.impl = XGBClassifierImpl(n_estimators=n_estimators, self.impl = XGBClassifierImpl(n_estimators=n_estimators,
learning_rate=learning_rate, learning_rate=learning_rate,
max_depth=max_depth, max_depth=max_depth,
...@@ -148,7 +140,6 @@ class XGBClassifier(ModelBase): ...@@ -148,7 +140,6 @@ class XGBClassifier(ModelBase):
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
...@@ -179,12 +170,11 @@ class XGBTrainer(ModelBase): ...@@ -179,12 +170,11 @@ class XGBTrainer(ModelBase):
early_stopping_rounds=None, early_stopping_rounds=None,
subsample=1., subsample=1.,
colsample_bytree=1., colsample_bytree=1.,
features: List = None, features=None,
formulas: dict = None,
random_state: int=0, random_state: int=0,
n_jobs: int=1, n_jobs: int=1,
**kwargs): **kwargs):
super().__init__(features, formulas=formulas) super().__init__(features)
self.params = { self.params = {
'silent': 1, 'silent': 1,
'objective': objective, 'objective': objective,
...@@ -204,9 +194,9 @@ class XGBTrainer(ModelBase): ...@@ -204,9 +194,9 @@ class XGBTrainer(ModelBase):
self.impl = None self.impl = None
self.kwargs = kwargs self.kwargs = kwargs
def fit(self, x, y): def fit(self, x: pd.DataFrame, y: np.ndarray):
if self.eval_sample: if self.eval_sample:
x_train, x_eval, y_train, y_eval = train_test_split(x, x_train, x_eval, y_train, y_eval = train_test_split(x[self.features].values,
y, y,
test_size=self.eval_sample, test_size=self.eval_sample,
random_state=42) random_state=42)
...@@ -219,7 +209,7 @@ class XGBTrainer(ModelBase): ...@@ -219,7 +209,7 @@ class XGBTrainer(ModelBase):
verbose_eval=False, verbose_eval=False,
**self.kwargs) **self.kwargs)
else: else:
d_train = xgb.DMatrix(x, y) d_train = xgb.DMatrix(x[self.features].values, y)
self.impl = xgb.train(params=self.params, self.impl = xgb.train(params=self.params,
dtrain=d_train, dtrain=d_train,
num_boost_round=self.num_boost_round, num_boost_round=self.num_boost_round,
...@@ -227,14 +217,13 @@ class XGBTrainer(ModelBase): ...@@ -227,14 +217,13 @@ class XGBTrainer(ModelBase):
self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss") self.trained_time = arrow.now().format("YYYY-MM-DD HH:mm:ss")
def predict(self, x: np.ndarray) -> np.ndarray: def predict(self, x: pd.DataFrame) -> np.ndarray:
d_predict = xgb.DMatrix(x) d_predict = xgb.DMatrix(x[self.features].values)
return self.impl.predict(d_predict) return self.impl.predict(d_predict)
def save(self) -> dict: def save(self) -> dict:
model_desc = super().save() model_desc = super().save()
model_desc['xgbboot_version'] = xgbboot_version model_desc['xgbboot_version'] = xgbboot_version
model_desc['importances'] = self.importances
return model_desc return model_desc
@classmethod @classmethod
......
...@@ -16,9 +16,9 @@ class Allocation(object): ...@@ -16,9 +16,9 @@ class Allocation(object):
def __init__(self, def __init__(self,
code: int, code: int,
minimum: int=0, minimum: int = 0,
maximum: int=inf, maximum: int = inf,
current: int=0): current: int = 0):
self.code = code self.code = code
self.minimum = minimum self.minimum = minimum
self.maximum = maximum self.maximum = maximum
...@@ -62,39 +62,32 @@ class Portfolio(object): ...@@ -62,39 +62,32 @@ class Portfolio(object):
class Execution(object): class Execution(object):
def __init__(self, def __init__(self,
name: str,
code: int, code: int,
qty: int, qty: int,
comment: str=None): cpty: str = 'external',
comment: str = None):
self.name = name
self.code = code self.code = code
self.qty = qty self.qty = qty
self.cpty = cpty
self.comment = comment self.comment = comment
def __repr__(self): def __repr__(self):
return "Execution(code={0}, qty={1}, comment={2})".format(self.code, return "Execution(name={0}, code={1}, qty={2}, cpty={3}, comment={4})".format(self.name,
self.code,
self.qty, self.qty,
self.cpty,
self.comment) self.comment)
class Executions(object):
def __init__(self,
name,
executions: List[Execution]=None):
self.name = name
self.executions = executions
def __repr__(self):
return "Executions(name={0}, executions={1})".format(self.name,
self.executions)
class Asset(object): class Asset(object):
def __init__(self, def __init__(self,
code: int, code: int,
name: str=None, name: str = None,
priority: List[str]=None, priority: List[str] = None,
forbidden: List[str]=None): forbidden: List[str] = None):
self.code = code self.code = code
self.name = name self.name = name
if priority: if priority:
...@@ -119,11 +112,11 @@ class Asset(object): ...@@ -119,11 +112,11 @@ class Asset(object):
self.forbidden) self.forbidden)
class TargetPositions(object): class Positions(object):
def __init__(self, def __init__(self,
assets: List[Asset]=None, assets: List[Asset] = None,
qtys: List[int]=None): qtys: List[int] = None):
if assets: if assets:
self.targets = {asset.code: (asset, qty) for asset, qty in zip(assets, qtys)} self.targets = {asset.code: (asset, qty) for asset, qty in zip(assets, qtys)}
...@@ -133,9 +126,9 @@ class TargetPositions(object): ...@@ -133,9 +126,9 @@ class TargetPositions(object):
def add_asset(self, def add_asset(self,
asset: Asset, asset: Asset,
qty: int): qty: int):
if asset.code in self.targets: code = asset.code
raise ValueError() pyFinAssert(code not in self.targets, ValueError, "code {0} is already in positions".format(code))
self.targets[asset.code] = (asset, qty) self.targets[code] = (asset, qty)
def __getitem__(self, code: int) -> Tuple[Asset, int]: def __getitem__(self, code: int) -> Tuple[Asset, int]:
return self.targets[code] return self.targets[code]
...@@ -148,10 +141,10 @@ class TargetPositions(object): ...@@ -148,10 +141,10 @@ class TargetPositions(object):
return "TargetPositions(assets={0}, qtys={1})".format(*zip(*self.targets.values())) return "TargetPositions(assets={0}, qtys={1})".format(*zip(*self.targets.values()))
def handle_one_asset(pre_allocation: Allocation, def handle_one_asset(p_name: str,
pre_allocation: Allocation,
asset: Asset, asset: Asset,
qty: int) -> Tuple[Execution, Allocation, int]: qty: int) -> Tuple[Execution, Allocation, int]:
minimum = pre_allocation.minimum minimum = pre_allocation.minimum
maximum = pre_allocation.maximum maximum = pre_allocation.maximum
current = pre_allocation.current current = pre_allocation.current
...@@ -161,14 +154,20 @@ def handle_one_asset(pre_allocation: Allocation, ...@@ -161,14 +154,20 @@ def handle_one_asset(pre_allocation: Allocation,
raise ValueError("{0}'s target {1} is smaller than minimum amount {2}".format(asset.code, qty, pre_allocation)) raise ValueError("{0}'s target {1} is smaller than minimum amount {2}".format(asset.code, qty, pre_allocation))
elif qty < maximum: elif qty < maximum:
# need to buy / sell # need to buy / sell
ex = Execution(code, qty - current) ex = Execution(name=p_name,
code=code,
qty=qty - current,
cpty='external')
allocation = Allocation(code, allocation = Allocation(code,
minimum=minimum, minimum=minimum,
maximum=maximum, maximum=maximum,
current=qty) current=qty)
qty = 0 qty = 0
else: else:
ex = Execution(code, maximum - current) ex = Execution(name=p_name,
code=code,
qty=maximum - current,
cpty='external')
allocation = Allocation(code, allocation = Allocation(code,
minimum=minimum, minimum=minimum,
maximum=maximum, maximum=maximum,
...@@ -177,41 +176,40 @@ def handle_one_asset(pre_allocation: Allocation, ...@@ -177,41 +176,40 @@ def handle_one_asset(pre_allocation: Allocation,
return ex, allocation, qty return ex, allocation, qty
def pass_through(target_pos: TargetPositions, def pass_through(target_pos: Positions,
portfolio: Portfolio) -> Tuple[Executions, Portfolio, TargetPositions]: portfolio: Portfolio) -> Tuple[List[Execution], Portfolio, Positions]:
p_name = portfolio.name p_name = portfolio.name
new_target_pos = TargetPositions() new_target_pos = Positions()
allocations = [] allocations = []
executions = [] executions = []
for code in target_pos.codes: for code in target_pos.codes:
asset, qty = target_pos[code] asset, qty = target_pos[code]
if asset.priority: pyFinAssert(not asset.priority,
raise ValueError("asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority)) ValueError,
"asset ({0})'s priority pool {1} is not checked yet".format(code, asset.priority))
if p_name in asset.forbidden: if p_name in asset.forbidden:
ex = Execution(code, 0, "{0} is forbidden for {1}".format(code, p_name))
allocation = copy.deepcopy(portfolio[code]) allocation = copy.deepcopy(portfolio[code])
new_target_pos.add_asset(asset, qty) new_target_pos.add_asset(asset, qty)
else: else:
prev_allocation = portfolio[code] prev_allocation = portfolio[code]
ex, allocation, qty = handle_one_asset(prev_allocation, asset, qty) ex, allocation, qty = handle_one_asset(p_name, prev_allocation, asset, qty)
new_target_pos.add_asset(asset, qty) new_target_pos.add_asset(asset, qty)
if ex.qty != 0:
executions.append(ex)
allocations.append(allocation) allocations.append(allocation)
executions.append(ex)
return Executions(p_name, executions), Portfolio(p_name, allocations), new_target_pos return executions, Portfolio(p_name, allocations), new_target_pos
if __name__ == '__main__': if __name__ == '__main__':
asset1 = Asset(1, 'a') asset1 = Asset(1, 'a')
asset2 = Asset(2, 'b') asset2 = Asset(2, 'b')
asset3 = Asset(3, 'b') asset3 = Asset(3, 'b')
target_pos = TargetPositions([asset1, asset2, asset3], [200, 300, 100]) target_pos = Positions([asset1, asset2, asset3], [200, 300, 100])
allc1 = Allocation(1, 0, 100, 0) allc1 = Allocation(1, 0, 100, 0)
allc2 = Allocation(2, 0, 400, 100) allc2 = Allocation(2, 0, 400, 100)
...@@ -219,8 +217,3 @@ if __name__ == '__main__': ...@@ -219,8 +217,3 @@ if __name__ == '__main__':
portfolio = Portfolio('test1', [allc1, allc2]) portfolio = Portfolio('test1', [allc1, allc2])
executions, portfolio, target_pos = pass_through(target_pos, portfolio) executions, portfolio, target_pos = pass_through(target_pos, portfolio)
# -*- coding: utf-8 -*-
"""
Created on 2018-2-9
@author: cheng.li
"""
import unittest
from PyFin.api import LAST
from alphamind.data.engines.universe import Universe
class TestUniverse(unittest.TestCase):
    """Equality and save/load round-trip checks for ``Universe``."""

    def test_universe_equal(self):
        """Compare pairs of universes with matching and differing settings."""
        # Same base universe, no filter: expected equal.
        lhs = Universe('custom', ['zz500'])
        rhs = Universe('custom', ['zz500'])
        self.assertEqual(lhs, rhs)

        # Different base universe: expected unequal.
        lhs = Universe('custom', ['zz500'])
        rhs = Universe('custom', ['zz800'])
        self.assertNotEqual(lhs, rhs)

        # Same base universe sharing one filter object: expected equal.
        filter_cond = LAST('x') > 1.
        lhs = Universe('custom', ['zz500'], filter_cond=filter_cond)
        rhs = Universe('custom', ['zz500'], filter_cond=filter_cond)
        self.assertEqual(lhs, rhs)

        # Same base universe, different filter thresholds: expected unequal.
        lhs = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
        rhs = Universe('custom', ['zz500'], filter_cond=LAST('x') > 2.)
        self.assertNotEqual(lhs, rhs)

    def test_universe_persistence(self):
        """Save a universe, load it back and compare the restored fields."""
        # Round trip without a filter condition.
        original = Universe('custom', ['zz500'])
        restored = Universe.load(original.save())
        self.assertEqual(original.name, restored.name)
        self.assertListEqual(original.base_universe, restored.base_universe)

        # Round trip with a filter condition; the filters are compared
        # through their string representations.
        original = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
        restored = Universe.load(original.save())
        self.assertEqual(original.name, restored.name)
        self.assertListEqual(original.base_universe, restored.base_universe)
        self.assertEqual(str(original.filter_cond), str(restored.filter_cond))
# -*- coding: utf-8 -*-
"""
Created on 2018-2-9
@author: cheng.li
"""
import unittest
from alphamind.data.engines.universe import Universe
from alphamind.model.composer import DataMeta
from alphamind.model.composer import Composer
from alphamind.model.treemodel import XGBClassifier
class TestComposer(unittest.TestCase):
    """Save/load round-trip tests for ``DataMeta`` and ``Composer``."""

    @staticmethod
    def _make_data_meta():
        """Build the ``DataMeta`` fixture shared by the persistence tests.

        Both tests previously repeated this nine-argument construction
        verbatim; keeping it in one place stops the two copies drifting apart.
        """
        return DataMeta(freq='5b',
                        universe=Universe('custom', ['zz800']),
                        batch=4,
                        neutralized_risk=['SIZE'],
                        risk_model='long',
                        pre_process=['standardize', 'winsorize_normal'],
                        post_process=['standardize', 'winsorize_normal'],
                        warm_start=2,
                        data_source='postgresql://user:pwd@server/dummy')

    def _assert_composer_equal(self, lhs: Composer, rhs: Composer):
        """Assert that two composers agree on alpha model and data meta."""
        self.assertEqual(lhs.alpha_model, rhs.alpha_model)
        self.assertEqual(lhs.data_meta, rhs.data_meta)

    def test_data_meta_persistence(self):
        """A ``DataMeta`` restored from its saved form keeps every field."""
        data_meta = self._make_data_meta()
        loaded_data = DataMeta.load(data_meta.save())

        # Compare field by field; the attribute name is passed as ``msg`` so
        # a failure identifies which field did not survive the round trip.
        for attr in ('freq',
                     'universe',
                     'batch',
                     'neutralized_risk',
                     'risk_model',
                     'pre_process',
                     'post_process',
                     'warm_start',
                     'data_source'):
            self.assertEqual(getattr(data_meta, attr),
                             getattr(loaded_data, attr),
                             msg=attr)

    def test_composer_persistence(self):
        """A ``Composer`` restored from its saved form equals the original."""
        features = {'f1': 'closePrice', 'f2': 'openPrice'}
        alpha_model = XGBClassifier(features=features)
        composer = Composer(alpha_model=alpha_model,
                            data_meta=self._make_data_meta())

        loaded_comp = Composer.load(composer.save())
        self._assert_composer_equal(composer, loaded_comp)
...@@ -7,6 +7,7 @@ Created on 2017-9-4 ...@@ -7,6 +7,7 @@ Created on 2017-9-4
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as LinearRegression2 from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
from alphamind.model.linearmodel import ConstLinearModel from alphamind.model.linearmodel import ConstLinearModel
...@@ -19,23 +20,25 @@ class TestLinearModel(unittest.TestCase): ...@@ -19,23 +20,25 @@ class TestLinearModel(unittest.TestCase):
def setUp(self): def setUp(self):
self.n = 3 self.n = 3
self.train_x = np.random.randn(1000, self.n) self.features = ['a', 'b', 'c']
self.train_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.train_y = np.random.randn(1000) self.train_y = np.random.randn(1000)
self.train_y_label = np.where(self.train_y > 0., 1, 0) self.train_y_label = np.where(self.train_y > 0., 1, 0)
self.predict_x = np.random.randn(10, self.n) self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])
def test_const_linear_model(self): def test_const_linear_model(self):
weights = np.array([1., 2., 3.]) features = ['c', 'b', 'a']
model = ConstLinearModel(features=['a', 'b', 'c'], weights = dict(c=3., b=2., a=1.)
model = ConstLinearModel(features=features,
weights=weights) weights=weights)
calculated_y = model.predict(self.predict_x) calculated_y = model.predict(self.predict_x)
expected_y = self.predict_x @ weights expected_y = self.predict_x[features] @ np.array([weights[f] for f in features])
np.testing.assert_array_almost_equal(calculated_y, expected_y) np.testing.assert_array_almost_equal(calculated_y, expected_y)
def test_const_linear_model_persistence(self): def test_const_linear_model_persistence(self):
weights = np.array([1., 2., 3.]) weights = dict(c=3., b=2., a=1.)
model = ConstLinearModel(features=['a', 'b', 'c'], model = ConstLinearModel(features=['a', 'b', 'c'],
weights=weights) weights=weights)
......
...@@ -7,6 +7,7 @@ Created on 2017-9-5 ...@@ -7,6 +7,7 @@ Created on 2017-9-5
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd
from alphamind.model.linearmodel import LinearRegression from alphamind.model.linearmodel import LinearRegression
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
...@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase): ...@@ -15,10 +16,10 @@ class TestLoader(unittest.TestCase):
def setUp(self): def setUp(self):
self.n = 3 self.n = 3
self.trained_x = np.random.randn(1000, self.n) self.trained_x = pd.DataFrame(np.random.randn(1000, self.n), columns=['a', 'b', 'c'])
self.trained_y = np.random.randn(1000, 1) self.trained_y = np.random.randn(1000, 1)
self.predict_x = np.random.randn(100, self.n) self.predict_x = pd.DataFrame(np.random.randn(100, self.n), columns=['a', 'b', 'c'])
def test_load_model(self): def test_load_model(self):
model = LinearRegression(['a', 'b', 'c']) model = LinearRegression(['a', 'b', 'c'])
......
# -*- coding: utf-8 -*-
"""
Created on 2018-2-8
@author: cheng.li
"""
import unittest
from alphamind.model.linearmodel import ConstLinearModel
class TestModelBase(unittest.TestCase):
    """Checks on the feature handling exposed through a concrete model."""

    def test_simple_model_features(self):
        """Features passed as ['c', 'b', 'a'] are expected back as ['a', 'b', 'c']."""
        expected_features = ['a', 'b', 'c']
        model = ConstLinearModel(features=['c', 'b', 'a'])
        self.assertListEqual(expected_features, model.features)
\ No newline at end of file
...@@ -7,6 +7,7 @@ Created on 2018-1-5 ...@@ -7,6 +7,7 @@ Created on 2018-1-5
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd
from alphamind.model.loader import load_model from alphamind.model.loader import load_model
from alphamind.model.treemodel import RandomForestRegressor from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier from alphamind.model.treemodel import RandomForestClassifier
...@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer ...@@ -18,23 +19,24 @@ from alphamind.model.treemodel import XGBTrainer
class TestTreeModel(unittest.TestCase): class TestTreeModel(unittest.TestCase):
def setUp(self): def setUp(self):
self.x = np.random.randn(1000, 10) self.features = list('0123456789')
self.x = pd.DataFrame(np.random.randn(1000, 10), columns=self.features)
self.y = np.random.randn(1000) self.y = np.random.randn(1000)
self.sample_x = pd.DataFrame(np.random.randn(100, 10), columns=self.features)
def test_random_forest_regress_persistence(self): def test_random_forest_regress_persistence(self):
model = RandomForestRegressor(features=list(range(10))) model = RandomForestRegressor(features=self.features)
model.fit(self.x, self.y) model.fit(self.x, self.y)
desc = model.save() desc = model.save()
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_random_forest_classify_persistence(self): def test_random_forest_classify_persistence(self):
model = RandomForestClassifier(features=list(range(10))) model = RandomForestClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0) y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y) model.fit(self.x, y)
...@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase): ...@@ -42,24 +44,22 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_regress_persistence(self): def test_xgb_regress_persistence(self):
model = XGBRegressor(features=list(range(10))) model = XGBRegressor(features=self.features)
model.fit(self.x, self.y) model.fit(self.x, self.y)
desc = model.save() desc = model.save()
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_classify_persistence(self): def test_xgb_classify_persistence(self):
model = XGBClassifier(features=list(range(10))) model = XGBClassifier(features=self.features)
y = np.where(self.y > 0, 1, 0) y = np.where(self.y > 0, 1, 0)
model.fit(self.x, y) model.fit(self.x, y)
...@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase): ...@@ -67,20 +67,18 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_trainer_equal_classifier(self): def test_xgb_trainer_equal_classifier(self):
sample_x = np.random.randn(100, 10)
model1 = XGBClassifier(n_estimators=100, model1 = XGBClassifier(n_estimators=100,
learning_rate=0.1, learning_rate=0.1,
max_depth=3, max_depth=3,
features=list(range(10)), features=self.features,
random_state=42) random_state=42)
model2 = XGBTrainer(features=list(range(10)), model2 = XGBTrainer(features=self.features,
objective='reg:logistic', objective='reg:logistic',
booster='gbtree', booster='gbtree',
tree_method='exact', tree_method='exact',
...@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase): ...@@ -93,13 +91,13 @@ class TestTreeModel(unittest.TestCase):
model1.fit(self.x, y) model1.fit(self.x, y)
model2.fit(self.x, y) model2.fit(self.x, y)
predict1 = model1.predict(sample_x) predict1 = model1.predict(self.sample_x)
predict2 = model2.predict(sample_x) predict2 = model2.predict(self.sample_x)
predict2 = np.where(predict2 > 0.5, 1., 0.) predict2 = np.where(predict2 > 0.5, 1., 0.)
np.testing.assert_array_almost_equal(predict1, predict2) np.testing.assert_array_almost_equal(predict1, predict2)
def test_xgb_trainer_persistence(self): def test_xgb_trainer_persistence(self):
model = XGBTrainer(features=list(range(10)), model = XGBTrainer(features=self.features,
objective='binary:logistic', objective='binary:logistic',
booster='gbtree', booster='gbtree',
tree_method='hist', tree_method='hist',
...@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase): ...@@ -111,6 +109,5 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc) new_model = load_model(desc)
self.assertEqual(model.features, new_model.features) self.assertEqual(model.features, new_model.features)
sample_x = np.random.randn(100, 10) np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(sample_x), new_model.predict(sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances) np.testing.assert_array_almost_equal(model.importances, new_model.importances)
# -*- coding: utf-8 -*-
"""
Created on 2018-2-7
@author: cheng.li
"""
import unittest
from alphamind.portfolio.allocations import Asset
from alphamind.portfolio.allocations import Portfolio
from alphamind.portfolio.allocations import Positions
from alphamind.portfolio.allocations import Execution
class TestAllocation(unittest.TestCase):
pass
\ No newline at end of file
...@@ -15,6 +15,7 @@ from alphamind.tests.data.test_neutralize import TestNeutralize ...@@ -15,6 +15,7 @@ from alphamind.tests.data.test_neutralize import TestNeutralize
from alphamind.tests.data.test_standardize import TestStandardize from alphamind.tests.data.test_standardize import TestStandardize
from alphamind.tests.data.test_winsorize import TestWinsorize from alphamind.tests.data.test_winsorize import TestWinsorize
from alphamind.tests.data.test_quantile import TestQuantile from alphamind.tests.data.test_quantile import TestQuantile
from alphamind.tests.data.engines.test_universe import TestUniverse
from alphamind.tests.portfolio.test_constraints import TestConstraints from alphamind.tests.portfolio.test_constraints import TestConstraints
from alphamind.tests.portfolio.test_evolver import TestEvolver from alphamind.tests.portfolio.test_evolver import TestEvolver
from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild
...@@ -27,9 +28,11 @@ from alphamind.tests.analysis.test_riskanalysis import TestRiskAnalysis ...@@ -27,9 +28,11 @@ from alphamind.tests.analysis.test_riskanalysis import TestRiskAnalysis
from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis
from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis
from alphamind.tests.model.test_modelbase import TestModelBase
from alphamind.tests.model.test_linearmodel import TestLinearModel from alphamind.tests.model.test_linearmodel import TestLinearModel
from alphamind.tests.model.test_treemodel import TestTreeModel from alphamind.tests.model.test_treemodel import TestTreeModel
from alphamind.tests.model.test_loader import TestLoader from alphamind.tests.model.test_loader import TestLoader
from alphamind.tests.model.test_composer import TestComposer
from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor from alphamind.tests.execution.test_naiveexecutor import TestNaiveExecutor
from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor from alphamind.tests.execution.test_thresholdexecutor import TestThresholdExecutor
from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor from alphamind.tests.execution.test_targetvolexecutor import TestTargetVolExecutor
...@@ -42,6 +45,7 @@ if __name__ == '__main__': ...@@ -42,6 +45,7 @@ if __name__ == '__main__':
TestStandardize, TestStandardize,
TestWinsorize, TestWinsorize,
TestQuantile, TestQuantile,
TestUniverse,
TestConstraints, TestConstraints,
TestEvolver, TestEvolver,
TestLongShortBuild, TestLongShortBuild,
...@@ -54,9 +58,11 @@ if __name__ == '__main__': ...@@ -54,9 +58,11 @@ if __name__ == '__main__':
TestPerformanceAnalysis, TestPerformanceAnalysis,
TestFactorAnalysis, TestFactorAnalysis,
TestQuantileAnalysis, TestQuantileAnalysis,
TestModelBase,
TestLinearModel, TestLinearModel,
TestTreeModel, TestTreeModel,
TestLoader, TestLoader,
TestComposer,
TestNaiveExecutor, TestNaiveExecutor,
TestThresholdExecutor, TestThresholdExecutor,
TestTargetVolExecutor, TestTargetVolExecutor,
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ pandas >= 0.19.2 ...@@ -6,7 +6,7 @@ pandas >= 0.19.2
scikit-learn >= 0.18.1 scikit-learn >= 0.18.1
numba >= 0.33.0 numba >= 0.33.0
scipy >= 0.19.0 scipy >= 0.19.0
simpleutils >= 0.1.0 simpleutils >= 0.1.2
sqlalchemy >= 1.1.14 sqlalchemy >= 1.1.14
psycopg2 >= 2.7.1 psycopg2 >= 2.7.1
finance-python >= 0.5.7 finance-python >= 0.5.7
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment