Unverified commit f4ddddae authored by iLampard, committed by GitHub

Merge pull request #11 from alpha-miner/master

merge update
parents 469fe61f 48fc6a88
@@ -28,6 +28,7 @@ from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.neutralize import neutralize
from alphamind.data.rank import rank
+from alphamind.data.rank import percentile
from alphamind.data.engines.sqlengine import factor_tables
from alphamind.data.engines.utilities import industry_list
@@ -80,6 +81,7 @@ __all__ = [
    'projection',
    'neutralize',
    'rank',
+    'percentile',
    'factor_tables',
    'industry_list',
    'fetch_data_package',
......
@@ -12,6 +12,19 @@ Base = declarative_base()
metadata = Base.metadata

+class Categories(Base):
+    __tablename__ = 'categories'
+    __table_args__ = (
+        Index('categories_pk', 'trade_date', 'code', unique=True),
+    )
+    trade_date = Column(DateTime, primary_key=True, nullable=False)
+    code = Column(BigInteger, primary_key=True, nullable=False)
+    sw1 = Column(Integer)
+    sw1_adj = Column(Integer)

class DailyPortfolios(Base):
    __tablename__ = 'daily_portfolios'
    __table_args__ = (
@@ -68,6 +81,7 @@ class Experimental(Base):
    pure_liq_2 = Column(Float(53))
    pure_liq_3 = Column(Float(53))
    pure_liq_4 = Column(Float(53))
+    pe_hist60 = Column(Float(53))

class FactorMaster(Base):
@@ -137,82 +151,6 @@ class Industry(Base):
    IndustryName4 = Column(String(50))
class LegacyFactor(Base):
__tablename__ = 'legacy_factor'
__table_args__ = (
Index('legacy_factor_idx', 'trade_date', 'code', unique=True),
)
trade_date = Column(DateTime, primary_key=True, nullable=False)
code = Column(Integer, primary_key=True, nullable=False)
ROEAfterNonRecurring = Column(Float(53))
EPSAfterNonRecurring = Column(Float(53))
EODPrice = Column(Float(53))
LogFloatCap = Column(Float(53))
BPS = Column(Float(53))
SPS = Column(Float(53))
DebtToAsset = Column(Float(53))
STOM = Column(Float(53))
DROEAfterNonRecurring = Column(Float(53))
LogTotalCap = Column(Float(53))
BP = Column(Float(53))
SP = Column(Float(53))
EPAfterNonRecurring = Column(Float(53))
DivToB = Column(Float(53))
DivP = Column(Float(53))
EBITToSales = Column(Float(53))
EBITAToSales = Column(Float(53))
EVToSales = Column(Float(53))
EVToEBIT = Column(Float(53))
EVToEBITDA = Column(Float(53))
EVToNOPLAT = Column(Float(53))
EVToIC = Column(Float(53))
ROIC = Column(Float(53))
FCFFPS = Column(Float(53))
FCFFToEarningAfterNonRecurring = Column(Float(53))
FCFFP = Column(Float(53))
ProfitToAsset = Column(Float(53))
GrossProfitRatio = Column(Float(53))
NetProfitRatio = Column(Float(53))
LATO = Column(Float(53))
FATO = Column(Float(53))
TATO = Column(Float(53))
EquityTO = Column(Float(53))
PayableTO = Column(Float(53))
RecievableTO = Column(Float(53))
RevenueGrowth = Column(Float(53))
GrossProfitGrowth = Column(Float(53))
NetProfitGrowth = Column(Float(53))
GrossCFToRevenue = Column(Float(53))
CFToRevenue = Column(Float(53))
CFToProfit = Column(Float(53))
CFToAsset = Column(Float(53))
GrossCFGrowth = Column(Float(53))
CFGrowth = Column(Float(53))
ICFGrowth = Column(Float(53))
AveAmount60 = Column(Float(53))
PeriodReturn60 = Column(Float(53))
AmountRatio60to250 = Column(Float(53))
CFPS = Column(Float(53))
CFP = Column(Float(53))
NetCFGrowth = Column(Float(53))
NetCFGrowthP = Column(Float(53))
NetCash = Column(Float(53))
NetCashP = Column(Float(53))
BVPSGrowth = Column(Float(53))
EquityPSGrowth = Column(Float(53))
WholeSales = Column(Float(53))
WholeProfitAfterNonRecurring = Column(Float(53))
ExpenseRatio = Column(Float(53))
CurrentRatio = Column(Float(53))
QuickRatio = Column(Float(53))
AcidTestRatio = Column(Float(53))
TimeInterestEarnedRatio = Column(Float(53))
DepositReceivedVsSale = Column(Float(53))
DebtRatioExcemptDepRec = Column(Float(53))
SNBARatio = Column(Float(53))
class Market(Base):
    __tablename__ = 'market'
    __table_args__ = (
@@ -301,21 +239,6 @@ class PortfolioSettings(Base):
    weight = Column(Float(53), nullable=False)
class Positions(Base):
__tablename__ = 'positions'
__table_args__ = (
Index('positions_idx', 'trade_date', 'source', 'universe', 'benchmark', 'portfolio', 'type', unique=True),
)
source = Column(String(50), primary_key=True, nullable=False)
universe = Column(String(50), primary_key=True, nullable=False)
benchmark = Column(Integer, primary_key=True, nullable=False)
trade_date = Column(DateTime, primary_key=True, nullable=False)
portfolio = Column(String(50), primary_key=True, nullable=False)
type = Column(String(50), primary_key=True, nullable=False)
weight = Column(JSON)
class RebalanceLog(Base):
    __tablename__ = 'rebalance_log'
    __table_args__ = (
@@ -718,30 +641,53 @@ class Strategy(Base):
    source = Column(String(20), primary_key=True, nullable=False)
class Tiny(Base):
__tablename__ = 'tiny'
__table_args__ = (
Index('tiny_idx', 'trade_date', 'code', unique=True),
)
trade_date = Column(DateTime, primary_key=True, nullable=False)
code = Column(Integer, primary_key=True, nullable=False)
CFinc1 = Column(Float(53))
BDTO = Column(Float(53))
RVOL = Column(Float(53))
CHV = Column(Float(53))
VAL = Column(Float(53))
class Universe(Base):
    __tablename__ = 'universe'
    __table_args__ = (
-        Index('universe_idx', 'trade_date', 'universe', 'code', unique=True),
+        Index('universe_trade_date_code_uindex', 'trade_date', 'code', unique=True),
    )
    trade_date = Column(DateTime, primary_key=True, nullable=False)
-    code = Column(Integer, primary_key=True, nullable=False)
-    universe = Column(String(20), primary_key=True, nullable=False)
+    code = Column(BigInteger, primary_key=True, nullable=False)
+    aerodef = Column(Integer)
+    agriforest = Column(Integer)
+    auto = Column(Integer)
+    bank = Column(Integer)
+    builddeco = Column(Integer)
+    chem = Column(Integer)
+    conmat = Column(Integer)
+    commetrade = Column(Integer)
+    computer = Column(Integer)
+    conglomerates = Column(Integer)
+    eleceqp = Column(Integer)
+    electronics = Column(Integer)
+    foodbever = Column(Integer)
+    health = Column(Integer)
+    houseapp = Column(Integer)
+    ironsteel = Column(Integer)
+    leiservice = Column(Integer)
+    lightindus = Column(Integer)
+    machiequip = Column(Integer)
+    media = Column(Integer)
+    mining = Column(Integer)
+    nonbankfinan = Column(Integer)
+    nonfermetal = Column(Integer)
+    realestate = Column(Integer)
+    telecom = Column(Integer)
+    textile = Column(Integer)
+    transportation = Column(Integer)
+    utilities = Column(Integer)
+    ashare = Column(Integer)
+    ashare_ex = Column(Integer)
+    cyb = Column(Integer)
+    hs300 = Column(Integer)
+    sh50 = Column(Integer)
+    zxb = Column(Integer)
+    zz1000 = Column(Integer)
+    zz500 = Column(Integer)
+    zz800 = Column(Integer)
+    hs300_adj = Column(Integer)
+    zz500_adj = Column(Integer)

class Uqer(Base):
@@ -1316,5 +1262,5 @@ class Outright(Base):
if __name__ == '__main__':
    from sqlalchemy import create_engine
-    engine = create_engine('postgres+psycopg2://postgres:we083826@192.168.0.102/alpha')
+    engine = create_engine('postgresql+psycopg2://postgres:we083826@101.132.104.118/alpha')
    Base.metadata.create_all(engine)
@@ -17,13 +17,12 @@ from alphamind.data.dbmodel.models import SpecificRiskLong
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.dbmodel.models import Gogoal
from alphamind.data.dbmodel.models import Experimental
-from alphamind.data.dbmodel.models import LegacyFactor
-from alphamind.data.dbmodel.models import Tiny
from alphamind.data.dbmodel.models import RiskExposure
+from alphamind.data.dbmodel.models import Categories
from alphamind.data.engines.industries import INDUSTRY_MAPPING

-factor_tables = [Market, RiskExposure, Uqer, Gogoal, Experimental, LegacyFactor, Tiny]
+factor_tables = [Market, RiskExposure, Uqer, Gogoal, Experimental, Categories]

def _map_risk_model_table(risk_model: str) -> tuple:
......
@@ -7,6 +7,7 @@ Created on 2017-8-8
from typing import Optional
import numpy as np
+from scipy.stats import rankdata
import alphamind.utilities as utils
@@ -22,8 +23,30 @@ def rank(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
        start = 0
        for diff_loc in index_diff:
            curr_idx = order[start:diff_loc + 1]
-            res[curr_idx] = x[curr_idx].argsort(axis=0)
+            res[curr_idx] = rankdata(x[curr_idx]).astype(float) - 1.
            start = diff_loc + 1
        return res
    else:
-        return x.argsort(axis=0).argsort(axis=0)
+        return (rankdata(x).astype(float) - 1.).reshape((-1, 1))
+
+
+def percentile(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
+    if x.ndim == 1:
+        x = x.reshape((-1, 1))
+    if groups is not None:
+        res = np.zeros(x.shape, dtype=int)
+        index_diff, order = utils.groupby(groups)
+        start = 0
+        for diff_loc in index_diff:
+            curr_idx = order[start:diff_loc + 1]
+            curr_values = x[curr_idx]
+            length = len(curr_values) - 1. if len(curr_values) > 1 else 1.
+            res[curr_idx] = (rankdata(curr_values).astype(float) - 1.) / length
+            start = diff_loc + 1
+        return res
+    else:
+        length = len(x) - 1. if len(x) > 1 else 1.
+        return ((rankdata(x).astype(float) - 1.) / length).reshape((-1, 1))
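The new `percentile` helper is used further down (in the composer and the factor scripts) as a post-process step. For readers who want to try the idea outside the repository, here is a minimal standalone sketch of the same computation (cross-sectional percentile ranks, optionally within groups); the `percentile_sketch` name and the `np.unique`-based grouping are illustrative assumptions, not the library's implementation, which relies on `alphamind.utilities.groupby`.

```python
import numpy as np
from scipy.stats import rankdata


def percentile_sketch(x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
    """Map each value to its percentile rank in [0, 1], optionally within groups."""
    x = x.reshape(-1, 1) if x.ndim == 1 else x
    if groups is None:
        length = max(len(x) - 1., 1.)
        return (rankdata(x).astype(float) - 1.).reshape(-1, 1) / length
    res = np.zeros(x.shape, dtype=float)   # float output so the ratios are kept
    for label in np.unique(groups):        # one cross-section per group label
        idx = np.where(groups == label)[0]
        length = max(len(idx) - 1., 1.)
        res[idx] = ((rankdata(x[idx]).astype(float) - 1.) / length).reshape(-1, 1)
    return res


# Example: percentile ranks computed separately inside two industry buckets
values = np.array([3., 1., 2., 10., 20.])
labels = np.array([1, 1, 1, 2, 2])
print(percentile_sketch(values, labels))   # [[1.], [0.], [0.5], [0.], [1.]]
```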
@@ -5,41 +5,100 @@ Created on 2017-9-5
@author: cheng.li
"""

-import sqlalchemy as sa
-import arrow
-import numpy as np
-import pandas as pd
-from alphamind.api import *
-from alphamind.data.dbmodel.models import Models
-from alphamind.model.linearmodel import LinearRegression
-
-engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
-
-x = np.random.randn(1000, 3)
-y = np.random.randn(1000)
-
-model = LinearRegression(['a', 'b', 'c'])
-model.fit(x, y)
-
-model_desc = model.save()
-
-df = pd.DataFrame()
-new_row = dict(trade_date='2017-09-05',
-               portfolio_name='test',
-               model_type='LinearRegression',
-               version=1,
-               model_desc=model_desc,
-               update_time=arrow.now().format())
-
-df = df.append([new_row])
-
-df.to_sql(Models.__table__.name, engine.engine,
-          if_exists='append',
-          index=False,
-          dtype={'model_desc': sa.types.JSON})
-
-model_in_db = engine.fetch_model('2017-09-05')
-
-print(model_in_db)
+import math
+import pandas as pd
+import numpy as np
+from PyFin.api import *
+from alphamind.api import *
+
+factor = 'ROE'
+universe = Universe('custom', ['zz800'])
+start_date = '2010-01-01'
+end_date = '2018-04-26'
+freq = '10b'
+category = 'sw_adj'
+level = 1
+horizon = map_freq(freq)
+ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
+
+
+def factor_analysis(factor):
+    engine = SqlEngine()
+
+    factors = {
+        'f1': CSQuantiles(factor),
+        'f2': CSQuantiles(factor, groups='sw1_adj'),
+        'f3': LAST(factor)
+    }
+
+    total_factor = engine.fetch_factor_range(universe, factors, dates=ref_dates)
+    _, risk_exp = engine.fetch_risk_model_range(universe, dates=ref_dates)
+    industry = engine.fetch_industry_range(universe, dates=ref_dates, category=category, level=level)
+    rets = engine.fetch_dx_return_range(universe, horizon=horizon, offset=1, dates=ref_dates)
+
+    total_factor = pd.merge(total_factor, industry[['trade_date', 'code', 'industry']], on=['trade_date', 'code'])
+    total_factor = pd.merge(total_factor, risk_exp, on=['trade_date', 'code'])
+    total_factor = pd.merge(total_factor, rets, on=['trade_date', 'code']).dropna()
+
+    df_ret = pd.DataFrame(columns=['f1', 'f2', 'f3'])
+    df_ic = pd.DataFrame(columns=['f1', 'f2', 'f3'])
+
+    total_factor_groups = total_factor.groupby('trade_date')
+
+    for date, this_factors in total_factor_groups:
+        raw_factors = this_factors['f3'].values
+        industry_exp = this_factors[industry_styles + ['COUNTRY']].values.astype(float)
+        processed_values = factor_processing(raw_factors, pre_process=[], risk_factors=industry_exp,
+                                             post_process=[percentile])
+        this_factors['f3'] = processed_values
+
+        factor_values = this_factors[['f1', 'f2', 'f3']].values
+        positions = (factor_values >= 0.8) * 1.
+        positions[factor_values <= 0.2] = -1
+        positions /= np.abs(positions).sum(axis=0)
+
+        ret_values = this_factors.dx.values @ positions
+        df_ret.loc[date] = ret_values
+
+        ic_values = this_factors[['dx', 'f1', 'f2', 'f3']].corr().values[0, 1:]
+        df_ic.loc[date] = ic_values
+
+    print(f"{factor} is finished")
+
+    return {'ic': (df_ic.mean(axis=0), df_ic.std(axis=0) / math.sqrt(len(df_ic))),
+            'ret': (df_ret.mean(axis=0), df_ret.std(axis=0) / math.sqrt(len(df_ic))),
+            'factor': factor}
+
+
+if __name__ == '__main__':
+    from dask.distributed import Client
+
+    try:
+        client = Client("10.63.6.176:8786")
+
+        cols = pd.MultiIndex.from_product([['mean', 'std'], ['raw', 'peer', 'neutralized']])
+        factors_ret = pd.DataFrame(columns=cols)
+        factors_ic = pd.DataFrame(columns=cols)
+
+        factors = ['ep_q',
+                   'roe_q',
+                   'SGRO',
+                   'GREV',
+                   'IVR',
+                   'ILLIQUIDITY',
+                   'con_target_price',
+                   'con_pe_rolling_order',
+                   'DividendPaidRatio']
+
+        l = client.map(factor_analysis, factors)
+        results = client.gather(l)
+
+        for res in results:
+            factor = res['factor']
+            factors_ret.loc[factor, 'mean'] = res['ret'][0].values
+            factors_ret.loc[factor, 'std'] = res['ret'][1].values
+
+            factors_ic.loc[factor, 'mean'] = res['ic'][0].values
+            factors_ic.loc[factor, 'std'] = res['ic'][1].values
+
+        print(factors_ret)
+    finally:
+        client.close()
@@ -20,6 +20,7 @@ from alphamind.data.winsorize import winsorize_normal
from alphamind.data.rank import rank
from alphamind.data.standardize import standardize
from alphamind.model.loader import load_model
+from alphamind.model.linearmodel import ConstLinearModel

PROCESS_MAPPING = {
    'winsorize_normal': winsorize_normal,
@@ -48,7 +49,6 @@ class DataMeta(object):
                 warm_start: int = 0,
                 data_source: str = None):
        self.data_source = data_source
-        self.engine = SqlEngine(self.data_source)
        self.freq = freq
        self.universe = universe
        self.batch = batch
@@ -107,7 +107,7 @@ class DataMeta(object):
    def fetch_train_data(self,
                         ref_date,
                         alpha_model: ModelBase):
-        return fetch_train_phase(self.engine,
+        return fetch_train_phase(SqlEngine(self.data_source),
                                  alpha_model.formulas,
                                  ref_date,
                                  self.freq,
@@ -123,7 +123,7 @@ class DataMeta(object):
    def fetch_predict_data(self,
                           ref_date: str,
                           alpha_model: ModelBase):
-        return fetch_predict_phase(self.engine,
+        return fetch_predict_phase(SqlEngine(self.data_source),
                                    alpha_model.formulas,
                                    ref_date,
                                    self.freq,
@@ -144,11 +144,13 @@ def train_model(ref_date: str,
                x_values: pd.DataFrame = None,
                y_values: pd.DataFrame = None):
    base_model = copy.deepcopy(alpha_model)
-    if x_values is None:
-        train_data = data_meta.fetch_train_data(ref_date, alpha_model)
-        x_values = train_data['train']['x']
-        y_values = train_data['train']['y']
-    base_model.fit(x_values, y_values)
+
+    if not isinstance(alpha_model, ConstLinearModel):
+        if x_values is None:
+            train_data = data_meta.fetch_train_data(ref_date, alpha_model)
+            x_values = train_data['train']['x']
+            y_values = train_data['train']['y']
+        base_model.fit(x_values, y_values)
    return base_model
@@ -238,19 +240,38 @@ class Composer(object):
if __name__ == '__main__':
-    from PyFin.api import LAST
-    from alphamind.data.engines.sqlengine import risk_styles, industry_styles
-    from alphamind.model.linearmodel import LinearRegression
+    from alphamind.api import (risk_styles,
+                               industry_styles,
+                               standardize,
+                               winsorize_normal,
+                               DataMeta,
+                               LinearRegression,
+                               fetch_data_package,
+                               map_freq)
+    from PyFin.api import LAST, SHIFT

+    freq = '60b'
    universe = Universe('custom', ['ashare_ex'])
-    freq = '20b'
-    batch = 0
-    neutralized_risk = risk_styles + industry_styles
+    batch = 1
+    neutralized_risk = industry_styles
    risk_model = 'short'
    pre_process = [winsorize_normal, standardize]
    post_process = [standardize]
-    warm_start = 0
-    data_source = "postgres+psycopg2://postgres:we083826@localhost/alpha"
+    warm_start = 3
+    data_source = None
+    horizon = map_freq(freq)
+    engine = SqlEngine(data_source)
+
+    fit_intercept = True
+    kernal_feature = 'roe_q'
+    regress_features = {kernal_feature: LAST(kernal_feature),
+                        kernal_feature + '_l1': SHIFT(kernal_feature, 1),
+                        kernal_feature + '_l2': SHIFT(kernal_feature, 2),
+                        kernal_feature + '_l3': SHIFT(kernal_feature, 3)
+                        }
+    const_features = {kernal_feature: LAST(kernal_feature)}
+    fit_target = [kernal_feature]

    data_meta = DataMeta(freq=freq,
                         universe=universe,
@@ -262,9 +283,28 @@ if __name__ == '__main__':
                         warm_start=warm_start,
                         data_source=data_source)

-    alpha_model = LinearRegression({'roe_q': LAST('roe_q')}, fit_target='roe_q')
+    alpha_model = LinearRegression(features=regress_features, fit_intercept=True, fit_target=fit_target)
    composer = Composer(alpha_model=alpha_model, data_meta=data_meta)
-    ref_date = '2018-01-30'
-    composer.train(ref_date)
-    res = composer.predict(ref_date)
\ No newline at end of file
+    start_date = '2014-01-01'
+    end_date = '2016-01-01'
+
+    regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept, fit_target=fit_target)
+    regression_composer = Composer(alpha_model=regression_model, data_meta=data_meta)
+    #regression_composer.train('2010-07-07')
+
+    data_package1 = fetch_data_package(engine,
+                                       alpha_factors=[kernal_feature],
+                                       start_date=start_date,
+                                       end_date=end_date,
+                                       frequency=freq,
+                                       universe=universe,
+                                       benchmark=906,
+                                       warm_start=warm_start,
+                                       batch=1,
+                                       neutralized_risk=neutralized_risk,
+                                       pre_process=pre_process,
+                                       post_process=post_process,
+                                       fit_target=fit_target)
+    pass
\ No newline at end of file
@@ -30,7 +30,7 @@ def _merge_df(engine, names, factor_df, target_df, universe, dates, risk_model,
        risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
        used_neutralized_risk = list(set(total_risk_factors).difference(names))
        risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
-        target_df = pd.merge(target_df, risk_df, on=['trade_date', 'code'])
+        target_df = pd.merge(target_df, risk_df, on=['trade_date', 'code']).dropna()
        if neutralized_risk:
            train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
@@ -220,7 +220,7 @@ def fetch_data_package(engine: SqlEngine,
                       frequency,
                       universe,
                       benchmark,
-                      warm_start,
+                      warm_start + batch,
                       fit_target=fit_target)
    target_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
@@ -255,7 +255,17 @@ def fetch_data_package(engine: SqlEngine,
    ret = dict()
    ret['x_names'] = names
-    ret['settlement'] = target_df
+    ret['settlement'] = target_df[target_df.trade_date >= start_date]
+
+    train_x_buckets = {k: train_x_buckets[k] for k in train_x_buckets if k.strftime('%Y-%m-%d') >= start_date}
+    train_y_buckets = {k: train_y_buckets[k] for k in train_y_buckets if k.strftime('%Y-%m-%d') >= start_date}
+    train_risk_buckets = {k: train_risk_buckets[k] for k in train_risk_buckets if k.strftime('%Y-%m-%d') >= start_date}
+
+    predict_x_buckets = {k: predict_x_buckets[k] for k in predict_x_buckets if k.strftime('%Y-%m-%d') >= start_date}
+    predict_y_buckets = {k: predict_y_buckets[k] for k in predict_y_buckets if k.strftime('%Y-%m-%d') >= start_date}
+    predict_risk_buckets = {k: predict_risk_buckets[k] for k in predict_risk_buckets if k.strftime('%Y-%m-%d') >= start_date}
+    predict_codes_bucket = {k: predict_codes_bucket[k] for k in predict_codes_bucket if k.strftime('%Y-%m-%d') >= start_date}
+
    ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
    ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets,
                      'code': predict_codes_bucket}
@@ -403,7 +413,7 @@ def fetch_predict_phase(engine,
            train_x = pd.merge(factor_df, target_df, on=['trade_date', 'code'], how='left')
            risk_exp = None
-        train_x.dropna(inplace=True)
+        train_x.dropna(inplace=True, subset=train_x.columns[:-1])
        x_values = train_x[names].values.astype(float)
        y_values = train_x[['dx']].values.astype(float)
@@ -456,18 +466,3 @@ def fetch_predict_phase(engine,
    return ret
if __name__ == '__main__':
from alphamind.api import risk_styles, industry_styles, standardize
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('zz500', ['hs300', 'zz500'])
neutralized_risk = risk_styles + industry_styles
res = fetch_train_phase(engine, ['ep_q'],
'2012-01-05',
'5b',
universe,
2,
neutralized_risk=neutralized_risk,
post_process=[standardize],
fit_target='closePrice')
print(res)
@@ -22,7 +22,7 @@ class ConstLinearModelImpl(object):
        self.weights = weights.flatten()

    def fit(self, x: np.ndarray, y: np.ndarray):
-        pass
+        raise NotImplementedError("Const linear model doesn't offer fit methodology")

    def predict(self, x: np.ndarray):
        return x @ self.weights
......
@@ -27,6 +27,8 @@ class BoundaryDirection(IntEnum):
class BoundaryType(IntEnum):
    ABSOLUTE = 0
    RELATIVE = 1
+    MAXABSREL = 2
+    MINABSREL = 3

class BoundaryImpl(object):
@@ -34,14 +36,14 @@ class BoundaryImpl(object):
    def __init__(self,
                 direction: BoundaryDirection,
                 b_type: BoundaryType,
-                 val: float):
+                 val):
        self.direction = direction
        self.b_type = b_type
        self.val = val
        self._validation()

    def _validation(self):
-        pyFinAssert(self.b_type == BoundaryType.ABSOLUTE or self.b_type == BoundaryType.RELATIVE,
+        pyFinAssert(self.b_type in [BoundaryType.ABSOLUTE, BoundaryType.RELATIVE, BoundaryType.MAXABSREL, BoundaryType.MINABSREL],
                    ValueError,
                    "Boundary Type {0} is not recognized".format(self.b_type))
@@ -52,6 +54,28 @@ class BoundaryImpl(object):
    def __call__(self, center: float):
        if self.b_type == BoundaryType.ABSOLUTE:
            return self.val + center
+        elif self.b_type == BoundaryType.MAXABSREL:
+            abs_threshold = self.val[0]
+            rel_threshold = self.val[1]
+            if self.direction == BoundaryDirection.LOWER:
+                rel_bound = center - abs(center) * rel_threshold
+                abs_bound = center - abs_threshold
+                return min(rel_bound, abs_bound)
+            elif self.direction == BoundaryDirection.UPPER:
+                rel_bound = center + abs(center) * rel_threshold
+                abs_bound = center + abs_threshold
+                return max(rel_bound, abs_bound)
+        elif self.b_type == BoundaryType.MINABSREL:
+            abs_threshold = self.val[0]
+            rel_threshold = self.val[1]
+            if self.direction == BoundaryDirection.LOWER:
+                rel_bound = center - abs(center) * rel_threshold
+                abs_bound = center - abs_threshold
+                return max(rel_bound, abs_bound)
+            elif self.direction == BoundaryDirection.UPPER:
+                rel_bound = center + abs(center) * rel_threshold
+                abs_bound = center + abs_threshold
+                return min(rel_bound, abs_bound)
        else:
            pyFinAssert(center >= 0., ValueError, "relative bounds only support positive back bone value")
            return self.val * center
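A quick worked example of the two new boundary types, matching the numbers in the unit tests added further down: with `val = (0.02, 0.2)` around a center of 2.2, `MAXABSREL` keeps the looser of the absolute and relative limits on each side, while `MINABSREL` keeps the tighter one. The import path below is an assumption about where these constraint classes live in alpha-mind.

```python
# A minimal sketch; the import path is an assumption based on the test module below.
from alphamind.portfolio.constraints import (BoundaryDirection, BoundaryImpl,
                                             BoundaryType, BoxBoundary)

center = 2.2
val = (0.02, 0.2)  # (absolute threshold, relative threshold)

# MAXABSREL: take the looser of the absolute and relative limit on each side
loose = BoxBoundary(BoundaryImpl(BoundaryDirection.LOWER, BoundaryType.MAXABSREL, val),
                    BoundaryImpl(BoundaryDirection.UPPER, BoundaryType.MAXABSREL, val))
print(loose.bounds(center))  # (1.76, 2.64): min(2.2 - 0.44, 2.2 - 0.02), max(2.2 + 0.44, 2.2 + 0.02)

# MINABSREL: take the tighter of the two limits on each side
tight = BoxBoundary(BoundaryImpl(BoundaryDirection.LOWER, BoundaryType.MINABSREL, val),
                    BoundaryImpl(BoundaryDirection.UPPER, BoundaryType.MINABSREL, val))
print(tight.bounds(center))  # (2.18, 2.22): max(1.76, 2.18), min(2.64, 2.22)
```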
@@ -106,9 +130,7 @@ class LinearConstraints(object):
                 bounds: Dict[str, BoxBoundary],
                 cons_mat: pd.DataFrame,
                 backbone: np.ndarray=None):
-        pyFinAssert(len(bounds) == cons_mat.shape[1], "Number of bounds should be same as number of col of cons_mat")
-        self.names = list(bounds.keys())
+        self.names = list(set(bounds.keys()).intersection(set(cons_mat.columns)))
        self.bounds = bounds
        self.cons_mat = cons_mat
        self.backbone = backbone
......
@@ -35,7 +35,7 @@ def linear_builder(er: np.ndarray,
    if isinstance(ubound, float):
        ubound = np.ones(n) * ubound

-    if not turn_over_target:
+    if not turn_over_target or current_position is None:
        cons_matrix = np.concatenate((risk_constraints.T, risk_lbound, risk_ubound), axis=1)
        opt = LPOptimizer(cons_matrix, lbound, ubound, -er, method)
......
@@ -6,42 +6,24 @@ Created on 2018-2-9
"""

import unittest
-from PyFin.api import LAST
from alphamind.data.engines.universe import Universe
+from alphamind.data.engines.universe import load_universe


class TestUniverse(unittest.TestCase):

    def test_universe_equal(self):
-        universe1 = Universe('custom', ['zz500'])
-        universe2 = Universe('custom', ['zz500'])
+        universe1 = Universe('zz500')
+        universe2 = Universe('zz500')
        self.assertEqual(universe1, universe2)

-        universe1 = Universe('custom', ['zz500'])
-        universe2 = Universe('custom', ['zz800'])
-        self.assertNotEqual(universe1, universe2)
-
-        filter_cond = LAST('x') > 1.
-        universe1 = Universe('custom', ['zz500'], filter_cond=filter_cond)
-        universe2 = Universe('custom', ['zz500'], filter_cond=filter_cond)
-        self.assertEqual(universe1, universe2)
-
-        universe1 = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
-        universe2 = Universe('custom', ['zz500'], filter_cond=LAST('x') > 2.)
+        universe1 = Universe('zz500')
+        universe2 = Universe('zz800')
        self.assertNotEqual(universe1, universe2)

    def test_universe_persistence(self):
-        universe = Universe('custom', ['zz500'])
-        univ_desc = universe.save()
-        loaded_universe = Universe.load(univ_desc)
-        self.assertEqual(universe.name, loaded_universe.name)
-        self.assertListEqual(universe.base_universe, loaded_universe.base_universe)
-
-        universe = Universe('custom', ['zz500'], filter_cond=LAST('x') > 1.)
+        universe = Universe('zz500')
        univ_desc = universe.save()
-        loaded_universe = Universe.load(univ_desc)
-        self.assertEqual(universe.name, loaded_universe.name)
-        self.assertListEqual(universe.base_universe, loaded_universe.base_universe)
-        self.assertEqual(str(universe.filter_cond), str(loaded_universe.filter_cond))
+        loaded_universe = load_universe(univ_desc)
+        self.assertEqual(universe, loaded_universe)
\ No newline at end of file
@@ -21,7 +21,7 @@ class TestComposer(unittest.TestCase):
    def test_data_meta_persistence(self):
        freq = '5b'
-        universe = Universe('custom', ['zz800'])
+        universe = Universe('zz800')
        batch = 4
        neutralized_risk = ['SIZE']
        risk_model = 'long'
@@ -55,7 +55,7 @@ class TestComposer(unittest.TestCase):
    def test_composer_persistence(self):
        freq = '5b'
-        universe = Universe('custom', ['zz800'])
+        universe = Universe('zz800')
        batch = 4
        neutralized_risk = ['SIZE']
        risk_model = 'long'
......
@@ -78,6 +78,34 @@ class TestConstraints(unittest.TestCase):
        self.assertAlmostEqual(l, 1.76)
        self.assertAlmostEqual(u, 2.42)

+    def test_max_abs_relative_boundary(self):
+        lower = BoundaryImpl(BoundaryDirection.LOWER,
+                             BoundaryType.MAXABSREL,
+                             (0.02, 0.2))
+        upper = BoundaryImpl(BoundaryDirection.UPPER,
+                             BoundaryType.MAXABSREL,
+                             (0.02, 0.2))
+        bound = BoxBoundary(lower, upper)
+
+        center = 2.2
+        l, u = bound.bounds(center)
+        self.assertAlmostEqual(l, 1.76)
+        self.assertAlmostEqual(u, 2.64)
+
+    def test_min_abs_relative_boundary(self):
+        lower = BoundaryImpl(BoundaryDirection.LOWER,
+                             BoundaryType.MINABSREL,
+                             (0.02, 0.2))
+        upper = BoundaryImpl(BoundaryDirection.UPPER,
+                             BoundaryType.MINABSREL,
+                             (0.02, 0.2))
+        bound = BoxBoundary(lower, upper)
+
+        center = 2.2
+        l, u = bound.bounds(center)
+        self.assertAlmostEqual(l, 2.18)
+        self.assertAlmostEqual(u, 2.22)
+
    def test_create_box_bounds_single_value(self):
        names = ['a', 'b', 'c']
        b_type = BoundaryType.RELATIVE
......
@@ -5,9 +5,15 @@ Created on 2017-4-25
@author: cheng.li
"""

+import os
+
SKIP_ENGINE_TESTS = True

+if not SKIP_ENGINE_TESTS:
+    DATA_ENGINE_URI = os.environ['DB_URI']
+else:
+    DATA_ENGINE_URI = None
+
if __name__ == '__main__':
    from simpleutils import add_parent_path
......
{ {
"cells": [ "cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* 请在环境变量中设置`DB_URI`指向数据库"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
...@@ -8,6 +15,7 @@ ...@@ -8,6 +15,7 @@
"source": [ "source": [
"%matplotlib inline\n", "%matplotlib inline\n",
"\n", "\n",
"import os\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"from matplotlib import pyplot as plt\n", "from matplotlib import pyplot as plt\n",
...@@ -42,12 +50,12 @@ ...@@ -42,12 +50,12 @@
"horizon = map_freq(frequency)\n", "horizon = map_freq(frequency)\n",
"weight_gap = 0.01\n", "weight_gap = 0.01\n",
"benchmark_code = 905\n", "benchmark_code = 905\n",
"universe_name = ['zz800']\n", "universe_name = 'zz800'\n",
"universe = Universe('custom', universe_name)\n", "universe = Universe(universe_name)\n",
"ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')\n", "ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')\n",
"\n", "\n",
"executor = NaiveExecutor()\n", "executor = NaiveExecutor()\n",
"data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha'\n", "data_source = os.environ['DB_URI']\n",
"engine = SqlEngine(data_source)" "engine = SqlEngine(data_source)"
] ]
}, },
...@@ -271,7 +279,7 @@ ...@@ -271,7 +279,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with pd.ExcelWriter(f'{universe_name[0]}_{benchmark_code}.xlsx', engine='xlsxwriter') as writer:\n", "with pd.ExcelWriter(f'{universe_name}_{benchmark_code}.xlsx', engine='xlsxwriter') as writer:\n",
" factor_df.to_excel(writer, sheet_name='ret')\n", " factor_df.to_excel(writer, sheet_name='ret')\n",
" ic_df.to_excel(writer, sheet_name='ic')\n", " ic_df.to_excel(writer, sheet_name='ic')\n",
" factor_res.to_excel(writer, sheet_name='ret_stat')\n", " factor_res.to_excel(writer, sheet_name='ret_stat')\n",
...@@ -309,7 +317,7 @@ ...@@ -309,7 +317,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.4" "version": "3.6.5"
}, },
"varInspector": { "varInspector": {
"cols": { "cols": {
......
{ {
"cells": [ "cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* 请在环境变量中设置`DB_URI`指向数据库"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import os\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"from cvxpy import *\n", "from cvxpy import *\n",
...@@ -31,7 +39,7 @@ ...@@ -31,7 +39,7 @@
"risk_penlty = 0.5\n", "risk_penlty = 0.5\n",
"ref_date = '2018-02-08'\n", "ref_date = '2018-02-08'\n",
"\n", "\n",
"engine = SqlEngine()\n", "engine = SqlEngine(os.environ['DB_URI'])\n",
"universe = Universe('custom', ['ashare_ex'])\n", "universe = Universe('custom', ['ashare_ex'])\n",
"codes = engine.fetch_codes(ref_date, universe)\n", "codes = engine.fetch_codes(ref_date, universe)\n",
"\n", "\n",
...@@ -199,8 +207,8 @@ ...@@ -199,8 +207,8 @@
"source": [ "source": [
"def time_function(py_callable, n):\n", "def time_function(py_callable, n):\n",
" start = dt.datetime.now()\n", " start = dt.datetime.now()\n",
" py_callable(n)\n", " val = py_callable(n)\n",
" return (dt.datetime.now() - start).total_seconds()" " return (dt.datetime.now() - start).total_seconds(), val"
] ]
}, },
{ {
...@@ -221,7 +229,8 @@ ...@@ -221,7 +229,8 @@
" sum_entries(w) == 1,]\n", " sum_entries(w) == 1,]\n",
"\n", "\n",
" prob = Problem(objective, constraints)\n", " prob = Problem(objective, constraints)\n",
" prob.solve(verbose=False, solver='CVXOPT', display=False)" " prob.solve(verbose=False, solver='CVXOPT', display=False)\n",
" return prob.value"
] ]
}, },
{ {
...@@ -252,7 +261,8 @@ ...@@ -252,7 +261,8 @@
" b = matrix(b)\n", " b = matrix(b)\n",
" \n", " \n",
" solvers.options['show_progress'] = False\n", " solvers.options['show_progress'] = False\n",
" sol = solvers.qp(P, q, G, h, A, b)" " sol = solvers.qp(P, q, G, h, A, b)\n",
" return sol['primal objective']"
] ]
}, },
{ {
...@@ -268,7 +278,7 @@ ...@@ -268,7 +278,7 @@
" clb = np.ones(1)\n", " clb = np.ones(1)\n",
" cub = np.ones(1)\n", " cub = np.ones(1)\n",
" qpopt = QPOptimizer(signal, sec_cov_values, lbound, ubound, cons_matrix, clb, cub, 1.)\n", " qpopt = QPOptimizer(signal, sec_cov_values, lbound, ubound, cons_matrix, clb, cub, 1.)\n",
" qpopt.feval()" " return qpopt.feval()"
] ]
}, },
{ {
...@@ -286,27 +296,16 @@ ...@@ -286,27 +296,16 @@
"for i, n in enumerate(n_steps):\n", "for i, n in enumerate(n_steps):\n",
" sec_cov_values = sec_cov_values_full[:n, :n]\n", " sec_cov_values = sec_cov_values_full[:n, :n]\n",
" signal = signal_full[:n]\n", " signal = signal_full[:n]\n",
" cvxpy_times[i] = time_function(cvxpy, n) * 1000\n", " cvxpy_times[i], val1 = time_function(cvxpy, n)\n",
" cvxopt_times[i] = time_function(cvxopt, n) * 1000\n", " cvxopt_times[i], val2 = time_function(cvxopt, n)\n",
" ipopt_times[i] = time_function(ipopt, n) * 1000\n", " ipopt_times[i], val3 = time_function(ipopt, n)\n",
" \n", " \n",
" print(\"{0:<8}{1:>12.2f}{2:>12.2f}{3:>12.2f}\".format(n, cvxpy_times[i], cvxopt_times[i], ipopt_times[i]))" " np.testing.assert_almost_equal(val1, val2, 4)\n",
" np.testing.assert_almost_equal(val2, val3, 4)\n",
" \n",
" print(\"{0:<8}{1:>12.4f}{2:>12.4f}{3:>12.4f}\".format(n, cvxpy_times[i], cvxopt_times[i], ipopt_times[i]))"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
...@@ -331,7 +330,7 @@ ...@@ -331,7 +330,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.4" "version": "3.6.5"
}, },
"varInspector": { "varInspector": {
"cols": { "cols": {
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* 使用行业内的排序,进行因子测试;与回归版本,以及原始因子值版本进行比较。本部分参考自《QEPM》 p.p 117\n",
"* 请在环境变量中设置`DB_URI`指向数据库"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 参数设定\n",
"--------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import os\n",
"import pandas as pd\n",
"import numpy as np\n",
"from PyFin.api import *\n",
"from alphamind.api import *\n",
"\n",
"factor = 'CFO2EV'\n",
"universe = Universe('custom', ['zz800'])\n",
"start_date = '2010-01-01'\n",
"end_date = '2018-04-26'\n",
"freq = '20b'\n",
"category = 'sw_adj'\n",
"level = 1\n",
"horizon = map_freq(freq)\n",
"\n",
"engine = SqlEngine(os.environ['DB_URI'])\n",
"\n",
"ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')\n",
"sample_date = '2018-01-04'\n",
"sample_codes = engine.fetch_codes(sample_date, universe)\n",
"\n",
"sample_industry = engine.fetch_industry(sample_date, sample_codes, category=category, level=level)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sample_industry.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 样例因子\n",
"--------------------\n",
"\n",
"我们下面分三种方法,分别考查这几种方法在避免行业集中上面的效果:\n",
"\n",
"* 使用原始因子的排序;\n",
"* 使用原始因子在行业内的排序;\n",
"* 使用原始因子在行业哑变量上回归后得到的残差排序。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. 原始因子排序\n",
"--------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factor1 = {'f1': CSQuantiles(factor)}\n",
"sample_factor1 = engine.fetch_factor(sample_date, factor1, sample_codes)\n",
"sample_factor1 = pd.merge(sample_factor1, sample_industry[['code', 'industry']], on='code')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sample_factor1.sort_values('f1', ascending=False).head(15)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"对于原始因子,如果我们不做任何行业上面的处理,发现我们选定的alpha因子`CFO2EV`较大的股票集中于银行和大金融板块。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. 行业内排序因子\n",
"----------------------"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"这里我们使用调整后的申万行业分类作为行业标签:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factor2 = {'f2': CSQuantiles(factor, groups='sw1_adj')}\n",
"sample_factor2 = engine.fetch_factor(sample_date, factor2, sample_codes)\n",
"sample_factor2 = pd.merge(sample_factor2, sample_industry[['code', 'industry']], on='code')\n",
"sample_factor2.sort_values('f2', ascending=False).head(15)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"使用行业内的排序,则行业分布会比较平均。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. 使用回归将因子行业中性\n",
"--------------------------------"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"还有一种思路,使用线性回归,以行业为哑变量,使用回归后的残差作为因子的替代值,做到行业中性:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factor3 = {'f3': factor}\n",
"sample_factor3 = engine.fetch_factor(sample_date, factor3, sample_codes)\n",
"risk_cov, risk_exp = engine.fetch_risk_model(sample_date, sample_codes)\n",
"sample_factor3 = pd.merge(sample_factor3, sample_industry[['code', 'industry']], on='code')\n",
"sample_factor3 = pd.merge(sample_factor3, risk_exp, on='code')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"raw_factors = sample_factor3['f3'].values\n",
"industry_exp = sample_factor3[industry_styles + ['COUNTRY']].values.astype(float)\n",
"processed_values = factor_processing(raw_factors, pre_process=[], risk_factors=industry_exp, post_process=[percentile])\n",
"sample_factor3['f3'] = processed_values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sample_factor3 = sample_factor3[['code', 'isOpen', 'f3', 'industry']]\n",
"sample_factor3.sort_values('f3', ascending=False).head(15)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"我们发现这种方法的效果并不是很好。调整的幅度并不是很大,同时仍然存在着集中于大金融板块的问题。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 回测结果\n",
"------------------"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"我们使用简单等权重做多前20%支股票,做空后20%的方法,考察三种方法的效果:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"factors = {\n",
" 'raw': CSQuantiles(factor),\n",
" 'peer quantile': CSQuantiles(factor, groups='sw1'),\n",
" 'risk neutral': LAST(factor)\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_ret = pd.DataFrame(columns=['raw', 'peer quantile', 'risk neutral'])\n",
"df_ic = pd.DataFrame(columns=['raw', 'peer quantile', 'risk neutral'])\n",
"\n",
"for date in ref_dates:\n",
" ref_date = date.strftime('%Y-%m-%d')\n",
" codes = engine.fetch_codes(ref_date, universe)\n",
"\n",
" total_factor = engine.fetch_factor(ref_date, factors, codes)\n",
" risk_cov, risk_exp = engine.fetch_risk_model(ref_date, codes)\n",
" industry = engine.fetch_industry(ref_date, codes, category=category, level=level)\n",
" rets = engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=1)\n",
" total_factor = pd.merge(total_factor, industry[['code', 'industry']], on='code')\n",
" total_factor = pd.merge(total_factor, risk_exp, on='code')\n",
" total_factor = pd.merge(total_factor, rets, on='code').dropna()\n",
"\n",
" raw_factors = total_factor['risk neutral'].values\n",
" industry_exp = total_factor[industry_styles + ['COUNTRY']].values.astype(float)\n",
" processed_values = factor_processing(raw_factors, pre_process=[], risk_factors=industry_exp, post_process=[percentile])\n",
" total_factor['risk neutral'] = processed_values\n",
"\n",
" total_factor[['f1_d', 'f2_d', 'f3_d']] = (total_factor[['raw', 'peer quantile', 'risk neutral']] >= 0.8) * 1.\n",
" total_factor.loc[total_factor['raw'] <= 0.2, 'f1_d'] = -1.\n",
" total_factor.loc[total_factor['peer quantile'] <= 0.2, 'f2_d'] = -1.\n",
" total_factor.loc[total_factor['risk neutral'] <= 0.2, 'f3_d'] = -1.\n",
" total_factor[['f1_d', 'f2_d', 'f3_d']] /= np.abs(total_factor[['f1_d', 'f2_d', 'f3_d']]).sum(axis=0)\n",
"\n",
" ret_values = total_factor.dx.values @ total_factor[['f1_d', 'f2_d', 'f3_d']].values\n",
" df_ret.loc[date] = ret_values\n",
" \n",
" ic_values = total_factor[['dx', 'raw', 'peer quantile', 'risk neutral']].corr().values[0, 1:]\n",
" df_ic.loc[date] = ic_values\n",
" print(f\"{date} is finished\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_ret.cumsum().plot(figsize=(14, 7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_ic.cumsum().plot(figsize=(14, 7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
...@@ -4,7 +4,9 @@ ...@@ -4,7 +4,9 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"> The methodolegy is similar to The Barra China Equity Model (CNE5)'s documentation" "* 方法参考自 The Barra China Equity Model (CNE5)'s 文档\n",
"\n",
"* 请在环境变量中设置`DB_URI`指向数据库"
] ]
}, },
{ {
...@@ -15,6 +17,7 @@ ...@@ -15,6 +17,7 @@
"source": [ "source": [
"%matplotlib inline\n", "%matplotlib inline\n",
"\n", "\n",
"import os\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"from matplotlib import pyplot as plt\n", "from matplotlib import pyplot as plt\n",
...@@ -41,7 +44,7 @@ ...@@ -41,7 +44,7 @@
"category = 'sw'\n", "category = 'sw'\n",
"level = 1\n", "level = 1\n",
"freq = '20b'\n", "freq = '20b'\n",
"universe = Universe('custom', ['ashare_ex'])\n", "universe = Universe('ashare_ex')\n",
"\n", "\n",
"horizon = map_freq(freq)\n", "horizon = map_freq(freq)\n",
"ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')" "ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')"
...@@ -54,7 +57,7 @@ ...@@ -54,7 +57,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def risk_factor_analysis(factor_name):\n", "def risk_factor_analysis(factor_name):\n",
" data_source = 'postgres+psycopg2://postgres:A12345678!@10.63.6.220/alpha'\n", " data_source = os.environ['DB_URI']\n",
" engine = SqlEngine(data_source)\n", " engine = SqlEngine(data_source)\n",
" risk_names = list(set(risk_styles).difference({factor_name}))\n", " risk_names = list(set(risk_styles).difference({factor_name}))\n",
" industry_names = list(set(industry_styles).difference({factor_name}))\n", " industry_names = list(set(industry_styles).difference({factor_name}))\n",
...@@ -146,7 +149,7 @@ ...@@ -146,7 +149,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.4" "version": "3.6.5"
}, },
"varInspector": { "varInspector": {
"cols": { "cols": {
......
...@@ -58,7 +58,7 @@ ...@@ -58,7 +58,7 @@
"\n", "\n",
" objective = cvxpy.Minimize(-w.T * er)\n", " objective = cvxpy.Minimize(-w.T * er)\n",
" prob = cvxpy.Problem(objective, constraints)\n", " prob = cvxpy.Problem(objective, constraints)\n",
" prob.solve(solver='GLPK')\n", " prob.solve()\n",
" return w, prob" " return w, prob"
] ]
}, },
...@@ -113,6 +113,13 @@ ...@@ -113,6 +113,13 @@
" print(\"{0:<8}{1:>12.2f}{2:>12.2f}{3:>12f}{4:>12f}{5:>12f}{6:>15}\".format(n, elapsed*1000, result[1], s.min(), s.max(), s.sum(), s[0] + s[1]))" " print(\"{0:<8}{1:>12.2f}{2:>12.2f}{3:>12f}{4:>12f}{5:>12f}{6:>15}\".format(n, elapsed*1000, result[1], s.min(), s.max(), s.sum(), s[0] + s[1]))"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
...@@ -137,7 +144,7 @@ ...@@ -137,7 +144,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.4" "version": "3.6.5"
}, },
"varInspector": { "varInspector": {
"cols": { "cols": {
......
# Proposed settings for the new CSI 500 enhanced strategy

## Current situation

* Among market styles, `EARNYILD` and `SIZE` have reversed since the start of February;
* The current CSI 500 strategy's drawdown has reached roughly 1.6%, breaking its previous maximum-drawdown record;
* The strategy currently carries notable style exposures: positive exposure to `EARNYILD` and `BETA`;
* Style exposures are not well pinned down and have no upper limit.

## Plan

### Measures

* Keep the `EARNYILD` exposure between 0.25 and 0.5 (see the sketch after this list for how such bands can be expressed);
* Keep the `LIQUIDTY` exposure between -0.25 and -0.5;
* Keep the `GROWTH` exposure between 0.2 and 0.4;
* Keep the `SIZE` exposure between -0.2 and 0.0;
* Pin the `BETA` exposure at 0.0;
* Use the new industry classification: sw_adj;
* Keep all other constraints unchanged.
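A minimal sketch of how the exposure bands above could be written with the boundary classes touched in this merge; the import path and the choice of `center = 0.` (benchmark-relative exposure) are assumptions, and the wiring into `LinearConstraints` and the optimizer is left out.

```python
# Sketch only: import path and benchmark-relative convention are assumptions.
from alphamind.portfolio.constraints import (BoundaryDirection, BoundaryImpl,
                                             BoundaryType, BoxBoundary)


def box(lower_val, upper_val):
    """Absolute box boundary [lower_val, upper_val] around a given center."""
    lower = BoundaryImpl(BoundaryDirection.LOWER, BoundaryType.ABSOLUTE, lower_val)
    upper = BoundaryImpl(BoundaryDirection.UPPER, BoundaryType.ABSOLUTE, upper_val)
    return BoxBoundary(lower, upper)


style_bounds = {
    'EARNYILD': box(0.25, 0.50),
    'LIQUIDTY': box(-0.50, -0.25),
    'GROWTH':   box(0.20, 0.40),
    'SIZE':     box(-0.20, 0.00),
    'BETA':     box(0.00, 0.00),
}

for name, bnd in style_bounds.items():
    print(name, bnd.bounds(0.))  # (lower, upper) exposure band around the benchmark
```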
### Characteristics

* We still stick to the current valuation-centred style that emphasises historical earnings performance;
* Add tighter control of style exposures, incorporating discretionary style views;
* Actively increase the exposure to growth;
* Take active exposure in selected industries;
* Cap style exposures to keep any single style from dominating.

### Downsides

* Turnover will increase;
* Industry fundamentals need to be monitored;
* Style rotation needs closer attention, which is harder to manage.

### Notes

* The current style needs to be reviewed regularly;
* For the `LIQUIDTY` factor exposure, actual execution data should be collected to check whether the current transaction-cost assumptions cover the true trading cost under low liquidity.

## Factors

### Factor list

The new factor mix contains:

* Valuation: `ep_q`;
* Quality: `roe_q`;
* Growth: `SGRO`, `GREV`;
* Technical: `IVR`, `ILLIQUIDITY`;
* Analyst expectations: `con_target_price`, `con_pe_rolling_order`.

```python
alpha_factors = {
    'f01': LAST('ep_q'),
    'f02': LAST('roe_q'),
    'f03': LAST('SGRO'),
    'f04': LAST('GREV'),
    'f05': LAST('con_target_price'),
    'f06': LAST('con_pe_rolling_order'),
    'f07': LAST('IVR'),
    'f08': LAST('ILLIQUIDITY'),
}
```

### Factor weights

Ground rules:

* `ep_q` stays the core factor, but given its recent drawdown it gets a weight of 1;
* Fundamental factors get a weight of 1;
* Expectation factors get a weight of 1;
* Technical factors have performed well recently and get a weight of 0.5.

```python
weights = dict(f01=1.,
               f02=1.,
               f03=1.,
               f04=1.,
               f05=1.,
               f06=-1.,
               f07=0.5,
               f08=0.5,
               )
```
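For reference, the factor dictionary and weights above can be combined into a constant-weight model; a minimal sketch, assuming `ConstLinearModel` (imported into `alphamind/model/composer.py` in this merge) accepts `features` and `weights` keyword arguments:

```python
# A minimal sketch; the ConstLinearModel keyword arguments (features/weights)
# are an assumption based on alpha-mind's linearmodel module, not shown in this diff.
from PyFin.api import LAST
from alphamind.model.linearmodel import ConstLinearModel

alpha_factors = {
    'f01': LAST('ep_q'),
    'f02': LAST('roe_q'),
    'f03': LAST('SGRO'),
    'f04': LAST('GREV'),
    'f05': LAST('con_target_price'),
    'f06': LAST('con_pe_rolling_order'),
    'f07': LAST('IVR'),
    'f08': LAST('ILLIQUIDITY'),
}

weights = dict(f01=1., f02=1., f03=1., f04=1.,
               f05=1., f06=-1., f07=0.5, f08=0.5)

# Constant weights mean there is nothing to fit: the model scores x @ w directly,
# and after this merge train_model skips the fit() call for ConstLinearModel.
alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
```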
# Proposed settings for the new CSI 300 enhanced strategy

## Current situation

* Among market styles, `EARNYILD` and `SIZE` have reversed since the start of February;
* The current CSI 300 strategy is still in its backtest period;
* The strategy currently carries one notable style exposure: `EARNYILD`;
* The strategy's tracking error is lower than that of the CSI 500 strategy;
* Style exposures are not well pinned down and have no upper limit.

## Plan

### Measures

* Keep the `EARNYILD` exposure between 0.25 and 0.5;
* Keep the `LIQUIDTY` exposure between -0.25 and -0.5;
* Keep the `SIZE` exposure between -0.2 and 0.0;
* Pin the `BETA`, `SIZE` and `GROWTH` exposures at 0.0;
* Use the new industry classification: sw_adj;
* Keep all other constraints unchanged.

### Characteristics

* We still stick to the current valuation-centred style that emphasises historical earnings performance;
* Add tighter control of style exposures, incorporating discretionary style views;
* Actively increase the exposure to growth;
* Cap style exposures to keep any single style from dominating.

### Downsides

* Turnover will increase;
* Industry fundamentals need to be monitored;
* Style rotation needs closer attention, which is harder to manage.

### Notes

* The current style needs to be reviewed regularly;
* For the `LIQUIDTY` factor exposure, actual execution data should be collected to check whether the current transaction-cost assumptions cover the true trading cost under low liquidity.

## Factors

### Factor list

The new factor mix contains:

* Valuation: `ep_q`;
* Quality: `roe_q`;
* Growth: `SGRO`, `GREV`;
* Technical: `IVR`, `ILLIQUIDITY`;
* Analyst expectations: `con_target_price`, `con_pe_rolling_order`;
* Stock-level indicators: `DividendPaidRatio`.

```python
alpha_factors = {
    'f01': LAST('ep_q'),
    'f02': LAST('roe_q'),
    'f03': LAST('GREV'),
    'f04': LAST('SGRO'),
    'f05': LAST('con_target_price'),
    'f06': LAST('con_pe_rolling_order'),
    'f07': LAST('IVR'),
    'f08': LAST('ILLIQUIDITY'),
    'f09': LAST('DividendPaidRatio')
}
```

### Factor weights

Ground rules:

* `ep_q` stays the core factor, but given its recent drawdown it gets a weight of 1;
* Fundamental factors get a weight of 1;
* Expectation factors get a weight of 1;
* Technical factors have performed well recently and get a weight of 0.5.

```python
weights = dict(f01=1.,
               f02=1.,
               f03=1.,
               f04=1.,
               f05=1.,
               f06=-1.,
               f07=0.5,
               f08=0.5,
               f09=0.5
               )
```