Commit b8af190d authored by Dr.李's avatar Dr.李

Added one filter example and fixed the bug for the is_tradable flag

parent 3e54d3bc
......@@ -71,13 +71,13 @@ def er_portfolio_analysis(er: np.ndarray,
def create_constraints(benchmark, **kwargs):
if 'lbound' in kwargs:
lbound = kwargs['lbound']
lbound = kwargs['lbound'].copy()
del kwargs['lbound']
else:
lbound = 0.
if 'ubound' in kwargs:
ubound = kwargs['ubound']
ubound = kwargs['ubound'].copy()
del kwargs['ubound']
else:
ubound = 0.01 + benchmark
......
......@@ -82,15 +82,6 @@ total_risk_factors = risk_styles + industry_styles + macro_styles
factor_tables = [FullFactorView, Experimental]
def append_industry_info(df):
    """Add ``industry`` and ``industry_code`` columns to ``df`` in place.

    The columns named in the module-level ``industry_styles`` list are read
    from ``df`` and cast to booleans. For every row, the first truthy flag
    decides the result: ``industry`` receives the matching name from
    ``industry_styles`` and ``industry_code`` receives that name's position
    in the list.

    :param df: frame containing every column listed in ``industry_styles``;
        it is modified in place and nothing is returned.
    :raises IndexError: if some row has no truthy industry flag.
    """
    names = np.array(industry_styles)
    positions = np.arange(len(industry_styles), dtype=int)
    flags = df[industry_styles].values.astype(bool)

    # Build both columns completely before touching ``df`` so that a failing
    # row leaves the frame unmodified.
    industry_names = []
    industry_positions = []
    for mask in flags:
        industry_names.append(names[mask][0])
        industry_positions.append(positions[mask][0])

    df['industry'] = industry_names
    df['industry_code'] = industry_positions
def _map_risk_model_table(risk_model: str) -> tuple:
if risk_model == 'day':
return RiskCovDay, FullFactorView.d_srisk
......@@ -263,6 +254,7 @@ class SqlEngine(object):
if col not in set(['code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values
df['isOpen'] = df.isOpen.astype(bool)
df = df.loc[ref_date]
df.index = list(range(len(df)))
return df
......@@ -272,7 +264,8 @@ class SqlEngine(object):
factors: Union[Transformer, Iterable[object]],
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None) -> pd.DataFrame:
dates: Iterable[str] = None,
external_data: pd.DataFrame = None) -> pd.DataFrame:
if isinstance(factors, Transformer):
transformer = factors
......@@ -305,13 +298,19 @@ class SqlEngine(object):
query = select([FullFactorView.trade_date, FullFactorView.code, FullFactorView.isOpen] + list(factor_cols.keys())) \
.select_from(big_table)
df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code']).set_index('trade_date')
df = pd.read_sql(query, self.engine).sort_values(['trade_date', 'code'])
if external_data is not None:
df = pd.merge(df, external_data, on=['trade_date', 'code']).dropna()
df.set_index('trade_date', inplace=True)
res = transformer.transform('code', df)
for col in res.columns:
if col not in set(['code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values
df['isOpen'] = df.isOpen.astype(bool)
return df.reset_index()
def fetch_benchmark(self,
......@@ -462,7 +461,8 @@ class SqlEngine(object):
factors: Iterable[str],
codes: Iterable[int],
benchmark: int = None,
risk_model: str = 'short') -> Dict[str, pd.DataFrame]:
risk_model: str = 'short',
industry: str = 'sw') -> Dict[str, pd.DataFrame]:
total_data = {}
......@@ -481,9 +481,13 @@ class SqlEngine(object):
factor_data = pd.merge(factor_data, risk_exp, how='left', on=['code'])
total_data['risk_cov'] = risk_cov
total_data['factor'] = factor_data
industry_info = self.fetch_industry(ref_date=ref_date,
codes=codes,
category=industry)
append_industry_info(factor_data)
factor_data = pd.merge(factor_data, industry_info, on=['code'])
total_data['factor'] = factor_data
return total_data
def fetch_data_range(self,
......@@ -494,11 +498,17 @@ class SqlEngine(object):
dates: Iterable[str] = None,
benchmark: int = None,
risk_model: str = 'short',
industry: str = 'sw') -> Dict[str, pd.DataFrame]:
industry: str = 'sw',
external_data: pd.DataFrame = None) -> Dict[str, pd.DataFrame]:
total_data = {}
transformer = Transformer(factors)
factor_data = self.fetch_factor_range(universe, transformer, start_date, end_date, dates)
factor_data = self.fetch_factor_range(universe,
transformer,
start_date,
end_date,
dates,
external_data=external_data)
if benchmark:
benchmark_data = self.fetch_benchmark_range(benchmark, start_date, end_date, dates)
......
# -*- coding: utf-8 -*-
"""
Created on 2017-9-5
@author: cheng.li
"""
import pandas as pd
import numpy as np
from PyFin.api import *
from alphamind.api import *
from matplotlib import pyplot as plt
plt.style.use('ggplot')

# External sentiment data, sorted by (trade_date, code) and indexed by trade date.
sentiment_df = pd.read_csv('d:/xueqiu.csv', parse_dates=['trade_date']) \
    .sort_values(['trade_date', 'code']) \
    .set_index('trade_date')

# NOTE(review): this connection string embeds a plaintext password; prefer
# loading credentials from the environment or a local, untracked config file.
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')

index_name = 'zz500'
benchmark = 905
universe = Universe(index_name, [index_name])
neutralized_risk = ['SIZE'] + industry_styles

# Expression applied per 'code' to the 'post' column; the result is named 'xueqiu'.
# Presumably a 5-period moving average -- confirm against the PyFin API.
expression = MA(5, ['post'])
n_bins = 5
frequency = '1w'
new_factor_df = expression.transform(sentiment_df, name='xueqiu', category_field='code').reset_index()

# Alpha factors and the fixed weights used to combine them into one raw score.
factors = ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted']
weights = np.array([0.015881607, -0.015900173, -0.001792638,
                    0.014277867, 0.034129344, 0.019044573,
                    0.042747382, 0.048765746])

start_date = '2016-01-01'
end_date = '2017-09-03'
dates = makeSchedule(start_date, end_date, frequency, 'china.sse')

total_data = engine.fetch_data_range(universe,
                                     factors,
                                     dates=dates,
                                     benchmark=benchmark)
return_data = engine.fetch_dx_return_range(universe,
                                           dates=dates,
                                           horizon=4)

# Join factor data, the sentiment-derived factor and the returns on
# (trade_date, code); rows with any missing value are dropped.
settle_df = total_data['factor']
settle_df = pd.merge(settle_df, new_factor_df, on=['trade_date', 'code'])
settle_df = pd.merge(settle_df, return_data, on=['trade_date', 'code'])
settle_df.dropna(inplace=True)
settle_df.set_index('trade_date', inplace=True)

# Only dates that survived the merges are analysed.
dates = settle_df.index.unique()

# One value per date, filled in by the loop below.
# NOTE(review): nothing in this script as shown plots or prints final_res.
final_res = np.zeros(len(dates))

for i, date in enumerate(dates):
    # Slice the cross-section once; the list-style key keeps the result a
    # DataFrame even when a date has a single row.
    day_df = settle_df.loc[[date]]

    risk_exp = day_df[neutralized_risk].values
    raw_factor = day_df[factors].values @ weights
    dx_return = day_df['dx'].values
    benchmark_w = day_df['weight'].values

    neutralized_factor = factor_processing(raw_factor.reshape((-1, 1)),
                                           pre_process=[winsorize_normal, standardize],
                                           risk_factors=risk_exp,
                                           post_process=[standardize])

    # Work on a copy so that the filter below does not write back into settle_df.
    is_tradable = day_df['isOpen'].values.copy()

    # Filter: names whose 'xueqiu' value is above the 95th percentile of the
    # day's cross-section are flagged as not tradable.
    xueqiu_values = day_df['xueqiu'].values
    top_p = np.percentile(xueqiu_values, 95)
    is_tradable[xueqiu_values > top_p] = False

    industry = day_df['industry'].values

    # A single 'total' constraint over all names; the benchmark weight sum is
    # passed twice -- presumably as lower and upper bound (confirm against
    # Constraints.set_constraints).
    constraints = Constraints(np.ones((len(is_tradable), 1)), ['total'])
    constraints.set_constraints('total', benchmark_w.sum(), benchmark_w.sum())

    res = er_portfolio_analysis(neutralized_factor,
                                industry,
                                dx_return=dx_return,
                                method='risk_neutral',
                                constraints=constraints,
                                is_tradable=is_tradable,
                                benchmark=benchmark_w)

    # Keep the ['er']['total'] entry of the second returned object.
    final_res[i] = res[1]['er']['total']
    print('{0} is finished'.format(date))
......@@ -37,11 +37,11 @@ portfolio_industry_neutralize = True
alpha_factors = ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted'] # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
benchmark = 905
n_bins = 5
frequency = '2w'
frequency = '1w'
batch = 8
start_date = '2017-01-01'
end_date = '2017-08-31'
method = 'risk_neutral'
end_date = '2017-09-03'
method = 'rank'
use_rank = 100
'''
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment