Commit 0c7657eb authored by Dr.李's avatar Dr.李

Made it possible to use an empty factor list

parent 4af7af65
......@@ -66,6 +66,9 @@ def er_portfolio_analysis(er: np.ndarray,
is_tradable: Optional[np.ndarray] = None,
method='risk_neutral',
**kwargs) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
er = er.flatten()
def create_constraints(benchmark, **kwargs):
if 'lbound' in kwargs:
lbound = kwargs['lbound']
......
......@@ -21,6 +21,7 @@ from alphamind.data.dbmodel.models import FactorMaster
from alphamind.data.dbmodel.models import Strategy
from alphamind.data.dbmodel.models import DailyReturn
from alphamind.data.dbmodel.models import IndexComponent
from alphamind.data.dbmodel.models import Industry
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.dbmodel.models import Tiny
from alphamind.data.dbmodel.models import LegacyFactor
......@@ -117,6 +118,13 @@ def _map_factors(factors: Iterable[str]) -> dict:
return factor_cols
def _map_industry_category(category: str) -> str:
if category == 'sw':
return '申万行业分类'
else:
raise ValueError("No other industry is supported at the current time")
class SqlEngine(object):
def __init__(self,
db_url: str):
......@@ -414,7 +422,48 @@ class SqlEngine(object):
return risk_cov, risk_exp
def fetch_data(self, ref_date,
def fetch_industry(self,
                   ref_date: str,
                   codes: Iterable[int],
                   category: str = 'sw'):
    """Fetch the level-1 industry classification for the given codes on one date.

    :param ref_date: trade date to query.
    :param codes: security codes to look up.
    :param category: industry classification scheme; only 'sw' is supported.
    :return: DataFrame with columns [code, industry_code, industry].
    """
    industry_name = _map_industry_category(category)

    conditions = and_(
        Industry.trade_date == ref_date,
        Industry.code.in_(codes),
        Industry.industry == industry_name
    )

    query = select(
        [Industry.code,
         Industry.industryID1.label('industry_code'),
         Industry.industryName1.label('industry')]
    ).where(conditions)

    return pd.read_sql(query, self.engine)
def fetch_industry_range(self,
                         universe: Universe,
                         start_date: str = None,
                         end_date: str = None,
                         dates: Iterable[str] = None,
                         category: str = 'sw'):
    """Fetch level-1 industry classifications for a universe over a date range.

    Either an explicit ``dates`` list or a ``start_date``/``end_date`` pair
    selects the trading days (delegated to ``universe.query_range``).

    :param universe: security universe to restrict the query to.
    :param start_date: first date of the range (ignored when ``dates`` given).
    :param end_date: last date of the range (ignored when ``dates`` given).
    :param dates: explicit list of trade dates.
    :param category: industry classification scheme; only 'sw' is supported.
    :return: DataFrame with columns [trade_date, code, industry_code, industry].
    """
    industry_name = _map_industry_category(category)

    universe_sub = universe.query_range(start_date, end_date, dates).alias('temp_universe')
    joined = join(
        Industry, universe_sub,
        and_(Industry.trade_date == universe_sub.c.trade_date,
             Industry.code == universe_sub.c.code)
    )

    query = select(
        [Industry.trade_date,
         Industry.code,
         Industry.industryID1.label('industry_code'),
         Industry.industryName1.label('industry')]
    ).select_from(joined).where(Industry.industry == industry_name)

    return pd.read_sql(query, self.engine)
def fetch_data(self, ref_date: str,
factors: Iterable[str],
codes: Iterable[int],
benchmark: int = None,
......@@ -449,7 +498,8 @@ class SqlEngine(object):
end_date: str = None,
dates: Iterable[str] = None,
benchmark: int = None,
risk_model: str = 'short') -> Dict[str, pd.DataFrame]:
risk_model: str = 'short',
industry: str='sw') -> Dict[str, pd.DataFrame]:
total_data = {}
transformer = Transformer(factors)
......@@ -467,22 +517,34 @@ class SqlEngine(object):
factor_data = pd.merge(factor_data, risk_exp, how='left', on=['trade_date', 'code'])
total_data['risk_cov'] = risk_cov
total_data['factor'] = factor_data
industry_info = self.fetch_industry_range(universe,
start_date=start_date,
end_date=end_date,
dates=dates,
category=industry)
append_industry_info(factor_data)
factor_data = pd.merge(factor_data, industry_info, on=['trade_date', 'code'])
total_data['factor'] = factor_data
return total_data
# Ad-hoc smoke test; runs only when this module is executed directly.
if __name__ == '__main__':
    from PyFin.api import *
    # NOTE(review): hard-coded credentials/hosts should move to configuration;
    # the assignments below are diff residue — only the last un-commented
    # db_url assignment takes effect.
    db_url = 'postgresql+psycopg2://postgres:we083826@localhost/alpha'
    db_url = 'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha'
    db_url = 'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
    # db_url = 'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha'
    universe = Universe('custom', ['zz500'])
    engine = SqlEngine(db_url)
    ref_date = '2017-08-10'
    # NOTE(review): codes and data2 are each rebound below; the earlier
    # fetch_codes/fetch_factor_range calls are exercised for side effects only.
    codes = engine.fetch_codes(universe=universe, ref_date='2017-08-10')
    data2 = engine.fetch_factor_range(universe=universe, dates=['2017-08-01', '2017-08-10'], factors={'factor': MAXIMUM(('EPS', 'ROEDiluted'))})
    start_date = '2017-08-01'
    end_date = '2017-08-12'
    codes = engine.fetch_codes(universe=universe, ref_date=ref_date)
    data2 = engine.fetch_industry(ref_date=ref_date,
                                  codes=codes)
    data2 = engine.fetch_data_range(universe,
                                    factors=['EPS'],
                                    start_date=start_date,
                                    end_date=end_date)
    print(codes)
    print(data2)
......@@ -16,6 +16,9 @@ DEFAULT_FACTOR_NAME = 'user_factor'
def factor_translator(factor_pool):
if not factor_pool:
return None, None
if isinstance(factor_pool, str):
return {factor_pool: factor_pool}, [factor_pool]
elif isinstance(factor_pool, SecurityValueHolder):
......@@ -57,11 +60,17 @@ class Transformer(object):
expression_dict, expression_dependency = \
factor_translator(expressions)
if expression_dict:
res = list(zip(*list(expression_dict.items())))
self.names = list(res[0])
self.expressions = list(res[1])
self.dependency = expression_dependency
else:
self.names = []
self.expressions = []
self.dependency = []
def transform(self, group_name, data):
if len(data) > 0:
......
......@@ -15,6 +15,7 @@ from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.utilities import alpha_logger
def _map_horizon(frequency: str) -> int:
......@@ -52,13 +53,15 @@ def prepare_data(engine: SqlEngine,
dates=dates,
warm_start=warm_start).sort_values(['trade_date', 'code'])
return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
industry_df = engine.fetch_industry_range(universe, dates=dates)
benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
df = pd.merge(df, industry_df, on=['trade_date', 'code'])
df['weight'] = df['weight'].fillna(0.)
return df[['trade_date', 'code', 'dx']], df[['trade_date', 'code', 'weight'] + transformer.names]
return df[['trade_date', 'code', 'dx']], df[['trade_date', 'code', 'weight', 'isOpen', 'industry_code', 'industry'] + transformer.names]
def batch_processing(x_values,
......@@ -123,6 +126,8 @@ def fetch_data_package(engine: SqlEngine,
risk_model: str = 'short',
pre_process: Iterable[object] = None,
post_process: Iterable[object] = None):
alpha_logger.info("Starting data package fetching ...")
transformer = Transformer(alpha_factors)
dates = makeSchedule(start_date, end_date, frequency, calendar='china.sse', dateRule=BizDayConventions.Following)
return_df, factor_df = prepare_data(engine,
......@@ -134,6 +139,8 @@ def fetch_data_package(engine: SqlEngine,
benchmark,
warm_start)
alpha_logger.info("Loading data is finished")
if neutralized_risk:
risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
used_neutralized_risk = list(set(neutralized_risk).difference(transformer.names))
......@@ -157,6 +164,9 @@ def fetch_data_package(engine: SqlEngine,
dates = np.unique(date_label)
return_df['weight'] = train_x['weight']
return_df['industry'] = train_x['industry']
return_df['industry_code'] = train_x['industry_code']
return_df['isOpen'] = train_x['isOpen']
train_x_buckets, train_y_buckets, predict_x_buckets = batch_processing(x_values,
y_values,
......@@ -167,6 +177,8 @@ def fetch_data_package(engine: SqlEngine,
pre_process,
post_process)
alpha_logger.info("Data processing is finished")
ret = dict()
ret['settlement'] = return_df
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment