Commit 2daf5bbd authored by Dr.李

added fetch daily return range

parent e623409d
......@@ -13,7 +13,7 @@ import numpy as np
import pandas as pd
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy import select, and_, outerjoin, join
from sqlalchemy import select, and_, outerjoin, join, over
from sqlalchemy.sql import func
from alphamind.data.engines.universe import Universe
from alphamind.data.dbmodel.models import FactorMaster
......@@ -151,13 +151,17 @@ class SqlEngine(object):
def fetch_codes_range(self,
                      universe: Universe,
                      start_date: str = None,
                      end_date: str = None,
                      dates: Iterable[str] = None) -> pd.DataFrame:
    """Fetch the members of ``universe`` for a date range or explicit dates.

    The selection itself is delegated to ``universe.query_range``; this
    method only executes the resulting query against ``self.engine``.

    :param universe: universe definition used to build the query
    :param start_date: range start, forwarded to ``query_range``
    :param end_date: range end, forwarded to ``query_range``
    :param dates: explicit dates, forwarded to ``query_range``
    :return: the query result as a data frame
    """
    query = universe.query_range(start_date, end_date, dates)
    return pd.read_sql(query, self.engine)
def fetch_dx_return(self, ref_date, codes, expiry_date=None, horizon=0):
def fetch_dx_return(self,
ref_date: str,
codes: Iterable[int],
expiry_date: str=None,
horizon: int=0) -> pd.DataFrame:
start_date = ref_date
if not expiry_date:
......@@ -165,7 +169,7 @@ class SqlEngine(object):
else:
end_date = expiry_date
query = select([DailyReturn.Code, func.sum(func.log(1. + DailyReturn.d1)).label('dx')]).where(
query = select([DailyReturn.Code, func.sum(func.ln(1. + DailyReturn.d1)).label('dx')]).where(
and_(
DailyReturn.Date.between(start_date, end_date),
DailyReturn.Code.in_(codes)
......@@ -174,6 +178,38 @@ class SqlEngine(object):
return pd.read_sql(query, self.session.bind)
def fetch_dx_return_range(self,
                          universe,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None,
                          horizon: int = 0) -> pd.DataFrame:
    """Fetch forward cumulative log returns for a universe over a date range.

    For every (Date, Code) row of ``DailyReturn`` that is also in the
    universe, ``dx`` is the sum of ``ln(1 + d1)`` over that row and the
    ``horizon`` rows that follow it for the same code, ordered by date.

    :param universe: object exposing ``query_range(start_date, end_date)``
    :param start_date: range start; replaced by ``dates[0]`` when ``dates``
        is non-empty
    :param end_date: range end; replaced by ``dates[-1]`` when ``dates``
        is non-empty
    :param dates: optional explicit dates; when given, only rows whose
        ``Date`` is in ``dates`` are returned
    :param horizon: number of following rows included in each sum
    :return: data frame with the columns ``Date``, ``Code`` and ``dx``
    """
    # ``dates`` is only promised to be iterable, yet it is truth-tested,
    # indexed and reused below, so materialise it once up front.
    if dates is not None:
        dates = list(dates)

    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    # Extend the queried range by ``horizon`` business days so the window of
    # the last requested date can still see its following rows.
    end_date = advanceDateByCalendar('china.sse', end_date, str(horizon) + 'b').strftime('%Y-%m-%d')

    q2 = universe.query_range(start_date, end_date).alias('temp_universe')
    big_table = join(DailyReturn, q2, and_(DailyReturn.Date == q2.c.Date, DailyReturn.Code == q2.c.Code))

    # rows=(0, horizon): from the current row to ``horizon`` rows following.
    stats = func.sum(func.ln(1. + DailyReturn.d1)).over(
        partition_by=DailyReturn.Code,
        order_by=DailyReturn.Date,
        rows=(0, horizon)).label('dx')

    query = select([DailyReturn.Date, DailyReturn.Code, stats]) \
        .select_from(big_table) \
        .where(DailyReturn.Date.between(start_date, end_date))

    df = pd.read_sql(query, self.session.bind)

    # NOTE(review): without ``dates`` the rows inside the horizon extension
    # (after the caller's ``end_date``) are returned as well -- confirm that
    # callers expect this.
    if dates:
        df = df[df.Date.isin(dates)]

    return df
def fetch_factor(self,
ref_date: str,
factors: Iterable[str],
......@@ -193,9 +229,9 @@ class SqlEngine(object):
def fetch_factor_range(self,
universe: Universe,
factors: Iterable[str],
start_date: str=None,
end_date: str=None,
dates: Iterable[str]=None) -> pd.DataFrame:
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None) -> pd.DataFrame:
factor_cols = _map_factors(factors)
q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
......@@ -223,25 +259,19 @@ class SqlEngine(object):
def fetch_benchmark_range(self,
                          benchmark: int,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None) -> pd.DataFrame:
    """Fetch index component weights for a benchmark over several dates.

    :param benchmark: index code matched against ``IndexComponent.indexCode``
    :param start_date: inclusive range start, used when ``dates`` is empty
    :param end_date: inclusive range end, used when ``dates`` is empty
    :param dates: explicit dates; when non-empty they take precedence over
        ``start_date`` / ``end_date``
    :return: data frame with the columns ``Date``, ``Code`` and ``weight``,
        where ``weight`` is the stored weight divided by 100
    """
    # One date predicate instead of two near-identical query branches.
    cond = IndexComponent.Date.in_(dates) if dates else IndexComponent.Date.between(start_date, end_date)

    query = select(
        [IndexComponent.Date, IndexComponent.Code, (IndexComponent.weight / 100.).label('weight')]).where(
        and_(
            cond,
            IndexComponent.indexCode == benchmark
        )
    )

    return pd.read_sql(query, self.engine)
def fetch_risk_model(self,
......@@ -273,28 +303,23 @@ class SqlEngine(object):
def fetch_risk_model_range(self,
universe: Universe,
start_date: str=None,
end_date: str=None,
dates: Iterable[str]=None,
risk_model: str='short') -> Tuple[pd.DataFrame, pd.DataFrame]:
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None,
risk_model: str = 'short') -> Tuple[pd.DataFrame, pd.DataFrame]:
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
if dates:
query = select([risk_cov_table.Date,
risk_cov_table.FactorID,
risk_cov_table.Factor]
+ cov_risk_cols).where(
risk_cov_table.Date.in_(dates)
)
else:
query = select([risk_cov_table.Date,
risk_cov_table.FactorID,
risk_cov_table.Factor]
+ cov_risk_cols).where(
risk_cov_table.Date.between(start_date, end_date)
)
cond = risk_cov_table.Date.in_(dates) if dates else risk_cov_table.Date.between(start_date, end_date)
query = select([risk_cov_table.Date,
risk_cov_table.FactorID,
risk_cov_table.Factor]
+ cov_risk_cols).where(
cond
)
risk_cov = pd.read_sql(query, self.engine).sort_values(['Date', 'FactorID'])
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors]
......@@ -303,7 +328,8 @@ class SqlEngine(object):
special_risk_table.Code == RiskExposure.Code))
q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
big_table = join(big_table, q2, and_(special_risk_table.Date == q2.c.Date, special_risk_table.Code == q2.c.Code))
big_table = join(big_table, q2,
and_(special_risk_table.Date == q2.c.Date, special_risk_table.Code == q2.c.Code))
query = select(
[RiskExposure.Date, RiskExposure.Code, special_risk_table.SRISK] + risk_exposure_cols) \
......@@ -342,9 +368,9 @@ class SqlEngine(object):
def fetch_data_range(self,
universe: Universe,
factors: Iterable[str],
start_date: str=None,
end_date: str=None,
dates: Iterable[str]=None,
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None,
benchmark: int = None,
risk_model: str = 'short') -> Dict[str, pd.DataFrame]:
......@@ -377,7 +403,8 @@ if __name__ == '__main__':
ref_date = '2017-08-10'
codes = engine.fetch_codes_range(universe, None, None, ['2017-01-01', '2017-08-10'])
data = engine.fetch_data_range(universe, ['EPS'], None, None, ['2017-01-01', '2017-08-10'], 905, 'short')
data1 = engine.fetch_dx_return('2017-08-01', )
data2 = engine.fetch_dx_return_range(universe, '2017-08-01', '2017-08-10', ['2017-08-01', '2017-08-10'])
print(codes)
print(data)
......@@ -76,25 +76,16 @@ class Universe(object):
query = select([UniverseTable.Date, UniverseTable.Code]).distinct()
all_and_conditions, all_or_conditions = self._create_condition()
if dates:
query = query.where(
and_(
UniverseTable.Date.in_(dates),
or_(
and_(*all_and_conditions),
*all_or_conditions
)
)
)
else:
query = query.where(
and_(
UniverseTable.Date.between(start_date, end_date),
or_(
and_(*all_and_conditions),
*all_or_conditions
)
dates_cond = UniverseTable.Date.in_(dates) if dates else UniverseTable.Date.between(start_date, end_date)
query = query.where(
and_(
dates_cond,
or_(
and_(*all_and_conditions),
*all_or_conditions
)
)
)
return query
......@@ -8,38 +8,56 @@ Created on 2017-8-16
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PyFin.api import makeSchedule
from PyFin.api import *
from alphamind.api import *
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
#engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('custom', ['pm500_mirror'])
#engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha')
universe = Universe('custom', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles
n_bins = 5
factor_weights = np.array([1.])
dates = makeSchedule('2016-08-14',
'2017-08-14',
tenor='1w',
freq = '1w'
if freq == '1m':
horizon = 21
elif freq == '1w':
horizon = 4
elif freq == '1d':
horizon = 0
start_date = '2016-04-01'
end_date = '2017-08-16'
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse')
prod_factors = ['EARNYILD', 'ROAEBIT']
prod_factors = ['EARNYILD', 'ROAEBIT', 'CHV', 'CFinc1']
all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
factor_all_data = all_data['factor']
return_all_data = engine.fetch_dx_return_range(universe, start_date, end_date, dates, horizon=horizon)
for factor in prod_factors:
factors = [factor]
final_res = np.zeros((len(dates), n_bins))
for i, date in enumerate(dates):
ref_date = date.strftime('%Y-%m-%d')
codes = engine.fetch_codes(ref_date, universe)
factor_groups = factor_all_data.groupby('Date')
return_groups = return_all_data.groupby('Date')
data = engine.fetch_data(ref_date, factors, codes, 905)
returns = engine.fetch_dx_return(ref_date, codes, horizon=4)
for i, value in enumerate(factor_groups):
date = value[0]
data = value[1]
codes = data.Code.tolist()
ref_date = value[0].strftime('%Y-%m-%d')
returns = return_groups.get_group(date)
total_data = pd.merge(data['factor'], returns, on=['Code']).dropna()
total_data = pd.merge(data, returns, on=['Code']).dropna()
print(date, ': ', len(total_data))
risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values
......@@ -60,6 +78,10 @@ for factor in prod_factors:
final_res[i] = res / benchmark.sum()
df = pd.DataFrame(final_res, index=dates)
start_date = advanceDateByCalendar('china.sse', dates[0], '-1m')
df.loc[start_date] = 0.
df.sort_index(inplace=True)
df.cumsum().plot(figsize=(12, 6))
plt.title('{0} weekly re-balance'.format(factors[0]))
plt.savefig('{0}_big_universe_20170814.png'.format(factors[0]))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment