Commit 2daf5bbd authored by Dr.李

added fetch daily return range

parent e623409d
...@@ -13,7 +13,7 @@ import numpy as np ...@@ -13,7 +13,7 @@ import numpy as np
import pandas as pd import pandas as pd
import sqlalchemy as sa import sqlalchemy as sa
import sqlalchemy.orm as orm import sqlalchemy.orm as orm
from sqlalchemy import select, and_, outerjoin, join from sqlalchemy import select, and_, outerjoin, join, over
from sqlalchemy.sql import func from sqlalchemy.sql import func
from alphamind.data.engines.universe import Universe from alphamind.data.engines.universe import Universe
from alphamind.data.dbmodel.models import FactorMaster from alphamind.data.dbmodel.models import FactorMaster
...@@ -151,13 +151,17 @@ class SqlEngine(object): ...@@ -151,13 +151,17 @@ class SqlEngine(object):
def fetch_codes_range(self,
                      universe: Universe,
                      start_date: str = None,
                      end_date: str = None,
                      dates: Iterable[str] = None) -> pd.DataFrame:
    """Load the universe membership rows for a date range or an explicit date list.

    The three date arguments are forwarded unchanged to
    ``universe.query_range``; the statement it returns is executed on
    ``self.engine`` and the result is handed back as a DataFrame.
    """
    universe_stmt = universe.query_range(start_date, end_date, dates)
    codes_frame = pd.read_sql(universe_stmt, self.engine)
    return codes_frame
def fetch_dx_return(self, ref_date, codes, expiry_date=None, horizon=0): def fetch_dx_return(self,
ref_date: str,
codes: Iterable[int],
expiry_date: str=None,
horizon: int=0) -> pd.DataFrame:
start_date = ref_date start_date = ref_date
if not expiry_date: if not expiry_date:
...@@ -165,7 +169,7 @@ class SqlEngine(object): ...@@ -165,7 +169,7 @@ class SqlEngine(object):
else: else:
end_date = expiry_date end_date = expiry_date
query = select([DailyReturn.Code, func.sum(func.log(1. + DailyReturn.d1)).label('dx')]).where( query = select([DailyReturn.Code, func.sum(func.ln(1. + DailyReturn.d1)).label('dx')]).where(
and_( and_(
DailyReturn.Date.between(start_date, end_date), DailyReturn.Date.between(start_date, end_date),
DailyReturn.Code.in_(codes) DailyReturn.Code.in_(codes)
...@@ -174,6 +178,38 @@ class SqlEngine(object): ...@@ -174,6 +178,38 @@ class SqlEngine(object):
return pd.read_sql(query, self.session.bind) return pd.read_sql(query, self.session.bind)
def fetch_dx_return_range(self,
                          universe,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None,
                          horizon: int = 0) -> pd.DataFrame:
    """Fetch forward-looking cumulative log returns for a universe over a date range.

    For every (Date, Code) row of ``DailyReturn`` that also appears in the
    universe query, ``dx`` is the sum of ``ln(1 + d1)`` over that row and the
    next ``horizon`` rows of the same code, ordered by date.

    :param universe: object exposing ``query_range(start_date, end_date)``
    :param start_date: first date of the range; replaced by ``dates[0]`` when
        ``dates`` is non-empty
    :param end_date: last date of the range; replaced by ``dates[-1]`` when
        ``dates`` is non-empty
    :param dates: explicit dates to report (assumed to be in ascending
        order -- TODO confirm with callers)
    :param horizon: number of additional following rows in each window
    :return: DataFrame with the ``Date``, ``Code`` and ``dx`` columns
    """
    # The argument is only promised to be iterable: materialise it once so that
    # indexing works and the emptiness test cannot raise on numpy arrays or
    # pandas indexes (their truth value is ambiguous).
    dates = list(dates) if dates is not None else []

    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    # Remember the end the caller asked for; the query itself has to read
    # further ahead so that the last dates still get a complete window.
    requested_end = end_date
    end_date = advanceDateByCalendar('china.sse', end_date, str(horizon) + 'b').strftime('%Y-%m-%d')

    q2 = universe.query_range(start_date, end_date).alias('temp_universe')
    big_table = join(DailyReturn, q2, and_(DailyReturn.Date == q2.c.Date, DailyReturn.Code == q2.c.Code))

    # Window: current row plus the ``horizon`` following rows of the same code.
    stats = func.sum(func.ln(1. + DailyReturn.d1)).over(
        partition_by=DailyReturn.Code,
        order_by=DailyReturn.Date,
        rows=(0, horizon)).label('dx')

    query = select([DailyReturn.Date, DailyReturn.Code, stats]) \
        .select_from(big_table) \
        .where(DailyReturn.Date.between(start_date, end_date))

    df = pd.read_sql(query, self.session.bind)

    if dates:
        df = df[df.Date.isin(dates)]
    else:
        # Rows after the requested end only exist as look-ahead padding and
        # their windows are truncated, so they must not be reported.
        df = df[pd.to_datetime(df.Date) <= pd.Timestamp(requested_end)]

    return df
def fetch_factor(self, def fetch_factor(self,
ref_date: str, ref_date: str,
factors: Iterable[str], factors: Iterable[str],
...@@ -193,9 +229,9 @@ class SqlEngine(object): ...@@ -193,9 +229,9 @@ class SqlEngine(object):
def fetch_factor_range(self, def fetch_factor_range(self,
universe: Universe, universe: Universe,
factors: Iterable[str], factors: Iterable[str],
start_date: str=None, start_date: str = None,
end_date: str=None, end_date: str = None,
dates: Iterable[str]=None) -> pd.DataFrame: dates: Iterable[str] = None) -> pd.DataFrame:
factor_cols = _map_factors(factors) factor_cols = _map_factors(factors)
q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe') q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
...@@ -223,25 +259,19 @@ class SqlEngine(object): ...@@ -223,25 +259,19 @@ class SqlEngine(object):
def fetch_benchmark_range(self,
                          benchmark: int,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None) -> pd.DataFrame:
    """Load the component weights of one benchmark index over several dates.

    Rows are restricted to ``dates`` when that argument is truthy, otherwise
    to the closed interval ``start_date`` .. ``end_date``.  The stored weight
    is divided by 100 and returned under the label ``weight``.
    """
    if dates:
        date_filter = IndexComponent.Date.in_(dates)
    else:
        date_filter = IndexComponent.Date.between(start_date, end_date)

    selected_cols = [
        IndexComponent.Date,
        IndexComponent.Code,
        (IndexComponent.weight / 100.).label('weight'),
    ]
    row_filter = and_(date_filter, IndexComponent.indexCode == benchmark)
    stmt = select(selected_cols).where(row_filter)

    return pd.read_sql(stmt, self.engine)
def fetch_risk_model(self, def fetch_risk_model(self,
...@@ -273,28 +303,23 @@ class SqlEngine(object): ...@@ -273,28 +303,23 @@ class SqlEngine(object):
def fetch_risk_model_range(self, def fetch_risk_model_range(self,
universe: Universe, universe: Universe,
start_date: str=None, start_date: str = None,
end_date: str=None, end_date: str = None,
dates: Iterable[str]=None, dates: Iterable[str] = None,
risk_model: str='short') -> Tuple[pd.DataFrame, pd.DataFrame]: risk_model: str = 'short') -> Tuple[pd.DataFrame, pd.DataFrame]:
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model) risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors] cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
if dates:
query = select([risk_cov_table.Date, cond = risk_cov_table.Date.in_(dates) if dates else risk_cov_table.Date.between(start_date, end_date)
risk_cov_table.FactorID, query = select([risk_cov_table.Date,
risk_cov_table.Factor] risk_cov_table.FactorID,
+ cov_risk_cols).where( risk_cov_table.Factor]
risk_cov_table.Date.in_(dates) + cov_risk_cols).where(
) cond
else: )
query = select([risk_cov_table.Date,
risk_cov_table.FactorID,
risk_cov_table.Factor]
+ cov_risk_cols).where(
risk_cov_table.Date.between(start_date, end_date)
)
risk_cov = pd.read_sql(query, self.engine).sort_values(['Date', 'FactorID']) risk_cov = pd.read_sql(query, self.engine).sort_values(['Date', 'FactorID'])
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors] risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors]
...@@ -303,7 +328,8 @@ class SqlEngine(object): ...@@ -303,7 +328,8 @@ class SqlEngine(object):
special_risk_table.Code == RiskExposure.Code)) special_risk_table.Code == RiskExposure.Code))
q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe') q2 = universe.query_range(start_date, end_date, dates).alias('temp_universe')
big_table = join(big_table, q2, and_(special_risk_table.Date == q2.c.Date, special_risk_table.Code == q2.c.Code)) big_table = join(big_table, q2,
and_(special_risk_table.Date == q2.c.Date, special_risk_table.Code == q2.c.Code))
query = select( query = select(
[RiskExposure.Date, RiskExposure.Code, special_risk_table.SRISK] + risk_exposure_cols) \ [RiskExposure.Date, RiskExposure.Code, special_risk_table.SRISK] + risk_exposure_cols) \
...@@ -342,9 +368,9 @@ class SqlEngine(object): ...@@ -342,9 +368,9 @@ class SqlEngine(object):
def fetch_data_range(self, def fetch_data_range(self,
universe: Universe, universe: Universe,
factors: Iterable[str], factors: Iterable[str],
start_date: str=None, start_date: str = None,
end_date: str=None, end_date: str = None,
dates: Iterable[str]=None, dates: Iterable[str] = None,
benchmark: int = None, benchmark: int = None,
risk_model: str = 'short') -> Dict[str, pd.DataFrame]: risk_model: str = 'short') -> Dict[str, pd.DataFrame]:
...@@ -377,7 +403,8 @@ if __name__ == '__main__': ...@@ -377,7 +403,8 @@ if __name__ == '__main__':
ref_date = '2017-08-10' ref_date = '2017-08-10'
codes = engine.fetch_codes_range(universe, None, None, ['2017-01-01', '2017-08-10']) codes = engine.fetch_codes_range(universe, None, None, ['2017-01-01', '2017-08-10'])
data = engine.fetch_data_range(universe, ['EPS'], None, None, ['2017-01-01', '2017-08-10'], 905, 'short')
data1 = engine.fetch_dx_return('2017-08-01', )
data2 = engine.fetch_dx_return_range(universe, '2017-08-01', '2017-08-10', ['2017-08-01', '2017-08-10'])
print(codes) print(codes)
print(data) print(data)
...@@ -76,25 +76,16 @@ class Universe(object): ...@@ -76,25 +76,16 @@ class Universe(object):
query = select([UniverseTable.Date, UniverseTable.Code]).distinct() query = select([UniverseTable.Date, UniverseTable.Code]).distinct()
all_and_conditions, all_or_conditions = self._create_condition() all_and_conditions, all_or_conditions = self._create_condition()
if dates: dates_cond = UniverseTable.Date.in_(dates) if dates else UniverseTable.Date.between(start_date, end_date)
query = query.where(
and_( query = query.where(
UniverseTable.Date.in_(dates), and_(
or_( dates_cond,
and_(*all_and_conditions), or_(
*all_or_conditions and_(*all_and_conditions),
) *all_or_conditions
)
)
else:
query = query.where(
and_(
UniverseTable.Date.between(start_date, end_date),
or_(
and_(*all_and_conditions),
*all_or_conditions
)
) )
) )
)
return query return query
...@@ -8,38 +8,56 @@ Created on 2017-8-16 ...@@ -8,38 +8,56 @@ Created on 2017-8-16
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
from PyFin.api import makeSchedule from PyFin.api import *
from alphamind.api import * from alphamind.api import *
engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha") #engine = SqlEngine("mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha")
#engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha') engine = SqlEngine('postgresql+psycopg2://postgres:we083826@localhost/alpha')
universe = Universe('custom', ['pm500_mirror']) universe = Universe('custom', ['zz500'])
neutralize_risk = ['SIZE'] + industry_styles neutralize_risk = ['SIZE'] + industry_styles
n_bins = 5 n_bins = 5
factor_weights = np.array([1.]) factor_weights = np.array([1.])
dates = makeSchedule('2016-08-14', freq = '1w'
'2017-08-14',
tenor='1w', if freq == '1m':
horizon = 21
elif freq == '1w':
horizon = 4
elif freq == '1d':
horizon = 0
start_date = '2016-04-01'
end_date = '2017-08-16'
dates = makeSchedule(start_date,
end_date,
tenor=freq,
calendar='china.sse') calendar='china.sse')
prod_factors = ['EARNYILD', 'ROAEBIT'] prod_factors = ['EARNYILD', 'ROAEBIT', 'CHV', 'CFinc1']
all_data = engine.fetch_data_range(universe, prod_factors, dates=dates, benchmark=905)
factor_all_data = all_data['factor']
return_all_data = engine.fetch_dx_return_range(universe, start_date, end_date, dates, horizon=horizon)
for factor in prod_factors: for factor in prod_factors:
factors = [factor] factors = [factor]
final_res = np.zeros((len(dates), n_bins)) final_res = np.zeros((len(dates), n_bins))
for i, date in enumerate(dates): factor_groups = factor_all_data.groupby('Date')
ref_date = date.strftime('%Y-%m-%d') return_groups = return_all_data.groupby('Date')
codes = engine.fetch_codes(ref_date, universe)
data = engine.fetch_data(ref_date, factors, codes, 905) for i, value in enumerate(factor_groups):
returns = engine.fetch_dx_return(ref_date, codes, horizon=4) date = value[0]
data = value[1]
codes = data.Code.tolist()
ref_date = value[0].strftime('%Y-%m-%d')
returns = return_groups.get_group(date)
total_data = pd.merge(data['factor'], returns, on=['Code']).dropna() total_data = pd.merge(data, returns, on=['Code']).dropna()
print(date, ': ', len(total_data)) print(date, ': ', len(total_data))
risk_exp = total_data[neutralize_risk].values.astype(float) risk_exp = total_data[neutralize_risk].values.astype(float)
dx_return = total_data.dx.values dx_return = total_data.dx.values
...@@ -60,6 +78,10 @@ for factor in prod_factors: ...@@ -60,6 +78,10 @@ for factor in prod_factors:
final_res[i] = res / benchmark.sum() final_res[i] = res / benchmark.sum()
df = pd.DataFrame(final_res, index=dates) df = pd.DataFrame(final_res, index=dates)
start_date = advanceDateByCalendar('china.sse', dates[0], '-1m')
df.loc[start_date] = 0.
df.sort_index(inplace=True)
df.cumsum().plot(figsize=(12, 6)) df.cumsum().plot(figsize=(12, 6))
plt.title('{0} weekly re-balance'.format(factors[0])) plt.title('{0} weekly re-balance'.format(factors[0]))
plt.savefig('{0}_big_universe_20170814.png'.format(factors[0])) plt.savefig('{0}_big_universe_20170814.png'.format(factors[0]))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment