Commit 64259fc1 authored by Dr.李

FEATURE: added benchmark fetching

parent 14c686dd
...@@ -8,6 +8,7 @@ import os ...@@ -8,6 +8,7 @@ import os
if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "rl": if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "rl":
from alphamind.data.dbmodel.models.models_rl import Market from alphamind.data.dbmodel.models.models_rl import Market
from alphamind.data.dbmodel.models.models_rl import IndexMarket
from alphamind.data.dbmodel.models.models_rl import Universe from alphamind.data.dbmodel.models.models_rl import Universe
from alphamind.data.dbmodel.models.models_rl import Industry from alphamind.data.dbmodel.models.models_rl import Industry
from alphamind.data.dbmodel.models.models_rl import RiskExposure from alphamind.data.dbmodel.models.models_rl import RiskExposure
...@@ -17,8 +18,11 @@ if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "rl": ...@@ -17,8 +18,11 @@ if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "rl":
from alphamind.data.dbmodel.models.models_rl import SpecificRiskDay from alphamind.data.dbmodel.models.models_rl import SpecificRiskDay
from alphamind.data.dbmodel.models.models_rl import SpecificRiskShort from alphamind.data.dbmodel.models.models_rl import SpecificRiskShort
from alphamind.data.dbmodel.models.models_rl import SpecificRiskLong from alphamind.data.dbmodel.models.models_rl import SpecificRiskLong
from alphamind.data.dbmodel.models.models_rl import IndexComponent
from alphamind.data.dbmodel.models.models_rl import IndexWeight
else: else:
from alphamind.data.dbmodel.models.models import Market from alphamind.data.dbmodel.models.models import Market
from alphamind.data.dbmodel.models.models import IndexMarket
from alphamind.data.dbmodel.models.models import Universe from alphamind.data.dbmodel.models.models import Universe
from alphamind.data.dbmodel.models.models import Industry from alphamind.data.dbmodel.models.models import Industry
from alphamind.data.dbmodel.models.models import RiskExposure from alphamind.data.dbmodel.models.models import RiskExposure
...@@ -30,5 +34,4 @@ else: ...@@ -30,5 +34,4 @@ else:
from alphamind.data.dbmodel.models.models import SpecificRiskLong from alphamind.data.dbmodel.models.models import SpecificRiskLong
from alphamind.data.dbmodel.models.models import FactorMaster from alphamind.data.dbmodel.models.models import FactorMaster
from alphamind.data.dbmodel.models.models import IndexComponent from alphamind.data.dbmodel.models.models import IndexComponent
from alphamind.data.dbmodel.models.models import IndexMarket
from alphamind.data.dbmodel.models.models import RiskMaster from alphamind.data.dbmodel.models.models import RiskMaster
...@@ -35,6 +35,50 @@ class _StkDailyPricePro(Base): ...@@ -35,6 +35,50 @@ class _StkDailyPricePro(Base):
is_verify = Column(INT, index=True, server_default=text("'0'")) is_verify = Column(INT, index=True, server_default=text("'0'"))
class _IndexDailyPrice(Base):
    """Declarative mapping of the ``index_daily_price`` table.

    Several Python attribute names differ from the underlying column names
    (``indexCode`` -> ``security_code``, ``chgPct`` -> ``change_pct``,
    ``secShortName`` -> ``name``).
    """

    __tablename__ = "index_daily_price"
    __table_args__ = (
        # One row per (trade_date, security_code, flag) combination.
        Index(
            "unique_index_daily_price_index",
            "trade_date",
            "security_code",
            "flag",
            unique=True,
        ),
    )

    id = Column(INT, primary_key=True)
    trade_date = Column(Date)
    # Stored in the ``security_code`` column.
    indexCode = Column("security_code", Text)
    # Stored in the ``change_pct`` column.
    chgPct = Column("change_pct", FLOAT)
    # Stored in the ``name`` column.
    secShortName = Column("name", Text)
    is_valid = Column(INT, nullable=False)
    # Server-side default is '1'; presumably marks the active row version -- TODO confirm.
    flag = Column(INT, index=True, server_default=text("'1'"))
    # Server-side default is '0'.
    is_verify = Column(INT, index=True, server_default=text("'0'"))
class _Index(Base):
    """Declarative mapping of the ``index`` table (index constituent weights).

    ``indexSymbol`` maps to the ``isymbol`` column and ``weight`` maps to the
    ``weighing`` column.
    """

    __tablename__ = "index"
    __table_args__ = (
        # One row per (trade_date, isymbol, symbol, flag) combination.
        Index(
            "unique_index_index",
            "trade_date",
            "isymbol",
            "symbol",
            "flag",
            unique=True,
        ),
    )

    id = Column(INT, primary_key=True)
    trade_date = Column(Date)
    # Stored in the ``isymbol`` column.
    indexSymbol = Column("isymbol", Text)
    symbol = Column(Text)
    # Stored in the ``weighing`` column.
    weight = Column("weighing", FLOAT)
    # Server-side default is '1'; presumably marks the active row version -- TODO confirm.
    flag = Column(INT, index=True, server_default=text("'1'"))
class _IndexComponent(Base):
    """Declarative mapping of the ``index_component`` table.

    Each row carries both symbol-style identifiers (``isymbol`` / ``symbol``)
    and code-style identifiers (``isecurity_code`` / ``security_code``) for
    an index and one of its members.
    """

    __tablename__ = "index_component"
    __table_args__ = (
        # One row per (trade_date, isecurity_code, security_code, flag).
        # NOTE(review): the index name is the same string used by the
        # ``_Index`` mapping ('unique_index_index') -- confirm this matches
        # the real schema and is not a copy-paste leftover.
        Index(
            "unique_index_index",
            "trade_date",
            "isecurity_code",
            "security_code",
            "flag",
            unique=True,
        ),
    )

    id = Column(INT, primary_key=True)
    trade_date = Column(Date)
    # Stored in the ``isymbol`` column.
    indexSymbol = Column("isymbol", Text)
    symbol = Column(Text)
    # Stored in the ``isecurity_code`` column.
    indexCode = Column("isecurity_code", Text)
    # Stored in the ``security_code`` column.
    code = Column("security_code", Text)
    # Server-side default is '1'; presumably marks the active row version -- TODO confirm.
    flag = Column(INT, index=True, server_default=text("'1'"))
class _StkUniverse(Base): class _StkUniverse(Base):
__tablename__ = 'stk_universe' __tablename__ = 'stk_universe'
__table_args__ = ( __table_args__ = (
...@@ -358,6 +402,7 @@ class _SpecificRiskShort(Base): ...@@ -358,6 +402,7 @@ class _SpecificRiskShort(Base):
Market = _StkDailyPricePro Market = _StkDailyPricePro
IndexMarket = _IndexDailyPrice
Universe = _StkUniverse Universe = _StkUniverse
Industry = _SwIndustryDaily Industry = _SwIndustryDaily
RiskExposure = _RiskExposure RiskExposure = _RiskExposure
...@@ -367,3 +412,5 @@ RiskCovLong = _RiskCovLong ...@@ -367,3 +412,5 @@ RiskCovLong = _RiskCovLong
SpecificRiskDay = _SpecificRiskDay SpecificRiskDay = _SpecificRiskDay
SpecificRiskShort = _SpecificRiskShort SpecificRiskShort = _SpecificRiskShort
SpecificRiskLong = _SpecificRiskLong SpecificRiskLong = _SpecificRiskLong
IndexComponent = _IndexComponent
IndexWeight = _Index
...@@ -14,7 +14,7 @@ if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "rl": ...@@ -14,7 +14,7 @@ if "DB_VENDOR" in os.environ and os.environ["DB_VENDOR"].lower() == "rl":
from alphamind.data.engines.sqlengine.sqlengine_rl import risk_styles from alphamind.data.engines.sqlengine.sqlengine_rl import risk_styles
from alphamind.data.engines.sqlengine.sqlengine_rl import macro_styles from alphamind.data.engines.sqlengine.sqlengine_rl import macro_styles
else: else:
from alphamind.data.engines.sqlengine import SqlEngine from alphamind.data.engines.sqlengine.sqlengine import SqlEngine
from alphamind.data.engines.sqlengine.sqlengine import total_risk_factors from alphamind.data.engines.sqlengine.sqlengine import total_risk_factors
from alphamind.data.engines.sqlengine.sqlengine import industry_styles from alphamind.data.engines.sqlengine.sqlengine import industry_styles
from alphamind.data.engines.sqlengine.sqlengine import risk_styles from alphamind.data.engines.sqlengine.sqlengine import risk_styles
......
...@@ -9,6 +9,7 @@ from typing import Iterable ...@@ -9,6 +9,7 @@ from typing import Iterable
from typing import List from typing import List
from typing import Tuple from typing import Tuple
from typing import Union from typing import Union
from typing import Dict
import numpy as np import numpy as np
import pandas as pd import pandas as pd
...@@ -25,9 +26,12 @@ from PyFin.api import advanceDateByCalendar ...@@ -25,9 +26,12 @@ from PyFin.api import advanceDateByCalendar
from alphamind.data.dbmodel.models.models_rl import ( from alphamind.data.dbmodel.models.models_rl import (
Market, Market,
IndexMarket,
Industry, Industry,
RiskExposure, RiskExposure,
Universe as UniverseTable Universe as UniverseTable,
IndexComponent,
IndexWeight
) )
from alphamind.data.engines.utilities import factor_tables from alphamind.data.engines.utilities import factor_tables
from alphamind.data.engines.utilities import _map_factors from alphamind.data.engines.utilities import _map_factors
...@@ -85,6 +89,10 @@ macro_styles = ['COUNTRY'] ...@@ -85,6 +89,10 @@ macro_styles = ['COUNTRY']
total_risk_factors = risk_styles + industry_styles + macro_styles total_risk_factors = risk_styles + industry_styles + macro_styles
_map_index_codes = {
300: "2070000060"
}
DAILY_RETURN_OFFSET = 0 DAILY_RETURN_OFFSET = 0
...@@ -164,6 +172,37 @@ class SqlEngine: ...@@ -164,6 +172,37 @@ class SqlEngine:
post_process=post_process) post_process=post_process)
return df[['code', 'dx']] return df[['code', 'dx']]
def fetch_dx_return_index(self,
                          ref_date: str,
                          index_code: int,
                          expiry_date: str = None,
                          horizon: int = 0,
                          offset: int = 0) -> pd.DataFrame:
    """Fetch the ``dx`` value of a benchmark index for a single date.

    :param ref_date: first date of the query window; also the date whose
        row is returned.
    :param index_code: integer benchmark id, translated through
        ``_map_index_codes`` (raises ``KeyError`` when unmapped).
    :param expiry_date: last date of the query window. When falsy, the
        window end is ``ref_date`` advanced by
        ``1 + horizon + offset + DAILY_RETURN_OFFSET`` business days on the
        'china.sse' calendar.
    :param horizon: forwarded to ``self._create_stats``.
    :param offset: forwarded to ``self._create_stats``.
    :return: the ``code`` and ``dx`` columns of the rows whose
        ``trade_date`` equals ``ref_date``. The ``dx`` column is expected to
        be produced by ``self._create_stats`` (defined elsewhere).
    """
    mapped_code = _map_index_codes[index_code]

    if expiry_date:
        window_end = expiry_date
    else:
        shift = str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b'
        window_end = advanceDateByCalendar('china.sse', ref_date, shift).strftime('%Y%m%d')

    columns = [
        IndexMarket.trade_date,
        IndexMarket.indexCode.label('code'),
        IndexMarket.chgPct.label("chgPct"),
    ]
    criteria = and_(
        IndexMarket.trade_date.between(ref_date, window_end),
        IndexMarket.indexCode == mapped_code,
        IndexMarket.flag == 1
    )
    query = select(columns).where(criteria).order_by(IndexMarket.trade_date,
                                                     IndexMarket.indexCode)

    # Rows containing NULLs are discarded before the statistics are built.
    returns = pd.read_sql(query, self.session.bind).dropna()
    returns = self._create_stats(returns, horizon, offset)
    on_ref_date = returns[returns.trade_date == ref_date]
    return on_ref_date[['code', 'dx']]
def fetch_dx_return_range(self, def fetch_dx_return_range(self,
universe, universe,
start_date: str = None, start_date: str = None,
...@@ -199,6 +238,42 @@ class SqlEngine: ...@@ -199,6 +238,42 @@ class SqlEngine:
return df.reset_index(drop=True).sort_values(['trade_date', 'code']) return df.reset_index(drop=True).sort_values(['trade_date', 'code'])
def fetch_dx_return_index_range(self,
                                index_code,
                                start_date: str = None,
                                end_date: str = None,
                                dates: Iterable[str] = None,
                                horizon: int = 0,
                                offset: int = 0) -> pd.DataFrame:
    """Fetch benchmark index statistics over a date range.

    :param index_code: integer benchmark id, translated through
        ``_map_index_codes`` (raises ``KeyError`` when unmapped).
    :param start_date: first date of the query window; ignored when
        ``dates`` is non-empty.
    :param end_date: last requested date; ignored when ``dates`` is
        non-empty. The query window is extended past it by
        ``1 + horizon + offset + DAILY_RETURN_OFFSET`` business days on the
        'china.sse' calendar.
    :param dates: explicit dates. When non-empty, its first and last
        elements define the window and the result is restricted to rows
        whose ``trade_date`` is one of them.
    :param horizon: forwarded to ``self._create_stats``.
    :param offset: forwarded to ``self._create_stats``.
    :return: the frame produced by ``self._create_stats`` (defined
        elsewhere), optionally filtered to ``dates``.
    """
    # Materialise once: a generic iterable cannot be indexed, and the truth
    # value of a numpy array / pandas index is ambiguous and would raise.
    if dates is not None:
        dates = list(dates)

    if dates:
        start_date = dates[0]
        end_date = dates[-1]

    index_code = _map_index_codes[index_code]

    end_date = advanceDateByCalendar(
        'china.sse',
        end_date,
        str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b'
    ).strftime('%Y-%m-%d')

    # Explicit ordering, matching fetch_dx_return_index: without ORDER BY the
    # database may return rows in any order before they reach _create_stats.
    query = select([IndexMarket.trade_date,
                    IndexMarket.indexCode.label('code'),
                    IndexMarket.chgPct.label("chgPct")]) \
        .where(
        and_(
            IndexMarket.trade_date.between(start_date, end_date),
            IndexMarket.indexCode == index_code,
            IndexMarket.flag == 1
        )
    ).order_by(IndexMarket.trade_date, IndexMarket.indexCode)

    # Rows containing NULLs are discarded before the statistics are built.
    df = pd.read_sql(query, self.session.bind).dropna()
    df = self._create_stats(df, horizon, offset)

    if dates:
        df = df[df.trade_date.isin(dates)]
    return df
def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]: def fetch_codes(self, ref_date: str, universe: Universe) -> List[int]:
df = universe.query(self, ref_date, ref_date) df = universe.query(self, ref_date, ref_date)
return sorted(df.code.tolist()) return sorted(df.code.tolist())
...@@ -373,6 +448,19 @@ class SqlEngine: ...@@ -373,6 +448,19 @@ class SqlEngine:
df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="") df = pd.get_dummies(df, columns=['industry'], prefix="", prefix_sep="")
return df.drop('industry_code', axis=1) return df.drop('industry_code', axis=1)
def fetch_industry_matrix_range(self,
                                universe: Universe,
                                start_date: str = None,
                                end_date: str = None,
                                dates: Iterable[str] = None,
                                category: str = 'sw',
                                level: int = 1):
    """Return industry membership over a date range as indicator columns.

    All arguments are forwarded unchanged to ``self.fetch_industry_range``.
    The ``industry`` column of that result is copied to ``industry_name``
    and then expanded by ``pd.get_dummies`` into one column per distinct
    industry value (no prefix). ``industry_code`` is dropped and only the
    first row of each (``trade_date``, ``code``) pair is kept.
    """
    membership = self.fetch_industry_range(universe, start_date, end_date, dates, category,
                                           level)
    # Keep a readable copy: get_dummies consumes the 'industry' column itself.
    membership['industry_name'] = membership['industry']
    matrix = pd.get_dummies(membership, columns=['industry'], prefix="", prefix_sep="")
    matrix = matrix.drop('industry_code', axis=1)
    return matrix.drop_duplicates(['trade_date', 'code'])
def fetch_industry_range(self, def fetch_industry_range(self,
universe: Universe, universe: Universe,
start_date: str = None, start_date: str = None,
...@@ -521,6 +609,152 @@ class SqlEngine: ...@@ -521,6 +609,152 @@ class SqlEngine:
models[ref_date] = FactorRiskModel(factor_cov, factor_loading, idsync) models[ref_date] = FactorRiskModel(factor_cov, factor_loading, idsync)
return pd.Series(models), risk_cov, risk_exp return pd.Series(models), risk_cov, risk_exp
def fetch_data(self,
               ref_date: str,
               factors: Iterable[str],
               codes: Iterable[int],
               benchmark: int = None,
               risk_model: str = 'short',
               industry: str = 'sw') -> Dict[str, pd.DataFrame]:
    """Assemble factor, benchmark, risk and industry data for one date.

    :param ref_date: date to fetch.
    :param factors: factor specification handed to ``Transformer``.
    :param codes: securities to fetch.
    :param benchmark: benchmark id; when truthy, benchmark weights are
        left-joined on ``code`` and missing weights become 0.
    :param risk_model: risk model name; when truthy, risk exposures are
        left-joined on ``code`` and the covariance is returned as well.
    :param industry: industry category passed to ``self.fetch_industry``.
    :return: dict with key ``'factor'`` (the merged frame) plus
        ``'benchmark'`` and ``'risk_cov'`` when the respective option is
        enabled.
    """
    result = dict()

    transformer = Transformer(factors)
    merged = self.fetch_factor(ref_date,
                               transformer,
                               codes,
                               used_factor_tables=factor_tables)

    if benchmark:
        weights = self.fetch_benchmark(ref_date, benchmark)
        result['benchmark'] = weights
        merged = merged.merge(weights, how='left', on=['code'])
        # Securities outside the benchmark get a zero weight.
        merged['weight'] = merged['weight'].fillna(0.)

    if risk_model:
        # Risk factors already required by the transformer are passed on as excluded.
        excluded = list(set(total_risk_factors).intersection(transformer.dependency))
        risk_cov, risk_exp = self.fetch_risk_model(ref_date, codes, risk_model, excluded)
        merged = merged.merge(risk_exp, how='left', on=['code'])
        result['risk_cov'] = risk_cov

    industries = self.fetch_industry(ref_date=ref_date,
                                     codes=codes,
                                     category=industry)
    # Default (inner) merge: rows without industry information are dropped.
    merged = merged.merge(industries, on=['code'])

    result['factor'] = merged
    return result
def fetch_benchmark(self,
                    ref_date: str,
                    benchmark: int,
                    codes: Iterable[int] = None) -> pd.DataFrame:
    """Fetch the constituent weights of a benchmark index on one date.

    :param ref_date: date to fetch.
    :param benchmark: integer benchmark id, translated through
        ``_map_index_codes`` (raises ``KeyError`` when unmapped).
    :param codes: optional securities to align the result to. When
        non-empty, the result has exactly these codes in this order and
        codes absent from the benchmark get weight 0.
    :return: frame with columns ``code`` and ``weight``; the stored weight
        is divided by 100 (presumably percent to fraction -- confirm).
    """
    benchmark = _map_index_codes[benchmark]

    # Materialise once: the truth value of a numpy array / pandas index is
    # ambiguous and would raise, and a one-shot iterable must not be consumed
    # before it is used for re-indexing.
    if codes is not None:
        codes = list(codes)

    # Component rows are matched to weight rows on date, index symbol and
    # member symbol; only rows with flag == 1 on both sides take part.
    big_table = join(IndexComponent, IndexWeight,
                     and_(
                         IndexComponent.trade_date == IndexWeight.trade_date,
                         IndexComponent.indexSymbol == IndexWeight.indexSymbol,
                         IndexComponent.symbol == IndexWeight.symbol,
                         IndexComponent.flag == 1,
                         IndexWeight.flag == 1
                     )
                     )

    query = select(
        [IndexComponent.code.label("code"),
         (IndexWeight.weight / 100.).label('weight')]).select_from(big_table). \
        where(
        and_(
            IndexComponent.trade_date == ref_date,
            IndexComponent.indexCode == benchmark,
        )
    ).distinct()

    df = pd.read_sql(query, self.engine)

    if codes:
        df.set_index(['code'], inplace=True)
        df = df.reindex(codes).fillna(0.)
        df.reset_index(inplace=True)
    return df
def fetch_benchmark_range(self,
                          benchmark: int,
                          start_date: str = None,
                          end_date: str = None,
                          dates: Iterable[str] = None) -> pd.DataFrame:
    """Fetch the constituent weights of a benchmark index over several dates.

    :param benchmark: integer benchmark id, translated through
        ``_map_index_codes`` (raises ``KeyError`` when unmapped).
    :param start_date: first date of the window; used only when ``dates``
        is empty or ``None``.
    :param end_date: last date of the window; used only when ``dates`` is
        empty or ``None``.
    :param dates: explicit dates; when non-empty, only these dates are
        selected.
    :return: frame with columns ``trade_date``, ``code`` and ``weight``;
        the stored weight is divided by 100 (presumably percent to
        fraction -- confirm).
    """
    # Materialise once: the truth value of a numpy array / pandas index is
    # ambiguous and would raise in the condition below.
    if dates is not None:
        dates = list(dates)

    if dates:
        cond = IndexComponent.trade_date.in_(dates)
    else:
        cond = IndexComponent.trade_date.between(start_date, end_date)

    benchmark = _map_index_codes[benchmark]

    # Component rows are matched to weight rows on date, index symbol and
    # member symbol; only rows with flag == 1 on both sides take part.
    big_table = join(IndexComponent, IndexWeight,
                     and_(
                         IndexComponent.trade_date == IndexWeight.trade_date,
                         IndexComponent.indexSymbol == IndexWeight.indexSymbol,
                         IndexComponent.symbol == IndexWeight.symbol,
                         IndexComponent.flag == 1,
                         IndexWeight.flag == 1
                     )
                     )

    query = select(
        [IndexComponent.trade_date,
         IndexComponent.code.label("code"),
         (IndexWeight.weight / 100.).label('weight')]).select_from(big_table). \
        where(
        and_(
            cond,
            IndexComponent.indexCode == benchmark,
        )
    ).distinct()

    return pd.read_sql(query, self.engine)
def fetch_data_range(self,
                     universe: Universe,
                     factors: Iterable[str],
                     start_date: str = None,
                     end_date: str = None,
                     dates: Iterable[str] = None,
                     benchmark: int = None,
                     risk_model: str = 'short',
                     industry: str = 'sw',
                     external_data: pd.DataFrame = None) -> Dict[str, pd.DataFrame]:
    """Assemble factor, benchmark, risk and industry data over a date range.

    :param universe: universe forwarded to the range fetchers.
    :param factors: factor specification handed to ``Transformer``.
    :param start_date: forwarded to the range fetchers.
    :param end_date: forwarded to the range fetchers.
    :param dates: forwarded to the range fetchers.
    :param benchmark: benchmark id; when truthy, benchmark weights are
        left-joined on (``trade_date``, ``code``) and missing weights
        become 0.
    :param risk_model: risk model name; when truthy, risk exposures are
        left-joined on (``trade_date``, ``code``) and the covariance is
        returned as well.
    :param industry: industry category passed to
        ``self.fetch_industry_range``.
    :param external_data: forwarded to ``self.fetch_factor_range``.
    :return: dict with key ``'factor'`` (the merged frame) plus
        ``'benchmark'`` and ``'risk_cov'`` when the respective option is
        enabled.
    """
    join_keys = ['trade_date', 'code']
    result = dict()

    transformer = Transformer(factors)
    merged = self.fetch_factor_range(universe,
                                     transformer,
                                     start_date,
                                     end_date,
                                     dates,
                                     external_data=external_data)

    if benchmark:
        weights = self.fetch_benchmark_range(benchmark, start_date, end_date, dates)
        result['benchmark'] = weights
        merged = merged.merge(weights, how='left', on=join_keys)
        # Securities outside the benchmark get a zero weight.
        merged['weight'] = merged['weight'].fillna(0.)

    if risk_model:
        # Risk factors already required by the transformer are passed on as excluded.
        excluded = list(set(total_risk_factors).intersection(transformer.dependency))
        risk_cov, risk_exp = self.fetch_risk_model_range(universe, start_date, end_date, dates,
                                                         risk_model,
                                                         excluded)
        merged = merged.merge(risk_exp, how='left', on=join_keys)
        result['risk_cov'] = risk_cov

    industries = self.fetch_industry_range(universe,
                                           start_date=start_date,
                                           end_date=end_date,
                                           dates=dates,
                                           category=industry)
    # Default (inner) merge: rows without industry information are dropped.
    merged = merged.merge(industries, on=join_keys)

    result['factor'] = merged
    return result
if __name__ == "__main__": if __name__ == "__main__":
db_url = "mysql+mysqldb://reader:Reader#2020@121.37.138.1:13317/vision?charset=utf8" db_url = "mysql+mysqldb://reader:Reader#2020@121.37.138.1:13317/vision?charset=utf8"
...@@ -529,6 +763,7 @@ if __name__ == "__main__": ...@@ -529,6 +763,7 @@ if __name__ == "__main__":
universe = Universe("hs300") universe = Universe("hs300")
start_date = '2020-01-01' start_date = '2020-01-01'
end_date = '2020-02-21' end_date = '2020-02-21'
benchmark = 300
df = sql_engine.fetch_factor("2020-02-21", factors=["BETA"], codes=["2010031963"]) df = sql_engine.fetch_factor("2020-02-21", factors=["BETA"], codes=["2010031963"])
print(df) print(df)
df = sql_engine.fetch_factor_range(universe=universe, start_date=start_date, end_date=end_date, factors=["BETA"]) df = sql_engine.fetch_factor_range(universe=universe, start_date=start_date, end_date=end_date, factors=["BETA"])
...@@ -539,10 +774,22 @@ if __name__ == "__main__": ...@@ -539,10 +774,22 @@ if __name__ == "__main__":
print(df) print(df)
df = sql_engine.fetch_dx_return_range(universe, start_date=start_date, end_date=end_date) df = sql_engine.fetch_dx_return_range(universe, start_date=start_date, end_date=end_date)
print(df) print(df)
df = sql_engine.fetch_dx_return_index("2020-10-09", index_code=benchmark)
print(df)
df = sql_engine.fetch_dx_return_index_range(start_date=start_date, end_date=end_date, index_code=benchmark)
print(df)
df = sql_engine.fetch_benchmark("2020-10-09", benchmark=benchmark)
print(df)
df = sql_engine.fetch_benchmark_range(start_date=start_date, end_date=end_date, benchmark=benchmark)
print(df)
df = sql_engine.fetch_industry(ref_date="2020-10-09", codes=["2010031963"]) df = sql_engine.fetch_industry(ref_date="2020-10-09", codes=["2010031963"])
print(df) print(df)
df = sql_engine.fetch_industry_matrix(ref_date="2020-10-09", codes=["2010031963"]) df = sql_engine.fetch_industry_matrix(ref_date="2020-10-09", codes=["2010031963"])
print(df) print(df)
df = sql_engine.fetch_industry_matrix_range(universe=universe,
start_date=start_date,
end_date=end_date)
print(df)
df = sql_engine.fetch_industry_range(start_date=start_date, end_date=end_date, universe=Universe("hs300")) df = sql_engine.fetch_industry_range(start_date=start_date, end_date=end_date, universe=Universe("hs300"))
print(df) print(df)
df = sql_engine.fetch_risk_model("2020-02-21", codes=["2010031963"]) df = sql_engine.fetch_risk_model("2020-02-21", codes=["2010031963"])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment