Commit ca918be8 authored by Dr.李's avatar Dr.李

update sql query

parent 366b0936
...@@ -442,16 +442,16 @@ class RiskExposure(Base): ...@@ -442,16 +442,16 @@ class RiskExposure(Base):
updateTime = Column(DateTime) updateTime = Column(DateTime)
t_risk_master = Table( class RiskMaster(Base):
'risk_master', metadata, __tablename__ = 'risk_master'
Column('factor', String(30, 'utf8_general_ci'), nullable=False),
Column('source', String(30, 'utf8_general_ci'), nullable=False), factor = Column(String(30, 'utf8_general_ci'), primary_key=True, nullable=False)
Column('alias', String(30, 'utf8_general_ci'), nullable=False), source = Column(String(30, 'utf8_general_ci'), primary_key=True, nullable=False)
Column('type', String(30, 'utf8_general_ci')), alias = Column(String(30, 'utf8_general_ci'), nullable=False)
Column('updateTime', DateTime), type = Column(String(30, 'utf8_general_ci'))
Column('description', Text(2147483647, 'utf8_general_ci')), updateTime = Column(DateTime)
Column('factor_id', Integer, nullable=False) description = Column(Text(2147483647, 'utf8_general_ci'))
) FactorID = Column(Integer, nullable=False)
class RiskReturn(Base): class RiskReturn(Base):
......
...@@ -11,7 +11,16 @@ from typing import Dict ...@@ -11,7 +11,16 @@ from typing import Dict
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import sqlalchemy as sa import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy import select, and_
from sqlalchemy import MetaData
from sqlalchemy.sql import func
from alphamind.data.engines.universe import Universe from alphamind.data.engines.universe import Universe
from alphamind.data.dbmodel.models import FactorMaster
from alphamind.data.dbmodel.models import Strategy
from alphamind.data.dbmodel.models import DailyReturn
from alphamind.data.dbmodel.models import IndexComponent
from alphamind.data.dbmodel.models import Universe as UniverseTable
from PyFin.api import advanceDateByCalendar from PyFin.api import advanceDateByCalendar
risk_styles = ['BETA', risk_styles = ['BETA',
...@@ -70,29 +79,43 @@ class SqlEngine(object): ...@@ -70,29 +79,43 @@ class SqlEngine(object):
def __init__(self, def __init__(self,
db_url: str): db_url: str):
self.engine = sa.create_engine(db_url) self.engine = sa.create_engine(db_url)
self.create_session()
def create_session(self):
Session = orm.sessionmaker(bind=self.engine)
self.session = Session()
def fetch_factors_meta(self) -> pd.DataFrame: def fetch_factors_meta(self) -> pd.DataFrame:
sql = "select * from factor_master" query = self.session.query(FactorMaster)
return pd.read_sql(sql, self.engine) return pd.read_sql(query.statement, query.session.bind)
def fetch_strategy(self, ref_date: str, strategy: str) -> pd.DataFrame(): def fetch_strategy(self, ref_date: str, strategy: str) -> pd.DataFrame():
sql = "select strategyName, factor, weight from strategy " \ query = select([Strategy.strategyName, Strategy.factor, Strategy.weight]).where(
"where Date = '{ref_date}' and strategyName = '{strategy}'".format(ref_date=ref_date, strategy=strategy) and_(
return pd.read_sql(sql, self.engine) Strategy.Date == ref_date,
Strategy.strategyName == strategy
)
)
return pd.read_sql(query, self.session.bind)
def fetch_strategy_names(self): def fetch_strategy_names(self):
sql = "select distinct strategyName from strategy" query = select([Strategy.strategyName]).distinct()
cursor = self.engine.execute(sql) cursor = self.engine.execute(query)
strategy_names = {c[0] for c in cursor.fetchall()} strategy_names = {s[0] for s in cursor.fetchall()}
return strategy_names return strategy_names
def fetch_codes(self, ref_date: str, univ: Universe) -> List[int]: def fetch_codes(self, ref_date: str, univ: Universe) -> List[int]:
def get_universe(univ, ref_date): def get_universe(univ, ref_date):
univ_str = ','.join("'" + u + "'" for u in univ) query = select([UniverseTable.Code]).distinct().where(
sql = "select distinct Code from universe where Date = '{ref_date}' and universe in ({univ_str})".format( and_(
ref_date=ref_date, univ_str=univ_str) UniverseTable.Date == ref_date,
cursor = self.engine.execute(sql) UniverseTable.universe.in_(univ)
)
)
cursor = self.engine.execute(query)
codes_set = {c[0] for c in cursor.fetchall()} codes_set = {c[0] for c in cursor.fetchall()}
return codes_set return codes_set
...@@ -115,7 +138,6 @@ class SqlEngine(object): ...@@ -115,7 +138,6 @@ class SqlEngine(object):
return sorted(codes_set) return sorted(codes_set)
def fetch_dx_return(self, ref_date, codes, expiry_date=None, horizon=1): def fetch_dx_return(self, ref_date, codes, expiry_date=None, horizon=1):
start_date = ref_date start_date = ref_date
if not expiry_date and horizon: if not expiry_date and horizon:
...@@ -123,13 +145,14 @@ class SqlEngine(object): ...@@ -123,13 +145,14 @@ class SqlEngine(object):
elif expiry_date: elif expiry_date:
end_date = expiry_date end_date = expiry_date
codes_str = ','.join(str(c) for c in codes) query = select([DailyReturn.Code, func.sum(DailyReturn.d1).label('dx')]).where(
sql = "select Code, sum(d1) as dx from daily_return " \ and_(
"where Date >= '{start_date}' and Date < '{end_date}'" \ DailyReturn.Date.between(start_date, end_date),
" and Code in ({codes}) GROUP BY Code".format(start_date=start_date, DailyReturn.Code.in_(codes)
end_date=end_date, )
codes=codes_str) ).group_by(DailyReturn.Code)
return pd.read_sql(sql, self.engine)
return pd.read_sql(query, self.session.bind)
def fetch_data(self, ref_date, def fetch_data(self, ref_date,
factors: Iterable[str], factors: Iterable[str],
...@@ -138,10 +161,9 @@ class SqlEngine(object): ...@@ -138,10 +161,9 @@ class SqlEngine(object):
risk_model: str = 'short') -> Dict[str, pd.DataFrame]: risk_model: str = 'short') -> Dict[str, pd.DataFrame]:
def mapping_factors(factors): def mapping_factors(factors):
factor_list = ','.join("'" + f + "'" for f in factors)
sql = "select factor, source from factor_master where factor in ({factor_list})".format(factor_list=factor_list)
results = self.engine.execute(sql).fetchall()
query = select([FactorMaster.factor, FactorMaster.source]).where(FactorMaster.factor.in_(factors))
results = self.engine.execute(query).fetchall()
all_factors = {r[0].strip(): r[1].strip() for r in results} all_factors = {r[0].strip(): r[1].strip() for r in results}
return ','.join(all_factors[k] + '.' + k for k in all_factors) return ','.join(all_factors[k] + '.' + k for k in all_factors)
...@@ -169,22 +191,28 @@ class SqlEngine(object): ...@@ -169,22 +191,28 @@ class SqlEngine(object):
factor_data = pd.read_sql(sql, self.engine) factor_data = pd.read_sql(sql, self.engine)
risk_cov_table = 'risk_cov_' + risk_model risk_cov_table = 'risk_cov_' + risk_model
risk_str = ','.join(risk_cov_table + '.' + f for f in total_risk_factors) meta = MetaData()
meta.reflect(self.engine)
sql = "select FactorID, Factor, {risks} from {risk_table} where Date = '{ref_date}'".format(ref_date=ref_date, risk_cov_table = meta.tables[risk_cov_table]
risks=risk_str,
risk_table=risk_cov_table)
risk_cov_data = pd.read_sql(sql, self.engine).sort_values('FactorID') query = select([risk_cov_table.columns['FactorID'],
risk_cov_table.columns['Factor']]
+ [risk_cov_table.columns[f] for f in total_risk_factors]).where(
risk_cov_table.columns['Date'] == ref_date
)
risk_cov_data = pd.read_sql(query, self.engine).sort_values('FactorID')
total_data = {'risk_cov': risk_cov_data} total_data = {'risk_cov': risk_cov_data}
if benchmark: if benchmark:
sql = "select Code, weight / 100. as weight from index_components " \ query = select([IndexComponent.code, (IndexComponent.weight / 100.).lable('weight')]).where(
"where Date = '{ref_date}' and indexCode = {benchmakr}".format(ref_date=ref_date, and_(
benchmakr=benchmark) IndexComponent.Date == ref_date,
IndexComponent.indexCode == benchmark
benchmark_data = pd.read_sql(sql, self.engine) )
)
benchmark_data = pd.read_sql(query, self.engine)
total_data['benchmark'] = benchmark_data total_data['benchmark'] = benchmark_data
factor_data = pd.merge(factor_data, benchmark_data, how='left', on=['Code']) factor_data = pd.merge(factor_data, benchmark_data, how='left', on=['Code'])
factor_data['weight'] = factor_data['weight'].fillna(0.) factor_data['weight'] = factor_data['weight'].fillna(0.)
...@@ -202,18 +230,9 @@ if __name__ == '__main__': ...@@ -202,18 +230,9 @@ if __name__ == '__main__':
engine = SqlEngine(db_url) engine = SqlEngine(db_url)
ref_date = '2017-07-04' ref_date = '2017-07-04'
factors = engine.fetch_factors_meta()[['factor', 'source']] codes = engine.fetch_codes(ref_date, universe)
factors['factor'] = factors.factor.str.strip() df = engine.fetch_data(ref_date, factors=['EPS', 'CFinc1'], codes=codes)
factors['source'] = factors.source.str.strip()
import datetime as dt
start = dt.datetime.now() print(df)
for i in range(1):
factors = engine.fetch_factors_meta()
codes = engine.fetch_codes('2017-07-04', universe)
total_data = engine.fetch_data(ref_date, factors.factor.tolist(), [1, 5], 905)
print(dt.datetime.now() - start)
print(total_data)
...@@ -2,7 +2,7 @@ cvxopt >= 1.1.9 ...@@ -2,7 +2,7 @@ cvxopt >= 1.1.9
cvxpy >= 0.4.9 cvxpy >= 0.4.9
mysqlclient >= 1.3.10 mysqlclient >= 1.3.10
numpy >= 1.12.1 numpy >= 1.12.1
numba >= 0.30.0 numba >= 0.34.0
scikit-learn >= 0.18.1 scikit-learn >= 0.18.1
scipy >= 0.19.0 scipy >= 0.19.0
simpleutils >= 0.1.0 simpleutils >= 0.1.0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment