Commit d42da92f authored by 李煜's avatar 李煜

first commit

parent 086c1f72
Pipeline #57 failed with stages
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.idea/
/ultron/
import pdb
import collections
import json
import time
from pandas.io.json import json_normalize
from datetime import datetime, timedelta
from factor import factor_growth, historical_value, factor_per_share_indicators
from factor.ttm_fundamental import *
from vision.file_unit.balance import Balance
from vision.file_unit.cash_flow import CashFlow
from vision.file_unit.income import Income
from vision.file_unit.valuation import Valuation
from vision.file_unit.industry import Industry
from factor.utillities.trade_date import TradeDate
from ultron.cluster.invoke.cache_data import cache_data
from ultron.utilities.short_uuid import unique_machine, decode
def get_trade_date(trade_date, n):
    """Return the trading day closest to (but not after) `trade_date` minus n years.

    If the computed calendar date is not a trading day, walk backwards one
    day at a time until a trading day is found.  Dates that fall before the
    known trading-day history are returned unchanged.

    :param trade_date: reference trading day as an int, e.g. 20190101
    :param n: number of years to step back
    :return: int date in YYYYMMDD form
    """
    calendar = TradeDate()
    trading_days = collections.OrderedDict(
        sorted(calendar._trade_date_sets.items(), key=lambda kv: kv[0]))
    target = datetime.strptime(str(trade_date), "%Y%m%d") - n * timedelta(days=365)
    candidate = int(datetime.strftime(target, "%Y%m%d"))
    if candidate < min(trading_days.keys()):
        # Earlier than our trading-day history: nothing to snap to.
        return candidate
    # Decrement through the integer dates until we hit a known trading day;
    # invalid YYYYMMDD values are simply skipped over.
    while candidate not in trading_days:
        candidate -= 1
    return candidate
def get_basic_growth_data(trade_date):
    """Fetch the raw per-symbol data needed by the growth factors.

    For every symbol on `trade_date`, pulls balance-sheet levels (current and
    one year back) plus TTM income/cash-flow aggregates for the current date
    and each of the previous five years.

    :param trade_date: trading day as int, e.g. 20190101
    :return: (ttm_factor_sets, balance_sets) DataFrames keyed by 'symbol'
    """
    # Trading days 1..5 years before trade_date.
    pre_year_dates = [get_trade_date(trade_date, i) for i in range(1, 6)]
    trade_date_pre_year = pre_year_dates[0]

    balance_sets = get_fundamentals(add_filter_trade(query(
        Balance._name_,
        [Balance.symbol,
         Balance.total_assets,            # total assets
         Balance.total_owner_equities]),  # total owners' equity
        [trade_date]))
    balance_sets_pre_year = get_fundamentals(add_filter_trade(query(
        Balance._name_,
        [Balance.symbol,
         Balance.total_assets,
         Balance.total_owner_equities]),
        [trade_date_pre_year]))
    balance_sets_pre_year = balance_sets_pre_year.rename(
        columns={"total_assets": "total_assets_pre_year",
                 "total_owner_equities": "total_owner_equities_pre_year"})
    balance_sets = pd.merge(balance_sets, balance_sets_pre_year, on='symbol')

    # Full TTM factor set (used for the current date and one year back).
    ttm_factors = {Income._name_: [Income.symbol,
                                   Income.operating_revenue,         # operating revenue
                                   Income.operating_profit,          # operating profit
                                   Income.total_profit,              # total profit
                                   Income.net_profit,                # net profit
                                   Income.operating_cost,            # operating cost
                                   Income.np_parent_company_owners   # net profit attributable to parent owners
                                   ],
                   CashFlow._name_: [CashFlow.symbol,
                                     CashFlow.net_finance_cash_flow,  # net financing cash flow
                                     CashFlow.net_operate_cash_flow,  # net operating cash flow
                                     CashFlow.net_invest_cash_flow,   # net investing cash flow
                                     ]
                   }
    # Reduced TTM set sampled continuously over the last five years.
    ttm_factor_continue = {Income._name_: [Income.symbol,
                                           Income.net_profit,
                                           Income.operating_revenue,
                                           Income.operating_cost,
                                           Income.np_parent_company_owners,
                                           ]
                           }

    def _ttm(factors, date):
        # Fetch TTM fundamentals for one date and drop the redundant trade_date column.
        return get_ttm_fundamental([], factors, date).reset_index().drop(columns={"trade_date"})

    ttm_factor_sets = _ttm(ttm_factors, trade_date)

    ttm_pre_year = _ttm(ttm_factors, trade_date_pre_year).rename(
        columns={"operating_revenue": "operating_revenue_pre_year",
                 "operating_profit": "operating_profit_pre_year",
                 "total_profit": "total_profit_pre_year",
                 "net_profit": "net_profit_pre_year",
                 "operating_cost": "operating_cost_pre_year",
                 "np_parent_company_owners": "np_parent_company_owners_pre_year",
                 "net_finance_cash_flow": "net_finance_cash_flow_pre_year",
                 "net_operate_cash_flow": "net_operate_cash_flow_pre_year",
                 "net_invest_cash_flow": "net_invest_cash_flow_pre_year"})
    ttm_factor_sets = pd.merge(ttm_factor_sets, ttm_pre_year, on="symbol")

    # Suffix "_pre_year_i" marks the continuous set i years back (i = 1..5).
    continue_cols = ("operating_revenue", "operating_cost", "net_profit",
                     "np_parent_company_owners")
    for offset, date in enumerate(pre_year_dates, start=1):
        pre = _ttm(ttm_factor_continue, date).rename(
            columns={col: "{}_pre_year_{}".format(col, offset) for col in continue_cols})
        ttm_factor_sets = pd.merge(ttm_factor_sets, pre, on="symbol")
    return ttm_factor_sets, balance_sets
def get_basic_history_value_data(trade_date):
    """Fetch the raw per-symbol data needed by the historical-value factors.

    Pulls valuation ratios, cash-flow and income items for `trade_date`, the
    SW level-1 industry classification, TTM aggregates for the current date
    and 3/5 years back, and 4-point market-cap sums (2..5 years back).

    :param trade_date: trading day as int, e.g. 20190101
    :return: (valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets)
    """
    # PS, PE, PB, PCF
    valuation_sets = get_fundamentals(add_filter_trade(query(
        Valuation._name_,
        [Valuation.symbol,
         Valuation.pe,
         Valuation.ps,
         Valuation.pb,
         Valuation.pcf,
         Valuation.market_cap,
         Valuation.circulating_market_cap]), [trade_date]))
    cash_flow_sets = get_fundamentals(add_filter_trade(query(
        CashFlow._name_,
        [CashFlow.symbol,
         CashFlow.goods_sale_and_service_render_cash]), [trade_date]))
    income_sets = get_fundamentals(add_filter_trade(query(
        Income._name_,
        [Income.symbol,
         Income.net_profit]), [trade_date]))
    # SW (ShenWan) level-1 industry codes used to filter the classification.
    industry_set = ['801010', '801020', '801030', '801040', '801050', '801080', '801110', '801120', '801130',
                    '801140', '801150', '801160', '801170', '801180', '801200', '801210', '801230', '801710',
                    '801720', '801730', '801740', '801750', '801760', '801770', '801780', '801790', '801880',
                    '801890']
    sw_industry = get_fundamentals(add_filter_trade(query(
        Industry._name_,
        [Industry.symbol,
         Industry.isymbol]), [trade_date]))
    # TTM aggregates.
    ttm_factors = {Income._name_: [Income.symbol,
                                   Income.np_parent_company_owners],
                   CashFlow._name_: [CashFlow.symbol,
                                     CashFlow.net_operate_cash_flow]
                   }
    # Only consumed by the commented-out windowed-TTM computation below; kept for parity.
    ttm_factors_sum_list = {Income._name_: [Income.symbol,
                                            Income.net_profit,  # net profit
                                            ], }
    trade_date_2y = get_trade_date(trade_date, 2)
    trade_date_3y = get_trade_date(trade_date, 3)
    trade_date_4y = get_trade_date(trade_date, 4)
    trade_date_5y = get_trade_date(trade_date, 5)
    ttm_factor_sets = get_ttm_fundamental([], ttm_factors, trade_date).reset_index()
    ttm_factor_sets_3 = get_ttm_fundamental([], ttm_factors, trade_date_3y).reset_index()
    ttm_factor_sets_5 = get_ttm_fundamental([], ttm_factors, trade_date_5y).reset_index()
    # The windowed TTM computation still needs optimisation.
    # ttm_factor_sets_sum = get_ttm_fundamental([], ttm_factors_sum_list, trade_date, 5).reset_index()
    factor_sets_sum = get_fundamentals(add_filter_trade(query(
        Valuation._name_,
        [Valuation.symbol,
         Valuation.market_cap,
         Valuation.circulating_market_cap,
         Valuation.trade_date]),
        [trade_date_2y, trade_date_3y, trade_date_4y, trade_date_5y]))
    # One grouped pass produces both sums (the original ran groupby twice).
    factor_sets_sums = factor_sets_sum.groupby('symbol')[
        ['market_cap', 'circulating_market_cap']].sum().reset_index().rename(
        columns={"market_cap": "market_cap_sum",
                 "circulating_market_cap": "circulating_market_cap_sum"})
    # Keep only SW level-1 industries.
    sw_industry = sw_industry[sw_industry['isymbol'].isin(industry_set)]
    # Attach the industry classification to the valuation data.
    valuation_sets = pd.merge(valuation_sets, sw_industry, on='symbol')
    ttm_factor_sets = ttm_factor_sets.drop(columns={"trade_date"})
    ttm_factor_sets_3 = ttm_factor_sets_3.rename(
        columns={"np_parent_company_owners": "np_parent_company_owners_3"}).drop(columns={"trade_date"})
    ttm_factor_sets_5 = ttm_factor_sets_5.rename(
        columns={"np_parent_company_owners": "np_parent_company_owners_5"}).drop(columns={"trade_date"})
    ttm_factor_sets = pd.merge(ttm_factor_sets, ttm_factor_sets_3, on='symbol')
    ttm_factor_sets = pd.merge(ttm_factor_sets, ttm_factor_sets_5, on='symbol')
    ttm_factor_sets = pd.merge(ttm_factor_sets, factor_sets_sums, on='symbol')
    return valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets
def get_basic_scale_data(trade_date):
    """Fetch the raw per-symbol data needed by the per-share/scale factors.

    :param trade_date: trading day as int, e.g. 20190101
    :return: (valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets,
              balance_sets) DataFrames keyed by 'symbol'
    """
    valuation_sets = get_fundamentals(add_filter_trade(query(
        Valuation._name_,
        [Valuation.symbol,
         Valuation.market_cap,
         Valuation.capitalization,           # total share capital
         Valuation.circulating_market_cap]),
        [trade_date]))
    cash_flow_sets = get_fundamentals(add_filter_trade(query(
        CashFlow._name_,
        [CashFlow.symbol,
         CashFlow.cash_and_equivalents_at_end,  # cash & equivalents balance at period end
         CashFlow.cash_equivalent_increase]),   # net increase in cash & equivalents
        [trade_date]))
    income_sets = get_fundamentals(add_filter_trade(query(
        Income._name_,
        [Income.symbol,
         Income.basic_eps,                   # basic EPS
         Income.diluted_eps,                 # diluted EPS
         Income.net_profit,
         Income.operating_revenue,           # operating revenue
         Income.operating_profit,            # operating profit
         Income.total_operating_revenue]),   # total operating revenue
        [trade_date]))
    balance_sets = get_fundamentals(add_filter_trade(query(
        Balance._name_,
        [Balance.symbol,
         Balance.capital_reserve_fund,       # capital reserve
         Balance.surplus_reserve_fund,       # surplus reserve
         Balance.total_assets,               # total assets
         Balance.dividend_receivable,        # dividends receivable
         Balance.retained_profit,            # retained earnings
         Balance.total_owner_equities]),     # owners' equity
        [trade_date]))
    # TTM aggregates.
    ttm_factors = {Income._name_: [Income.symbol,
                                   Income.operating_revenue,         # operating revenue
                                   Income.operating_profit,          # operating profit
                                   Income.np_parent_company_owners,  # net profit attributable to parent owners
                                   Income.total_operating_revenue],  # total operating revenue
                   CashFlow._name_: [CashFlow.symbol,
                                     CashFlow.net_operate_cash_flow]  # net operating cash flow
                   }
    ttm_factor_sets = get_ttm_fundamental([], ttm_factors, trade_date).reset_index()
    # Single rename call instead of five chained copies of the DataFrame.
    ttm_factor_sets = ttm_factor_sets.rename(
        columns={"np_parent_company_owners": "np_parent_company_owners_ttm",
                 "net_operate_cash_flow": "net_operate_cash_flow_ttm",
                 "operating_revenue": "operating_revenue_ttm",
                 "operating_profit": "operating_profit_ttm",
                 "total_operating_revenue": "total_operating_revenue_ttm"})
    ttm_factor_sets = ttm_factor_sets.drop(columns={"trade_date"})
    return valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets, balance_sets
# Script entry point: optionally rebuild the factor destination tables, then
# for every trading day in [start_date, end_date] compute and cache the
# growth and historical-value factor inputs and trigger the distributed
# calculations via the ultron cache.
if __name__ == '__main__':
    # Fixed cache-session ids, one per factor family, so remote workers can
    # locate their inputs.  NOTE(review): hard-coded rather than time-derived.
    session1 = str('156099868869460811')
    session2 = str('156099868869460812')
    # session3 = str('156099868869460813')
    # session = str(int(time.time() * 1000000 + datetime.now().microsecond))
    start_date = 20100101
    end_date = 20190101
    count = 10
    rebuild = True  # True drops and recreates the destination tables below
    _trade_date = TradeDate()
    trade_date_sets = _trade_date.trade_date_sets_ago(start_date, end_date, count)
    if rebuild is True:
        # growth
        growth = factor_growth.Growth('factor_growth')
        growth.create_dest_tables()
        # historical value
        history_value = historical_value.HistoricalValue('factor_historical_value')
        history_value.create_dest_tables()
        # scale
        # scale = factor_per_share_indicators.PerShareIndicators('factor_scale')
        # scale.create_dest_tables()
    for date_index in trade_date_sets:
        # factor_growth
        start_time = time.time()
        ttm_factor_sets, balance_sets = get_basic_growth_data(date_index)
        growth_sets = pd.merge(ttm_factor_sets, balance_sets, on='symbol')
        # Ship the merged inputs to the cache, then kick off the remote calculation.
        cache_data.set_cache(session1, date_index, growth_sets.to_json(orient='records'))
        factor_growth.factor_calculate(date_index=date_index,
                                       session=session1)
        time1 = time.time()
        print('growth_cal_time:{}'.format(time1 - start_time))
        # history_value
        valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets = get_basic_history_value_data(date_index)
        valuation_sets = pd.merge(valuation_sets, income_sets, on='symbol')
        valuation_sets = pd.merge(valuation_sets, ttm_factor_sets, on='symbol')
        valuation_sets = pd.merge(valuation_sets, cash_flow_sets, on='symbol')
        cache_data.set_cache(session2, date_index, valuation_sets.to_json(orient='records'))
        historical_value.factor_calculate(date_index=date_index,
                                          session=session2)
        print('history_cal_time:{}'.format(time.time() - time1))
        # scale (disabled)
        # valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets, balance_sets = get_basic_scale_data(date_index)
        # valuation_sets = pd.merge(valuation_sets, income_sets, on='symbol')
        # valuation_sets = pd.merge(valuation_sets, ttm_factor_sets, on='symbol')
        # valuation_sets = pd.merge(valuation_sets, cash_flow_sets, on='symbol')
        # valuation_sets = pd.merge(valuation_sets, balance_sets, on='symbol')
        # cache_data.set_cache(session3, date_index, valuation_sets.to_json(orient='records'))
        # factor_per_share_indicators.factor_calculate(date_index=date_index,
        #                                              session=session3)
        print('---------------------->')
import pdb
from alphamind.api import *
from PyFin.api import *
from PyFin.api import makeSchedule
from sqlalchemy import create_engine, select, and_, or_
from sqlalchemy.pool import NullPool
from factors.models import Alpha191
import pandas as pd
import time
import datetime
import json
import sys
from factors import analysis
from ultron.cluster.invoke.cache_data import cache_data
from ultron.utilities.short_uuid import unique_machine,decode
def fetch_factor(engine191, factor_names, start_date, end_date):
    """Load the requested Alpha191 factor columns for a date range.

    :param engine191: SQLAlchemy engine bound to the Alpha191 database
    :param factor_names: iterable of attribute names on the Alpha191 model
    :param start_date: inclusive start of the trade_date range
    :param end_date: inclusive end of the trade_date range
    :return: pandas DataFrame with trade_date, code and the factor columns
    """
    db_columns = [Alpha191.trade_date, Alpha191.code]
    # getattr resolves inherited attributes too, unlike Alpha191.__dict__[name],
    # which only sees names defined directly on the class.
    db_columns.extend(getattr(Alpha191, name) for name in factor_names)
    stmt = select(db_columns).where(
        and_(Alpha191.trade_date >= start_date, Alpha191.trade_date <= end_date))
    return pd.read_sql(stmt, engine191)
def factor_combination(engine, factors, universe_name_list, start_date, end_date, freq):
    """Join factor values with risk factors, market value, forward returns
    and industry labels over a rebalancing schedule.

    :param engine: alpha-mind SqlEngine
    :param factors: DataFrame keyed by ['trade_date', 'code']
    :param universe_name_list: universe names to union (e.g. ['zz500', 'hs300'])
    :param start_date, end_date: schedule bounds
    :param freq: rebalancing frequency string (e.g. '3b')
    :return: merged DataFrame with all NaN rows removed
    """
    # Union the requested universes.
    universe = None
    for name in universe_name_list:
        if universe is None:
            universe = Universe(name)
        else:
            universe += Universe(name)
    dates = makeSchedule(start_date, end_date, freq, calendar='china.sse')
    factor_negMkt = engine.fetch_factor_range(universe, "negMarketValue", dates=dates)
    risk_cov, risk_factors = engine.fetch_risk_model_range(universe, dates=dates)
    dx_returns = engine.fetch_dx_return_range(universe, dates=dates, horizon=map_freq(freq))
    # Combine everything on (trade_date, code).
    total_data = pd.merge(factors, risk_factors, on=['trade_date', 'code'])
    total_data = pd.merge(total_data, factor_negMkt, on=['trade_date', 'code'])
    total_data = pd.merge(total_data, dx_returns, on=['trade_date', 'code'])
    industry_category = engine.fetch_industry_range(universe, dates=dates)
    total_data = pd.merge(total_data, industry_category, on=['trade_date', 'code'])
    # One dropna suffices; the original called it twice back to back.
    total_data.dropna(inplace=True)
    return total_data
def fetch_factor_sets(**kwargs):
    """Build the combined factor dataset described by keyword arguments.

    Required keys: db_info, factor_names, start_date, end_date,
    universe_name, benchmark_code, freq.  Extra keys are ignored.

    :return: DataFrame produced by factor_combination
    """
    db_info = kwargs["db_info"]
    factor_names = kwargs["factor_names"]
    start_date, end_date = kwargs['start_date'], kwargs['end_date']
    universe_names = kwargs['universe_name']
    # benchmark_code is a required key but is not consumed here; the lookup is
    # kept so a missing key still raises KeyError as before.
    benchmark_code = kwargs['benchmark_code']
    freq = kwargs['freq']
    alpha_engine = SqlEngine(db_info)  # alpha-mind engine
    raw_engine = create_engine(db_info, poolclass=NullPool)
    factors = fetch_factor(raw_engine, factor_names, start_date, end_date)
    return factor_combination(alpha_engine, factors, universe_names,
                              start_date, end_date, freq)
# --- Script body --------------------------------------------------------
# Builds the alpha-factor dataset, caches one per-factor slice under a
# time-derived session id, and triggers the remote factor analysis.
#session = str('15609986886946081')
session = str(int(time.time() * 1000000 + datetime.datetime.now().microsecond))
alpha_list = []
# Factor names to process; currently only alpha_31 (range(31, 32)).
for i in range(31,32):
    alpha_name = 'alpha_' + str(i)
    alpha_list.append(alpha_name)
db_info = 'postgresql+psycopg2://alpha:alpha@180.166.26.82:8889/alpha'
total_data = fetch_factor_sets(db_info=db_info,
                               factor_names=alpha_list, risk_styles=["SIZE"],
                               start_date='2010-01-01', end_date='2018-12-31',
                               universe_name=['zz500','hs300','ashare'],
                               benchmark_code=905,
                               freq='3b')
try:
    # Columns shared by every per-factor slice (everything but the alphas).
    diff_sets = set(total_data.columns) - set(alpha_list)
except:
    # NOTE(review): bare except + interactive debugger is debugging
    # scaffolding; it swallows every error (including a failed fetch above,
    # which leaves total_data undefined) — consider narrowing or removing.
    import pdb
    pdb.set_trace()
grouped_list = []
for alpha_name in alpha_list:
    print(alpha_name, session)
    #pdb.set_trace()
    #print(cache_data.get_cache(session, alpha_name))
    # One slice per factor: the shared columns plus this factor's values.
    factors_list = list(diff_sets)
    factors_list.append(alpha_name)
    factors_sets = total_data[factors_list]
    cache_data.set_cache(session, alpha_name, factors_sets.to_json(orient='records'))
    analysis.factor_analysis(factor_name=alpha_name,risk_styles=['SIZE'],
                             benchmark_code=905,
                             session=session)
import pdb
import collections
import json
import time
from pandas.io.json import json_normalize
from datetime import datetime, timedelta
from factor import factor_growth, historical_value, factor_per_share_indicators
from factor.ttm_fundamental import *
from vision.file_unit.balance import Balance
from vision.file_unit.cash_flow import CashFlow
from vision.file_unit.income import Income
from vision.file_unit.valuation import Valuation
from vision.file_unit.industry import Industry
from factor.utillities.trade_date import TradeDate
from ultron.cluster.invoke.cache_data import cache_data
from ultron.utilities.short_uuid import unique_machine, decode
def get_trade_date(trade_date, n):
    """Return the trading day closest to (but not after) `trade_date` minus n years.

    If the computed calendar date is not a trading day, walk backwards one
    day at a time until a trading day is found.  Dates that fall before the
    known trading-day history are returned unchanged.

    :param trade_date: reference trading day as an int, e.g. 20190101
    :param n: number of years to step back
    :return: int date in YYYYMMDD form
    """
    calendar = TradeDate()
    trading_days = collections.OrderedDict(
        sorted(calendar._trade_date_sets.items(), key=lambda kv: kv[0]))
    target = datetime.strptime(str(trade_date), "%Y%m%d") - n * timedelta(days=365)
    candidate = int(datetime.strftime(target, "%Y%m%d"))
    if candidate < min(trading_days.keys()):
        # Earlier than our trading-day history: nothing to snap to.
        return candidate
    # Decrement through the integer dates until we hit a known trading day;
    # invalid YYYYMMDD values are simply skipped over.
    while candidate not in trading_days:
        candidate -= 1
    return candidate
def get_basic_growth_data(trade_date):
    """Fetch the raw per-symbol data needed by the growth factors.

    For every symbol on `trade_date`, pulls balance-sheet levels (current and
    one year back) plus TTM income/cash-flow aggregates for the current date
    and each of the previous five years.

    :param trade_date: trading day as int, e.g. 20190101
    :return: (ttm_factor_sets, balance_sets) DataFrames keyed by 'symbol'
    """
    # Trading days 1..5 years before trade_date.
    pre_year_dates = [get_trade_date(trade_date, i) for i in range(1, 6)]
    trade_date_pre_year = pre_year_dates[0]

    balance_sets = get_fundamentals(add_filter_trade(query(
        Balance._name_,
        [Balance.symbol,
         Balance.total_assets,            # total assets
         Balance.total_owner_equities]),  # total owners' equity
        [trade_date]))
    balance_sets_pre_year = get_fundamentals(add_filter_trade(query(
        Balance._name_,
        [Balance.symbol,
         Balance.total_assets,
         Balance.total_owner_equities]),
        [trade_date_pre_year]))
    balance_sets_pre_year = balance_sets_pre_year.rename(
        columns={"total_assets": "total_assets_pre_year",
                 "total_owner_equities": "total_owner_equities_pre_year"})
    balance_sets = pd.merge(balance_sets, balance_sets_pre_year, on='symbol')

    # Full TTM factor set (used for the current date and one year back).
    ttm_factors = {Income._name_: [Income.symbol,
                                   Income.operating_revenue,         # operating revenue
                                   Income.operating_profit,          # operating profit
                                   Income.total_profit,              # total profit
                                   Income.net_profit,                # net profit
                                   Income.operating_cost,            # operating cost
                                   Income.np_parent_company_owners   # net profit attributable to parent owners
                                   ],
                   CashFlow._name_: [CashFlow.symbol,
                                     CashFlow.net_finance_cash_flow,  # net financing cash flow
                                     CashFlow.net_operate_cash_flow,  # net operating cash flow
                                     CashFlow.net_invest_cash_flow,   # net investing cash flow
                                     ]
                   }
    # Reduced TTM set sampled continuously over the last five years.
    ttm_factor_continue = {Income._name_: [Income.symbol,
                                           Income.net_profit,
                                           Income.operating_revenue,
                                           Income.operating_cost,
                                           Income.np_parent_company_owners,
                                           ]
                           }

    def _ttm(factors, date):
        # Fetch TTM fundamentals for one date and drop the redundant trade_date column.
        return get_ttm_fundamental([], factors, date).reset_index().drop(columns={"trade_date"})

    ttm_factor_sets = _ttm(ttm_factors, trade_date)

    ttm_pre_year = _ttm(ttm_factors, trade_date_pre_year).rename(
        columns={"operating_revenue": "operating_revenue_pre_year",
                 "operating_profit": "operating_profit_pre_year",
                 "total_profit": "total_profit_pre_year",
                 "net_profit": "net_profit_pre_year",
                 "operating_cost": "operating_cost_pre_year",
                 "np_parent_company_owners": "np_parent_company_owners_pre_year",
                 "net_finance_cash_flow": "net_finance_cash_flow_pre_year",
                 "net_operate_cash_flow": "net_operate_cash_flow_pre_year",
                 "net_invest_cash_flow": "net_invest_cash_flow_pre_year"})
    ttm_factor_sets = pd.merge(ttm_factor_sets, ttm_pre_year, on="symbol")

    # Suffix "_pre_year_i" marks the continuous set i years back (i = 1..5).
    continue_cols = ("operating_revenue", "operating_cost", "net_profit",
                     "np_parent_company_owners")
    for offset, date in enumerate(pre_year_dates, start=1):
        pre = _ttm(ttm_factor_continue, date).rename(
            columns={col: "{}_pre_year_{}".format(col, offset) for col in continue_cols})
        ttm_factor_sets = pd.merge(ttm_factor_sets, pre, on="symbol")
    return ttm_factor_sets, balance_sets
def get_basic_history_value_data(trade_date):
    """Fetch the raw per-symbol data needed by the historical-value factors.

    Pulls valuation ratios, cash-flow and income items for `trade_date`, the
    SW level-1 industry classification, TTM aggregates for the current date
    and 3/5 years back, and 4-point market-cap sums (2..5 years back).

    :param trade_date: trading day as int, e.g. 20190101
    :return: (valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets)
    """
    # PS, PE, PB, PCF
    valuation_sets = get_fundamentals(add_filter_trade(query(
        Valuation._name_,
        [Valuation.symbol,
         Valuation.pe,
         Valuation.ps,
         Valuation.pb,
         Valuation.pcf,
         Valuation.market_cap,
         Valuation.circulating_market_cap]), [trade_date]))
    cash_flow_sets = get_fundamentals(add_filter_trade(query(
        CashFlow._name_,
        [CashFlow.symbol,
         CashFlow.goods_sale_and_service_render_cash]), [trade_date]))
    income_sets = get_fundamentals(add_filter_trade(query(
        Income._name_,
        [Income.symbol,
         Income.net_profit]), [trade_date]))
    # SW (ShenWan) level-1 industry codes used to filter the classification.
    industry_set = ['801010', '801020', '801030', '801040', '801050', '801080', '801110', '801120', '801130',
                    '801140', '801150', '801160', '801170', '801180', '801200', '801210', '801230', '801710',
                    '801720', '801730', '801740', '801750', '801760', '801770', '801780', '801790', '801880',
                    '801890']
    sw_industry = get_fundamentals(add_filter_trade(query(
        Industry._name_,
        [Industry.symbol,
         Industry.isymbol]), [trade_date]))
    # TTM aggregates.
    ttm_factors = {Income._name_: [Income.symbol,
                                   Income.np_parent_company_owners],
                   CashFlow._name_: [CashFlow.symbol,
                                     CashFlow.net_operate_cash_flow]
                   }
    # Only consumed by the commented-out windowed-TTM computation below; kept for parity.
    ttm_factors_sum_list = {Income._name_: [Income.symbol,
                                            Income.net_profit,  # net profit
                                            ], }
    trade_date_2y = get_trade_date(trade_date, 2)
    trade_date_3y = get_trade_date(trade_date, 3)
    trade_date_4y = get_trade_date(trade_date, 4)
    trade_date_5y = get_trade_date(trade_date, 5)
    ttm_factor_sets = get_ttm_fundamental([], ttm_factors, trade_date).reset_index()
    ttm_factor_sets_3 = get_ttm_fundamental([], ttm_factors, trade_date_3y).reset_index()
    ttm_factor_sets_5 = get_ttm_fundamental([], ttm_factors, trade_date_5y).reset_index()
    # The windowed TTM computation still needs optimisation.
    # ttm_factor_sets_sum = get_ttm_fundamental([], ttm_factors_sum_list, trade_date, 5).reset_index()
    factor_sets_sum = get_fundamentals(add_filter_trade(query(
        Valuation._name_,
        [Valuation.symbol,
         Valuation.market_cap,
         Valuation.circulating_market_cap,
         Valuation.trade_date]),
        [trade_date_2y, trade_date_3y, trade_date_4y, trade_date_5y]))
    # One grouped pass produces both sums (the original ran groupby twice).
    factor_sets_sums = factor_sets_sum.groupby('symbol')[
        ['market_cap', 'circulating_market_cap']].sum().reset_index().rename(
        columns={"market_cap": "market_cap_sum",
                 "circulating_market_cap": "circulating_market_cap_sum"})
    # Keep only SW level-1 industries.
    sw_industry = sw_industry[sw_industry['isymbol'].isin(industry_set)]
    # Attach the industry classification to the valuation data.
    valuation_sets = pd.merge(valuation_sets, sw_industry, on='symbol')
    ttm_factor_sets = ttm_factor_sets.drop(columns={"trade_date"})
    ttm_factor_sets_3 = ttm_factor_sets_3.rename(
        columns={"np_parent_company_owners": "np_parent_company_owners_3"}).drop(columns={"trade_date"})
    ttm_factor_sets_5 = ttm_factor_sets_5.rename(
        columns={"np_parent_company_owners": "np_parent_company_owners_5"}).drop(columns={"trade_date"})
    ttm_factor_sets = pd.merge(ttm_factor_sets, ttm_factor_sets_3, on='symbol')
    ttm_factor_sets = pd.merge(ttm_factor_sets, ttm_factor_sets_5, on='symbol')
    ttm_factor_sets = pd.merge(ttm_factor_sets, factor_sets_sums, on='symbol')
    return valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets
def get_basic_scale_data(trade_date):
    """
    Fetch the base data needed for the per-share / scale factors.

    Pulls, for every stock on the given trading day, its valuation,
    cash-flow, income and balance-sheet records, plus trailing-twelve-month
    aggregates of selected income and cash-flow items.

    :param trade_date: trading day
    :return: tuple of DataFrames
             (valuation_sets, ttm_factor_sets, cash_flow_sets,
              income_sets, balance_sets)
    """
    valuation_sets = get_fundamentals(add_filter_trade(
        query(Valuation._name_,
              [Valuation.symbol,
               Valuation.market_cap,
               Valuation.capitalization,            # total share capital
               Valuation.circulating_market_cap]),  # circulating market cap
        [trade_date]))

    cash_flow_sets = get_fundamentals(add_filter_trade(
        query(CashFlow._name_,
              [CashFlow.symbol,
               CashFlow.cash_and_equivalents_at_end,  # cash & equivalents at period end
               CashFlow.cash_equivalent_increase]),   # net increase in cash & equivalents
        [trade_date]))

    income_sets = get_fundamentals(add_filter_trade(
        query(Income._name_,
              [Income.symbol,
               Income.basic_eps,                  # basic EPS
               Income.diluted_eps,                # diluted EPS
               Income.net_profit,
               Income.operating_revenue,          # operating revenue
               Income.operating_profit,           # operating profit
               Income.total_operating_revenue]),  # total operating revenue
        [trade_date]))

    balance_sets = get_fundamentals(add_filter_trade(
        query(Balance._name_,
              [Balance.symbol,
               Balance.capital_reserve_fund,    # capital reserve
               Balance.surplus_reserve_fund,    # surplus reserve
               Balance.total_assets,            # total assets
               Balance.dividend_receivable,     # dividends receivable
               Balance.retained_profit,         # retained earnings
               Balance.total_owner_equities]),  # equity attributable to parent company
        [trade_date]))

    # TTM aggregation over selected statement items.
    ttm_factors = {
        Income._name_: [Income.symbol,
                        Income.operating_revenue,          # operating revenue
                        Income.operating_profit,           # operating profit
                        Income.np_parent_company_owners,   # net profit attributable to parent
                        Income.total_operating_revenue],   # total operating revenue
        CashFlow._name_: [CashFlow.symbol,
                          CashFlow.net_operate_cash_flow],  # net operating cash flow
    }
    ttm_factor_sets = get_ttm_fundamental([], ttm_factors, trade_date).reset_index()
    # Suffix the TTM columns so they do not collide with the raw columns
    # when merged downstream.
    ttm_factor_sets = ttm_factor_sets.rename(columns={
        "np_parent_company_owners": "np_parent_company_owners_ttm",
        "net_operate_cash_flow": "net_operate_cash_flow_ttm",
        "operating_revenue": "operating_revenue_ttm",
        "operating_profit": "operating_profit_ttm",
        "total_operating_revenue": "total_operating_revenue_ttm",
    })
    ttm_factor_sets = ttm_factor_sets.drop(columns=["trade_date"])
    return valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets, balance_sets
if __name__ == '__main__':
    # One fixed cache-session id per factor family (growth / historical
    # value / per-share scale); the remote factor_calculate workers look
    # the cached frames up by (session, date_index).
    session1 = str('156099868869460811')
    session2 = str('156099868869460812')
    session3 = str('156099868869460813')
    # session = str(int(time.time() * 1000000 + datetime.now().microsecond))
    start_date = 20100101
    end_date = 20190101
    count = 10
    # rebuild=True drops and recreates all three destination factor tables.
    rebuild = True
    _trade_date = TradeDate()
    trade_date_sets = _trade_date.trade_date_sets_ago(start_date, end_date, count)
    if rebuild is True:
        growth = factor_growth.Growth('factor_growth')
        growth.create_dest_tables()
        history_value = historical_value.HistoricalValue('factor_historical_value')
        history_value.create_dest_tables()
        scale = factor_per_share_indicators.PerShareIndicators('factor_scale')
        scale.create_dest_tables()
    for date_index in trade_date_sets:
        # factor_growth: join the TTM and balance data, cache the frame as
        # JSON records, then dispatch the remote calculation.
        start_time = time.time()
        ttm_factor_sets, balance_sets = get_basic_growth_data(date_index)
        # NOTE(review): inner merges drop any symbol missing from either
        # input set — presumably intentional; verify.
        growth_sets = pd.merge(ttm_factor_sets, balance_sets, on='symbol')
        cache_data.set_cache(session1, date_index, growth_sets.to_json(orient='records'))
        factor_growth.factor_calculate(date_index=date_index,
                                       session=session1)
        time1 = time.time()
        print('growth_cal_time:{}'.format(time1 - start_time))
        # history_value: same pattern with the valuation/income/TTM/cash-flow join.
        valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets = get_basic_history_value_data(date_index)
        valuation_sets = pd.merge(valuation_sets, income_sets, on='symbol')
        valuation_sets = pd.merge(valuation_sets, ttm_factor_sets, on='symbol')
        valuation_sets = pd.merge(valuation_sets, cash_flow_sets, on='symbol')
        cache_data.set_cache(session2, date_index, valuation_sets.to_json(orient='records'))
        historical_value.factor_calculate(date_index=date_index,
                                          session=session2)
        print('history_cal_time:{}'.format(time.time() - time1))
        # scale: per-share indicators need the balance sheet as well.
        valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets, balance_sets = get_basic_scale_data(date_index)
        valuation_sets = pd.merge(valuation_sets, income_sets, on='symbol')
        valuation_sets = pd.merge(valuation_sets, ttm_factor_sets, on='symbol')
        valuation_sets = pd.merge(valuation_sets, cash_flow_sets, on='symbol')
        valuation_sets = pd.merge(valuation_sets, balance_sets, on='symbol')
        cache_data.set_cache(session3, date_index, valuation_sets.to_json(orient='records'))
        factor_per_share_indicators.factor_calculate(date_index=date_index,
                                                     session=session3)
        print('---------------------->')
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Factor package entry point: builds the ultron app that exposes the
growth and historical-value factor calculators to the cluster.

@version: ??
@author: li
@file: __init__.py
@time: 2019-06-30 19:04
"""
from ultron.cluster.invoke.app_engine import create_app

# Register the factor modules whose remote factor_calculate entry points
# the cluster scheduler can invoke.
app = create_app('factor', ['factor.factor_growth', 'factor.historical_value'])
#!/usr/bin/env python
# coding=utf-8
# Runtime configuration: local dump directory plus source / destination
# database connection settings.
# SECURITY NOTE(review): credentials are hard-coded in source; consider
# moving them to environment variables or a secrets store.

# Root directory under which FactorBase writes per-date CSV snapshots.
RECORD_BASE_DIR = '/home/vision/data/vision/'
# RECORD_BASE_DIR = 'C://Users/zzh/git/rongliang/basic-data/file_data/'

# Source database (port 1433 — presumably SQL Server; verify).
source_db_host = '192.168.100.151'
source_db_port = '1433'
source_db_database = 'QADB'
source_db_user = 'read'
source_db_pwd = 'read'

# Destination database (MySQL, used by FactorBase via mysql-connector).
destination_db_host = '10.15.97.128'
destination_db_port = '3306'
destination_db_database = 'vision'
destination_db_user = 'root'
destination_db_pwd = '1234'
# destination_db_host = 'db1.irongliang.com'
# destination_db_port = '3306'
# destination_db_database = 'vision'
# destination_db_user = 'rl_sync'
# destination_db_pwd = 'rl_sync_2019'
\ No newline at end of file
#!/usr/bin/env python
# coding=utf-8
import os
import sys
import numpy as np
import pandas as pd
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
sys.path.append('..')
from factor.utillities.trade_date import TradeDate
from factor import config
class FactorBase(object):
    """Base class for factor persistence.

    Owns the destination MySQL engine/session factory and provides helpers
    to (re)create a factor table, index it, and store one trade date's
    factor DataFrame both to a local CSV snapshot and to the database,
    falling back to a row-wise upsert when the bulk insert fails.
    """

    def __init__(self, name):
        """
        :param name: destination table name; also names the local CSV
                     directory under RECORD_BASE_DIR/factor/.
        """
        destination_db = '''mysql+mysqlconnector://{0}:{1}@{2}:{3}/{4}'''.format(config.destination_db_user,
                                                                                 config.destination_db_pwd,
                                                                                 config.destination_db_host,
                                                                                 config.destination_db_port,
                                                                                 config.destination_db_database)
        self._name = name
        self._destination = sa.create_engine(destination_db)
        self._dest_session = sessionmaker(bind=self._destination, autocommit=False, autoflush=True)
        self._trade_date = TradeDate()
        # Local directory for per-trade-date CSV snapshots.
        self._dir = config.RECORD_BASE_DIR + 'factor/' + str(self._name)

    def _create_index(self):
        """Create the (trade_date, symbol) index on the destination table."""
        session = self._dest_session()
        try:
            indexs = [
                '''CREATE INDEX {0}_trade_date_symbol_index ON `{0}` (trade_date, symbol);'''.format(self._name)
            ]
            for sql in indexs:
                session.execute(sql)
            session.commit()
        finally:
            # fix: release the session even when index creation fails
            session.close()

    def _create_tables(self, create_sql, drop_sql):
        """Drop (optionally) and create the destination table, then index it.

        :param create_sql: CREATE TABLE statement
        :param drop_sql: DROP TABLE statement, or None to skip the drop
        """
        session = self._dest_session()
        try:
            if drop_sql is not None:
                session.execute(drop_sql)
            session.execute(create_sql)
            session.commit()
        finally:
            # fix: release the session even when DDL fails
            session.close()
        self._create_index()

    def _storage_data(self, data_flow, trade_date):
        """Persist one trade date's factor values.

        Writes <dir>/<trade_date>.csv, then replaces that date's rows in the
        destination table via delete + bulk insert; on failure falls back to
        a row-wise insert-or-update.

        :param data_flow: DataFrame of factor values for one trade date
        :param trade_date: trading day (used as file name and delete key)
        """
        data_flow = data_flow.where(pd.notnull(data_flow), None)
        # Clamp +/-inf and any remaining NaN to 0 before persisting.
        data_flow = data_flow.replace([-np.inf, np.inf], 0).fillna(value=0)
        # Local CSV snapshot (overwrite any previous file for this date).
        if not os.path.exists(self._dir):
            os.makedirs(self._dir)
        file_name = self._dir + '/' + str(trade_date) + '.csv'
        if os.path.exists(str(file_name)):
            os.remove(str(file_name))
        data_flow.to_csv(file_name, encoding='UTF-8')
        try:
            self.delete_trade_data(trade_date)
            data_flow.to_sql(name=self._name, con=self._destination, if_exists='append', index=False)
        except Exception as e:
            # fix: e.orig.msg only exists on SQLAlchemy DBAPI errors; on any
            # other exception the old code raised AttributeError and masked
            # the real failure. Fall back gracefully to a printable message.
            print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
            self.insert_or_update(data_flow)

    def delete_trade_data(self, trade_date):
        """Delete all rows for the given trade date from the destination table.

        NOTE(review): trade_date is interpolated into the SQL string —
        callers pass numeric dates here; do not pass untrusted strings.
        """
        session = self._dest_session()
        try:
            session.execute('''delete from `{0}` where trade_date={1}'''.format(self._name, trade_date))
            session.commit()
        finally:
            # fix: the session was previously never closed (leak)
            session.close()

    def insert_or_update(self, datas):
        """Row-wise upsert of *datas* using INSERT ... ON DUPLICATE KEY UPDATE.

        'nan'/'None' string values are rewritten to SQL NULL.
        """
        session = self._dest_session()
        try:
            for i in range(datas.shape[0]):
                data = datas.iloc[i]
                values = ''
                update = ''
                title = ''
                for j in range(len(data)):
                    index = data.index[j]
                    value = str(data[j]).replace("'", "\\'")
                    title += """`{0}`,""".format(index)
                    values += """'{0}',""".format(value)
                    update += """`{0}`='{1}',""".format(index, value)
                sql = '''insert into {0} ({1}) values({2}) ON DUPLICATE KEY UPDATE {3}'''.format(self._name,
                                                                                                 title[0:-1],
                                                                                                 values[0:-1],
                                                                                                 update[0:-1]
                                                                                                 )
                sql = sql.replace("'nan'", 'Null').replace("'None'", 'Null')
                session.execute(sql)
            session.commit()
        finally:
            # fix: close even if an execute fails mid-batch
            session.close()
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
@version: ??
@author: li
@file: factor_growth.py
@time: 2019-02-12 10:03
"""
import json
from datetime import datetime, timedelta
from sklearn import linear_model
from pandas.io.json import json_normalize
from factor import app
from factor.factor_base import FactorBase
from factor.ttm_fundamental import *
from vision.fm.signletion_engine import *
from factor.utillities import trade_date as td
from ultron.cluster.invoke.cache_data import cache_data
class Growth(FactorBase):
def __init__(self, name):
super(Growth, self).__init__(name)
def create_dest_tables(self):
"""
创建数据库表
:return:
"""
drop_sql = """drop table if exists `{0}`""".format(self._name)
create_sql = """create table `{0}`(
`id` varchar(32) NOT NULL,
`symbol` varchar(24) NOT NULL,
`trade_date` date NOT NULL,
`NetAsset1YChg` decimal(19,4),
`TotalAsset1YChg` decimal(19,4),
`ORev1YChgTTM` decimal(19,4),
`OPft1YChgTTM` decimal(19,4),
`GrPft1YChgTTM` decimal(19,4),
`NetPft1YChgTTM` decimal(19,4),
`NetPftAP1YChgTTM` decimal(19,4),
`NetPft3YChgTTM` decimal(19,4),
`NetPft5YChgTTM` decimal(19,4),
`ORev3YChgTTM` decimal(19,4),
`ORev5YChgTTM` decimal(19,4),
`NetCF1YChgTTM` decimal(19,4),
`NetPftAPNNRec1YChgTTM` decimal(19,4),
`NetPft5YAvgChgTTM` decimal(19,4),
`StdUxpErn1YTTM` decimal(19,4),
`StdUxpGrPft1YTTM` decimal(19,4),
`FCF1YChgTTM` decimal(19,4),
`ICF1YChgTTM` decimal(19,4),
`OCF1YChgTTM` decimal(19,4),
`Sales5YChgTTM` decimal(19,4),
PRIMARY KEY(`id`,`trade_date`,`symbol`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self._name)
super(Growth, self)._create_tables(create_sql, drop_sql)
def historical_net_asset_grow_rate(self, tp_historical_growth):
"""
净资产增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'total_owner_equities', 'total_owner_equities_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1.0 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['NetAsset1YChg'] = historical_growth[['total_owner_equities',
'total_owner_equities_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['total_owner_equities', 'total_owner_equities_pre_year'], axis=1)
# factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return historical_growth
def historical_total_asset_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
总资产增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'total_assets', 'total_assets_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1.0 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['TotalAsset1YChg'] = historical_growth[
['total_assets', 'total_assets_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['total_assets', 'total_assets_pre_year'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_operating_revenue_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
营业收入增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'operating_revenue', 'operating_revenue_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1.0 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['ORev1YChgTTM'] = historical_growth[
['operating_revenue', 'operating_revenue_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['operating_revenue', 'operating_revenue_pre_year'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_operating_profit_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
营业利润增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'operating_profit', 'operating_profit_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1.0 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['OPft1YChgTTM'] = historical_growth[
['operating_profit', 'operating_profit_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['operating_profit', 'operating_profit_pre_year'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_total_profit_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
利润总额增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'total_profit', 'total_profit_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['GrPft1YChgTTM'] = historical_growth[
['total_profit', 'total_profit_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['total_profit', 'total_profit_pre_year'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_net_profit_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
净利润增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'net_profit', 'net_profit_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['NetPft1YChgTTM'] = historical_growth[
['net_profit', 'net_profit_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['net_profit', 'net_profit_pre_year'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_np_parent_company_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
归属母公司股东的净利润增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'np_parent_company_owners', 'np_parent_company_owners_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['NetPftAP1YChgTTM'] = historical_growth[
['np_parent_company_owners', 'np_parent_company_owners_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(
columns=['np_parent_company_owners', 'np_parent_company_owners_pre_year'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_net_profit_grow_rate_3y(self, tp_historical_growth, factor_historical_growth):
"""
净利润3年复合增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'net_profit', 'net_profit_pre_year_3']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: (pow((x[0] / x[1]), 1 / 3.0) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['NetPft3YChgTTM'] = historical_growth[
['net_profit', 'net_profit_pre_year_3']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['net_profit', 'net_profit_pre_year_3'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_net_profit_grow_rate_5y(self, tp_historical_growth, factor_historical_growth):
"""
净利润5年复合增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'net_profit', 'net_profit_pre_year_5']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: (pow((x[0] / x[1]), 1 / 5.0) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['NetPft5YChgTTM'] = historical_growth[
['net_profit', 'net_profit_pre_year_5']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['net_profit', 'net_profit_pre_year_5'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_operating_revenue_grow_rate_3y(self, tp_historical_growth, factor_historical_growth):
"""
营业收入3年复合增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'operating_revenue', 'operating_revenue_pre_year_3']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: (pow((x[0] / x[1]), 1 / 3.0) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['ORev3YChgTTM'] = historical_growth[
['operating_revenue', 'operating_revenue_pre_year_3']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['operating_revenue', 'operating_revenue_pre_year_3'],
axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_operating_revenue_grow_rate_5y(self, tp_historical_growth, factor_historical_growth):
"""
营业收入5年复合增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'operating_revenue', 'operating_revenue_pre_year_5']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: (pow((x[0] / x[1]), 1 / 5.0) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['ORev5YChgTTM'] = historical_growth[
['operating_revenue', 'operating_revenue_pre_year_5']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['operating_revenue', 'operating_revenue_pre_year_5'],
axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_net_cash_flow_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
缺数据
净现金流量增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'n_change_in_cash', 'n_change_in_cash_pre']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['NetCF1YChgTTM'] = historical_growth[
['n_change_in_cash', 'n_change_in_cash_pre']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['n_change_in_cash', 'n_change_in_cash_pre'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_np_parent_company_cut_yoy(self, tp_historical_growth, factor_historical_growth):
"""
缺失数据
归属母公司股东的净利润(扣除)同比增长
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'ni_attr_p_cut', 'ni_attr_p_cut_pre']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[0] is not None and x[1] is not None else None)
historical_growth['NetPftAPNNRec1YChgTTM'] = historical_growth[
['ni_attr_p_cut', 'ni_attr_p_cut_pre']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['ni_attr_p_cut', 'ni_attr_p_cut_pre'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_egro_(self, tp_historical_growth, factor_historical_growth):
"""
5年收益增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
if len(tp_historical_growth) < 3:
return
columns_lists = ['symbol', 'net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2',
'net_profit_pre_year_3', 'net_profit_pre_year_4']
regr = linear_model.LinearRegression()
# 读取五年的时间和净利润
historical_growth = tp_historical_growth.loc[:, columns_lists]
regr.fit(tp_historical_growth['x'].values.reshape(-1, 1), tp_historical_growth['y'].values)
weight = regr.coef_
mean = tp_historical_growth['y'].mean()
def historical_egro(self, tp_historical_growth, factor_historical_growth):
columns_lists = ['symbol', 'net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2',
'net_profit_pre_year_3', 'net_profit_pre_year_4']
regr = linear_model.LinearRegression()
# 读取五年的时间和净利润
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
def has_non(a):
tmp = 0
for i in a.tolist():
for j in i:
if j is None or j == 'nan':
tmp += 1
if tmp >= 1:
return True
else:
return False
def fun2(x):
aa = x[['net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2', 'net_profit_pre_year_3', 'net_profit_pre_year_4']].fillna('nan').values.reshape(-1, 1)
if has_non(aa):
return None
else:
regr.fit(aa, range(0, 5))
return regr.coef_[-1]
# fun = lambda x: (regr.coef_[-1] if regr.fit(x[['net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2',
# 'net_profit_pre_year_3', 'net_profit_pre_year_4']].values.reshape(-1, 1),
# range(0, 5)) else None)
historical_growth['coefficient'] = historical_growth.apply(fun2, axis=1)
historical_growth['mean'] = historical_growth[['net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2', 'net_profit_pre_year_3', 'net_profit_pre_year_4']].fillna('nan').mean(axis=1)
fun1 = lambda x: x[0] / abs(x[1]) if x[1] != 0 and x[1] is not None and x[0] is not None else None
historical_growth['NetPft5YAvgChgTTM'] = historical_growth[['coefficient', 'mean']].apply(fun1, axis=1)
# historical_growth = historical_growth.drop(
# columns=['net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2', 'net_profit_pre_year_3',
# 'net_profit_pre_year_4', 'coefficient', 'mean'], axis=1)
historical_growth = historical_growth[['symbol', 'NetPft5YAvgChgTTM']]
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_sue(self, tp_historical_growth, factor_historical_growth):
"""
未预期盈余
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2',
'net_profit_pre_year_3', 'net_profit_pre_year_4']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
historical_growth['mean'] = historical_growth[['net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2', 'net_profit_pre_year_3', 'net_profit_pre_year_4']].fillna(0.0).mean(axis=1)
historical_growth['std'] = historical_growth[['net_profit', 'net_profit_pre_year_1', 'net_profit_pre_year_2', 'net_profit_pre_year_3', 'net_profit_pre_year_4']].fillna(0.0).std(axis=1)
fun = lambda x: (x[0] - x[1]) / x[2] if x[2] !=0 and x[1] is not None and x[0] is not None and x[2] is not None else None
historical_growth['StdUxpErn1YTTM'] = historical_growth[['net_profit', 'mean', 'std']].apply(fun, axis=1)
# historical_growth = historical_growth.drop(columns=['net_profit', 'std', 'mean'], axis=1)
historical_growth = historical_growth[['symbol', 'StdUxpErn1YTTM']]
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_suoi(self, tp_historical_growth, factor_historical_growth):
"""
未预期毛利
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'operating_revenue', 'operating_revenue_pre_year_1', 'operating_revenue_pre_year_2',
'operating_revenue_pre_year_3', 'operating_revenue_pre_year_4', 'operating_revenue_pre_year_5',
'operating_cost', 'operating_cost_pre_year_1', 'operating_cost_pre_year_2',
'operating_cost_pre_year_3', 'operating_cost_pre_year_4', 'operating_cost_pre_year_5']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
historical_growth['gi_1'] = historical_growth['operating_revenue'] - historical_growth['operating_cost']
historical_growth['gi_2'] = historical_growth['operating_revenue_pre_year_2'] - historical_growth[
'operating_cost_pre_year_2']
historical_growth['gi_3'] = historical_growth['operating_revenue_pre_year_3'] - historical_growth[
'operating_cost_pre_year_3']
historical_growth['gi_4'] = historical_growth['operating_revenue_pre_year_4'] - historical_growth[
'operating_cost_pre_year_4']
historical_growth['gi_5'] = historical_growth['operating_revenue_pre_year_5'] - historical_growth[
'operating_cost_pre_year_5']
historical_growth['mean'] = historical_growth[['gi_2', 'gi_3', 'gi_4', 'gi_5']].mean(axis=1)
historical_growth['std'] = historical_growth[['gi_2', 'gi_3', 'gi_4', 'gi_5']].std(axis=1)
fun = lambda x: ((x[0] - x[1]) / x[2] if x[2] != 0 and x[1] is not None and x[0] is not None and x[2] is not None else None)
# historical_growth['StdUxpGrPft1YTTM'] = (historical_growth['gi_1'] - historical_growth['mean']) / historical_growth['std']
historical_growth['StdUxpGrPft1YTTM'] = historical_growth[['gi_1', 'mean', 'std']].apply(fun, axis=1)
# historical_growth = historical_growth.drop(columns=['operating_revenue', 'operating_revenue_pre_year_1', 'operating_revenue_pre_year_2',
# 'operating_revenue_pre_year_3', 'operating_revenue_pre_year_4',
# 'operating_cost', 'operating_cost_pre_year_1', 'operating_cost_pre_year_2',
# 'operating_cost_pre_year_3', 'operating_cost_pre_year_4', 'std', 'mean',
# 'gi_1', 'gi_2', 'gi_3', 'gi_4', 'gi_5'], axis=1)
historical_growth = historical_growth[['symbol', 'StdUxpGrPft1YTTM']]
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_financing_cash_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
筹资活动产生的现金流量净额增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'net_finance_cash_flow', 'net_finance_cash_flow_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[1] is not None and x[0] is not None else None)
historical_growth['FCF1YChgTTM'] = historical_growth[
['net_finance_cash_flow', 'net_finance_cash_flow_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['net_finance_cash_flow', 'net_finance_cash_flow_pre_year'],
axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_oper_cash_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
经营活动产生的现金流量净额
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'net_operate_cash_flow', 'net_operate_cash_flow_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[1] is not None and x[0] is not None else None)
historical_growth['OCF1YChgTTM'] = historical_growth[
['net_operate_cash_flow', 'net_operate_cash_flow_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['net_operate_cash_flow', 'net_operate_cash_flow_pre_year'],
axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_invest_cash_grow_rate(self, tp_historical_growth, factor_historical_growth):
"""
投资活动产生的现金流量净额
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'net_invest_cash_flow', 'net_invest_cash_flow_pre_year']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] / x[1]) - 1 if x[1] and x[1] != 0 and x[1] is not None and x[0] is not None else None)
historical_growth['ICF1YChgTTM'] = historical_growth[
['net_invest_cash_flow', 'net_invest_cash_flow_pre_year']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['net_invest_cash_flow', 'net_invest_cash_flow_pre_year'],
axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def historical_sgro(self, tp_historical_growth, factor_historical_growth):
"""
五年营业收入增长率
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'operating_revenue', 'operating_revenue_pre_year_1', 'operating_revenue_pre_year_2',
'operating_revenue_pre_year_3', 'operating_revenue_pre_year_4']
regr = linear_model.LinearRegression()
# 读取五年的时间和净利润
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
def has_non(a):
tmp = 0
for i in a.tolist():
for j in i:
if j is None or j == 'nan':
tmp += 1
if tmp >= 1:
return True
else:
return False
def fun2(x):
aa = x[['operating_revenue', 'operating_revenue_pre_year_1', 'operating_revenue_pre_year_2', 'operating_revenue_pre_year_3', 'operating_revenue_pre_year_4']].fillna('nan').values.reshape(-1, 1)
if has_non(aa):
return None
else:
regr.fit(aa, range(0, 5))
return regr.coef_[-1]
# fun = lambda x: (regr.coef_[-1] if regr.fit(x[['operating_revenue', 'operating_revenue_pre_year_1',
# 'operating_revenue_pre_year_2', 'operating_revenue_pre_year_3',
# 'operating_revenue_pre_year_4']].values.reshape(-1, 1),
# range(0, 5)) else None)
historical_growth['coefficient'] = historical_growth.apply(fun2, axis=1)
historical_growth['mean'] = historical_growth[['operating_revenue', 'operating_revenue_pre_year_1', 'operating_revenue_pre_year_2', 'operating_revenue_pre_year_3', 'operating_revenue_pre_year_4']].fillna(0.0).mean(axis=1)
fun1 = lambda x: x[0] / abs(x[1]) if x[1] is not None and x[0] is not None and x[1] != 0 else None
historical_growth['Sales5YChgTTM'] = historical_growth[['coefficient', 'mean']].apply(fun1, axis=1)
historical_growth = historical_growth.drop(
columns=['operating_revenue', 'operating_revenue_pre_year_1', 'operating_revenue_pre_year_2',
'operating_revenue_pre_year_3', 'operating_revenue_pre_year_4', 'coefficient', 'mean'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
# 分析师预期增长
def fsalesg(self, tp_historical_growth, factor_historical_growth):
"""
未来预期营收增长
:param tp_historical_growth:
:param factor_historical_growth:
:return:
"""
columns_lists = ['symbol', 'sales_predict', 'sales_real']
historical_growth = tp_historical_growth.loc[:, columns_lists]
if len(historical_growth) <= 0:
return
fun = lambda x: ((x[0] - x[1]) / abs(x[1]) if x[1] and x[1] != 0 and x[1] is not None and x[0] is not None else None)
historical_growth['historical_financing_fearng_latest'] = historical_growth[
['sales_predict', 'sales_real']].apply(fun, axis=1)
historical_growth = historical_growth.drop(columns=['sales_predict', 'sales_real'], axis=1)
factor_historical_growth = pd.merge(factor_historical_growth, historical_growth, on='symbol')
return factor_historical_growth
def fearng(self, tp_historical_growth, factor_historical_growth):
    """
    Expected future earnings growth:
    (predicted earnings - realised earnings) / |realised earnings|.

    :param tp_historical_growth: raw data frame with earnings_predict/earnings_real
    :param factor_historical_growth: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'historical_financing_fearng_latest' added,
             or None on empty input
    """
    frame = tp_historical_growth.loc[:, ['symbol', 'earnings_predict', 'earnings_real']]
    if len(frame) <= 0:
        return

    def rel_diff(row):
        # Falsy or missing base -> None, as in the original lambda.
        predicted, realised = row[0], row[1]
        if realised and realised != 0 and realised is not None and predicted is not None:
            return (predicted - realised) / abs(realised)
        return None

    frame['historical_financing_fearng_latest'] = frame[
        ['earnings_predict', 'earnings_real']].apply(rel_diff, axis=1)
    frame = frame.drop(columns=['earnings_predict', 'earnings_real'], axis=1)
    return pd.merge(factor_historical_growth, frame, on='symbol')
def egibs_long(self, tp_historical_growth, factor_historical_growth):
    """
    Net profit growth: earnings / earnings three years ago, minus 1.

    NOTE(review): the result is stored as 'NetPft1YChgTTM' although the base
    period is 3 years back -- confirm the intended column name.

    :param tp_historical_growth: raw data frame with earnings columns
    :param factor_historical_growth: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'NetPft1YChgTTM' added, or None on empty input
    """
    frame = tp_historical_growth.loc[:, ['symbol', 'earnings', 'earnings_pre_year_3']]
    if len(frame) <= 0:
        return

    def growth_rate(row):
        # Falsy or missing base -> None, as in the original lambda.
        current, base = row[0], row[1]
        if base and base != 0 and base is not None and current is not None:
            return (current / base) - 1
        return None

    frame['NetPft1YChgTTM'] = frame[
        ['earnings', 'earnings_pre_year_3']].apply(growth_rate, axis=1)
    frame = frame.drop(columns=['earnings', 'earnings_pre_year_3'], axis=1)
    return pd.merge(factor_historical_growth, frame, on='symbol')
def calculate(trade_date, growth_sets, growth):
    """
    Compute all historical-growth factors for one trade date and persist them.

    :param growth: Growth calculator instance (storage target)
    :param growth_sets: base fundamental data for the date
    :param trade_date: trade date being processed
    :return: None -- the result frame is written via growth._storage_data
    """
    if len(growth_sets) <= 0:
        print("%s has no data" % trade_date)
        return
    # psindu
    # Each helper computes one factor column and merges it into the running
    # factor frame on 'symbol'.
    factor_historical_growth = growth.historical_net_asset_grow_rate(growth_sets)
    factor_historical_growth = growth.historical_total_asset_grow_rate(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_operating_revenue_grow_rate(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_operating_profit_grow_rate(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_total_profit_grow_rate(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_net_profit_grow_rate(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_np_parent_company_grow_rate(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_net_profit_grow_rate_3y(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_net_profit_grow_rate_5y(growth_sets, factor_historical_growth)
    factor_historical_growth = growth.historical_operating_revenue_grow_rate_3y(growth_sets, factor_historical_growth)
    # NOTE(review): from here on the accumulated factor frame is passed as BOTH
    # arguments instead of the raw `growth_sets` (cf. the "_3y" call above,
    # which uses growth_sets). This mirrors the "# memoryerror" workaround in
    # the per-share module, but it only works if the factor frame still carries
    # the raw input columns these helpers read -- confirm.
    factor_historical_growth = growth.historical_operating_revenue_grow_rate_5y(factor_historical_growth,
                                                                                factor_historical_growth)
    factor_historical_growth = growth.historical_net_cash_flow_grow_rate(factor_historical_growth,
                                                                         factor_historical_growth)
    factor_historical_growth = growth.historical_np_parent_company_cut_yoy(factor_historical_growth,
                                                                           factor_historical_growth)
    factor_historical_growth = growth.historical_egro(factor_historical_growth, factor_historical_growth)
    factor_historical_growth = growth.historical_sue(factor_historical_growth, factor_historical_growth)
    factor_historical_growth = growth.historical_suoi(factor_historical_growth, factor_historical_growth)
    factor_historical_growth = growth.historical_financing_cash_grow_rate(factor_historical_growth,
                                                                          factor_historical_growth)
    factor_historical_growth = growth.historical_oper_cash_grow_rate(factor_historical_growth,
                                                                     factor_historical_growth)
    factor_historical_growth = growth.historical_invest_cash_grow_rate(factor_historical_growth,
                                                                       factor_historical_growth)
    factor_historical_growth = growth.historical_sgro(factor_historical_growth, factor_historical_growth)
    # Keep only the final factor columns before storage.
    factor_historical_growth = factor_historical_growth[['symbol',
                                                         'NetAsset1YChg',
                                                         'TotalAsset1YChg',
                                                         'ORev1YChgTTM',
                                                         'OPft1YChgTTM',
                                                         'GrPft1YChgTTM',
                                                         'NetPft1YChgTTM',
                                                         'NetPftAP1YChgTTM',
                                                         'NetPft3YChgTTM',
                                                         'NetPft5YChgTTM',
                                                         'ORev3YChgTTM',
                                                         'ORev5YChgTTM',
                                                         'NetCF1YChgTTM',
                                                         'NetPftAPNNRec1YChgTTM',
                                                         'NetPft5YAvgChgTTM',
                                                         'StdUxpErn1YTTM',
                                                         'StdUxpGrPft1YTTM',
                                                         'FCF1YChgTTM',
                                                         'ICF1YChgTTM',
                                                         'OCF1YChgTTM',
                                                         'Sales5YChgTTM']]
    # Primary key: symbol concatenated with the trade date string.
    factor_historical_growth['id'] = factor_historical_growth['symbol'] + str(trade_date)
    factor_historical_growth['trade_date'] = str(trade_date)
    growth._storage_data(factor_historical_growth, trade_date)
def do_update(growth, growth_sets, start_date, end_date, count):
    # 读取本地交易日 -- read the locally-stored trade calendar and run
    # `calculate` for every trade date returned for the window.
    # NOTE(review): `td` is not visibly imported in this module -- confirm
    # `from factor.utillities import trade_date as td` exists at file top.
    # NOTE(review): run() invokes this as do_update(start_date, end_date,
    # count, growth_sets, growth), which does not match this signature --
    # verify the intended argument order.
    _trade_date = td.TradeDate()
    trade_date_sets = _trade_date.trade_date_sets_ago(start_date, end_date, count)
    for trade_date in trade_date_sets:
        calculate(trade_date, growth_sets, growth)
        print('----->')
def run(args):
    """
    CLI entry point: rebuild or incrementally update the growth factor table.

    :param args: parsed arguments with attributes rebuild, update,
                 start_date, end_date, count and growth_sets
    :return: None
    """
    if args.end_date == 0:
        # 0 means "up to today".
        end_date = int(datetime.now().date().strftime('%Y%m%d'))
    else:
        end_date = args.end_date
    if args.rebuild:
        # Drop and re-create the destination table, then backfill.
        processor_growth = Growth('factor_growth')
        processor_growth.create_dest_tables()
        # Bug fix: arguments were previously passed as (start_date, end_date,
        # count, growth_sets, growth), which does not match
        # do_update(growth, growth_sets, start_date, end_date, count).
        do_update(processor_growth, args.growth_sets, args.start_date, end_date, args.count)
    if args.update:
        processor_growth = Growth('factor_growth')
        do_update(processor_growth, args.growth_sets, args.start_date, end_date, args.count)
@app.task()
def factor_calculate(**kwargs):
    """Task entry: pull the cached fundamentals for a date and run `calculate`."""
    print("growth_kwargs: {}".format(kwargs))
    session_id = kwargs['session']
    date_index = kwargs['date_index']
    # The name must match the table name created on the client side,
    # otherwise storage will fail.
    growth_calculator = Growth('factor_growth')
    raw = cache_data.get_cache(session_id, date_index)
    growth_frame = json_normalize(json.loads(str(raw, encoding='utf8')))
    print("len_total_growth_data {}".format(len(growth_frame)))
    calculate(date_index, growth_frame, growth_calculator)
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
每股指标
@version: ??
@author: li
@file: factor_per_share_indicators.py
@time: 2019-02-12 10:02
"""
import sys
import json
from pandas.io.json import json_normalize
sys.path.append("..")
from factor.ttm_fundamental import *
from factor.factor_base import FactorBase
from vision.fm.signletion_engine import *
from ultron.cluster.invoke.cache_data import cache_data
class PerShareIndicators(FactorBase):
    """Per-share indicator factors (EPS, per-share revenue, cash flow, etc.)."""

    def __init__(self, name):
        # `name` is the destination table name used by the FactorBase storage layer.
        super(PerShareIndicators, self).__init__(name)
    def create_dest_tables(self):
        """
        创建数据库表 -- drop and re-create the destination MySQL table; one row
        per (id, trade_date, symbol), all factor columns decimal(19,4).
        :return: None
        """
        drop_sql = """drop table if exists `{0}`""".format(self._name)
        create_sql = """create table `{0}`(
                    `id` varchar(32) NOT NULL,
                    `symbol` varchar(24) NOT NULL,
                    `trade_date` date NOT NULL,
                    `eps_latest` decimal(19,4),
                    `diluted_eps_ttm` decimal(19,4),
                    `cash_equivalent_ps_latest` decimal(19,4),
                    `dividend_ps_latest` decimal(19,4),
                    `eps_ttm` decimal(19,4),
                    `net_asset_ps_latest` decimal(19,4),
                    `tor_ps_latest` decimal(19,4),
                    `tor_ps_ttm` decimal(19,4),
                    `operating_revenue_ps_ttm` decimal(19,4),
                    `operating_revenue_ps_latest` decimal(19,4),
                    `operating_profit_ps_ttm` decimal(19,4),
                    `operating_profit_ps_latest` decimal(19,4),
                    `capital_surplus_fund_ps_latest` decimal(19,4),
                    `surplus_reserve_fund_ps_latest` decimal(19,4),
                    `undivided_pro_fit_ps_latest` decimal(19,4),
                    `retained_earnings_ps_latest` decimal(19,4),
                    `oper_cash_flow_ps_ttm` decimal(19,4),
                    `cash_flow_ps_ttm` decimal(19,4),
                    `enterprise_fcfps_latest` decimal(19,4),
                    `shareholder_fcfps_latest` decimal(19,4),
                    PRIMARY KEY(`id`,`trade_date`,`symbol`)
                    )ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self._name)
        # FactorBase executes the drop first, then the create.
        super(PerShareIndicators, self)._create_tables(create_sql, drop_sql)
def eps(self, tp_share_indicators, factor_share_indicators):
    """
    Basic EPS (latest): copied straight from the 'basic_eps' input column.

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'eps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'basic_eps']]
    frame['eps_latest'] = frame['basic_eps']
    frame = frame[['symbol', 'eps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def diluted_eps(self, tp_share_indicators, factor_share_indicators):
    """
    Diluted EPS (TTM): copied straight from the 'diluted_eps' input column.

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'diluted_eps_ttm' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'diluted_eps']]
    frame['diluted_eps_ttm'] = frame['diluted_eps']
    frame = frame[['symbol', 'diluted_eps_ttm']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def cash_equivalent_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Cash & cash equivalents per share = ending cash balance / capitalization.
    None when the capitalization is falsy (0/NaN-free guard).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'cash_equivalent_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'capitalization', 'cash_and_equivalents_at_end']]
    ratio = lambda row: (row['cash_and_equivalents_at_end'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['cash_equivalent_ps_latest'] = frame[
        ['capitalization', 'cash_and_equivalents_at_end']].apply(ratio, axis=1)
    frame = frame[['symbol', 'cash_equivalent_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def dividend_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Dividend per share (pre-tax): copied from the 'dividend_receivable' column.

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'dividend_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'dividend_receivable']]
    frame['dividend_ps_latest'] = frame['dividend_receivable']
    frame = frame[['symbol', 'dividend_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def eps_ttm(self, tp_share_indicators, factor_share_indicators):
    """
    EPS (TTM) = net profit attributable to parent (TTM) / capitalization.
    NOTE(review): falls back to 0 on a falsy denominator, unlike sibling
    factors which return None -- behaviour kept as-is.

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'eps_ttm' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'np_parent_company_owners_ttm', 'capitalization']]
    ratio = lambda row: (row['np_parent_company_owners_ttm'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else 0)
    frame['eps_ttm'] = frame[['np_parent_company_owners_ttm', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'eps_ttm']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def net_asset_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Net assets per share = total owner equities / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'net_asset_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'total_owner_equities', 'capitalization']]
    ratio = lambda row: (row['total_owner_equities'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['net_asset_ps_latest'] = frame[['total_owner_equities', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'net_asset_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def tor_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Total operating revenue per share (TTM) = total operating revenue (TTM)
    / capitalization (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'tor_ps_ttm' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'total_operating_revenue_ttm', 'capitalization']]
    ratio = lambda row: (row['total_operating_revenue_ttm'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['tor_ps_ttm'] = frame[['total_operating_revenue_ttm', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'tor_ps_ttm']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def tor_ps_latest(self, tp_share_indicators, factor_share_indicators):
    """
    Total operating revenue per share (latest) = total operating revenue
    / capitalization (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'tor_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'total_operating_revenue', 'capitalization']]
    ratio = lambda row: (row['total_operating_revenue'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['tor_ps_latest'] = frame[['total_operating_revenue', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'tor_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def operating_revenue_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Operating revenue per share (TTM) = operating revenue (TTM)
    / capitalization (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'operating_revenue_ps_ttm' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'operating_revenue_ttm', 'capitalization']]
    ratio = lambda row: (row['operating_revenue_ttm'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['operating_revenue_ps_ttm'] = frame[['operating_revenue_ttm', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'operating_revenue_ps_ttm']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def operating_revenue_ps_latest(self, tp_share_indicators, factor_share_indicators):
    """
    Operating revenue per share (latest) = operating revenue / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'operating_revenue_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'operating_revenue', 'capitalization']]
    ratio = lambda row: (row['operating_revenue'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['operating_revenue_ps_latest'] = frame[['operating_revenue', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'operating_revenue_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def operating_profit_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Operating profit per share (TTM) = operating profit (TTM) / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'operating_profit_ps_ttm' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'operating_profit_ttm', 'capitalization']]
    ratio = lambda row: (row['operating_profit_ttm'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['operating_profit_ps_ttm'] = frame[['operating_profit_ttm', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'operating_profit_ps_ttm']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def operating_profit_ps_latest(self, tp_share_indicators, factor_share_indicators):
    """
    Operating profit per share (latest) = operating profit / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'operating_profit_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'operating_profit', 'capitalization']]
    ratio = lambda row: (row['operating_profit'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['operating_profit_ps_latest'] = frame[['operating_profit', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'operating_profit_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def capital_surplus_fund_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Capital surplus fund per share = capital reserve fund / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'capital_surplus_fund_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'capital_reserve_fund', 'capitalization']]
    ratio = lambda row: (row['capital_reserve_fund'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['capital_surplus_fund_ps_latest'] = frame[['capital_reserve_fund', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'capital_surplus_fund_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def surplus_reserve_fund_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Surplus reserve fund per share = surplus reserve fund / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'surplus_reserve_fund_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'surplus_reserve_fund', 'capitalization']]
    ratio = lambda row: (row['surplus_reserve_fund'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['surplus_reserve_fund_ps_latest'] = frame[['surplus_reserve_fund', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'surplus_reserve_fund_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def undivided_pro_fit_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Undistributed profit per share = retained profit / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'undivided_pro_fit_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'retained_profit', 'capitalization']]
    ratio = lambda row: (row['retained_profit'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['undivided_pro_fit_ps_latest'] = frame[['retained_profit', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'undivided_pro_fit_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def retained_earnings_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Retained earnings per share = surplus reserve fund PS
    + undistributed profit PS (both taken from already-computed columns).

    :param tp_share_indicators: frame carrying the two per-share inputs
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'retained_earnings_ps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'surplus_reserve_fund_ps_latest',
                                        'undivided_pro_fit_ps_latest']]
    frame['retained_earnings_ps_latest'] = (frame['undivided_pro_fit_ps_latest']
                                            + frame['surplus_reserve_fund_ps_latest'])
    frame = frame[['symbol', 'retained_earnings_ps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def oper_cash_flow_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Operating cash flow per share (TTM) = net operating cash flow (TTM)
    / capitalization (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'oper_cash_flow_ps_ttm' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'net_operate_cash_flow_ttm', 'capitalization']]
    ratio = lambda row: (row['net_operate_cash_flow_ttm'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['oper_cash_flow_ps_ttm'] = frame[['net_operate_cash_flow_ttm', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'oper_cash_flow_ps_ttm']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def cash_flow_ps(self, tp_share_indicators, factor_share_indicators):
    """
    Net cash flow per share (TTM) = net change in cash (TTM) / capitalization
    (None on a falsy denominator).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'cash_flow_ps_ttm' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'n_change_in_cash_ttm', 'capitalization']]
    ratio = lambda row: (row['n_change_in_cash_ttm'] / row['capitalization']
                         if row['capitalization'] and row['capitalization'] != 0 else None)
    frame['cash_flow_ps_ttm'] = frame[['n_change_in_cash_ttm', 'capitalization']].apply(ratio, axis=1)
    frame = frame[['symbol', 'cash_flow_ps_ttm']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def enterprise_fcfps(self, tp_share_indicators, factor_share_indicators):
    """
    Enterprise free cash flow per share: copied from the precomputed
    'enterprise_fcfps' input column (the underlying figure is not derived here).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'enterprise_fcfps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'enterprise_fcfps']]
    frame['enterprise_fcfps_latest'] = frame['enterprise_fcfps']
    frame = frame[['symbol', 'enterprise_fcfps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def shareholder_fcfps(self, tp_share_indicators, factor_share_indicators):
    """
    Shareholder free cash flow per share: copied from the precomputed
    'shareholder_fcfps' input column (the underlying figure is not derived here).

    :param tp_share_indicators: raw data frame
    :param factor_share_indicators: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'shareholder_fcfps_latest' added
    """
    frame = tp_share_indicators.loc[:, ['symbol', 'shareholder_fcfps']]
    frame['shareholder_fcfps_latest'] = frame['shareholder_fcfps']
    frame = frame[['symbol', 'shareholder_fcfps_latest']]
    return pd.merge(factor_share_indicators, frame, on='symbol')
def calculate(trade_date, valuation_sets, scale):
    """
    规模 -- compute all per-share ("scale") factors for one trade date
    and persist the result.

    :param scale: PerShareIndicators instance (storage target)
    :param valuation_sets: base fundamental data for the date
    :param trade_date: trade date being processed
    :return: None -- the result frame is written via scale._storage_data
    """
    if len(valuation_sets) <= 0:
        print("%s has no data" % trade_date)
        return
    # psindu
    # The first call passes `valuation_sets` as both the raw-data and factor
    # frame, so the factor frame keeps every raw input column. The calls
    # annotated "memoryerror"/"memorydrror" below reuse the factor frame as
    # the raw input -- that only works because of this.
    factor_share_indicators = scale.eps(valuation_sets, valuation_sets)
    factor_share_indicators = scale.diluted_eps(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.cash_equivalent_ps(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.dividend_ps(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.eps_ttm(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.net_asset_ps(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.tor_ps(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.tor_ps_latest(factor_share_indicators, factor_share_indicators)  # memorydrror
    factor_share_indicators = scale.operating_revenue_ps(factor_share_indicators, factor_share_indicators)  # memoryerror
    factor_share_indicators = scale.operating_revenue_ps_latest(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.operating_profit_ps(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.operating_profit_ps_latest(valuation_sets, factor_share_indicators)
    factor_share_indicators = scale.capital_surplus_fund_ps(factor_share_indicators, factor_share_indicators)  # memoryerror
    factor_share_indicators = scale.surplus_reserve_fund_ps(factor_share_indicators, factor_share_indicators)  # memorydrror
    factor_share_indicators = scale.undivided_pro_fit_ps(factor_share_indicators, factor_share_indicators)  # memorydrror
    factor_share_indicators = scale.retained_earnings_ps(factor_share_indicators, factor_share_indicators)  # memorydrror
    factor_share_indicators = scale.oper_cash_flow_ps(factor_share_indicators, factor_share_indicators)  # memorydrror
    factor_share_indicators = scale.cash_flow_ps(factor_share_indicators, factor_share_indicators)  # memorydrror
    # Keep only the final factor columns before storage.
    # NOTE(review): 'enterprise_fcfps_latest' and 'shareholder_fcfps_latest'
    # exist in the destination table schema but are not computed here (their
    # methods are never called) -- confirm whether that is intentional.
    factor_share_indicators = factor_share_indicators[['symbol',
                                                       'eps_latest',
                                                       'diluted_eps_ttm',
                                                       'cash_equivalent_ps_latest',
                                                       'dividend_ps_latest',
                                                       'eps_ttm',
                                                       'net_asset_ps_latest',
                                                       'tor_ps_latest',
                                                       'tor_ps_ttm',
                                                       'operating_revenue_ps_ttm',
                                                       'operating_revenue_ps_latest',
                                                       'operating_profit_ps_ttm',
                                                       'operating_profit_ps_latest',
                                                       'capital_surplus_fund_ps_latest',
                                                       'surplus_reserve_fund_ps_latest',
                                                       'undivided_pro_fit_ps_latest',
                                                       'retained_earnings_ps_latest',
                                                       'oper_cash_flow_ps_ttm',
                                                       'cash_flow_ps_ttm']]
    # Primary key: symbol concatenated with the trade date string.
    factor_share_indicators['id'] = factor_share_indicators['symbol'] + str(trade_date)
    factor_share_indicators['trade_date'] = str(trade_date)
    scale._storage_data(factor_share_indicators, trade_date)
def do_update(self, start_date, end_date, count):
    # 读取本地交易日 -- iterate the local trade calendar and recompute per date.
    # NOTE(review): written as a method (takes `self`) although the surrounding
    # functions are module-level, and `self.calculate(trade_date)` does not
    # match the module-level calculate(trade_date, valuation_sets, scale)
    # signature; `self._trade_date` is also not set in the visible __init__ --
    # verify against FactorBase / the intended class.
    trade_date_sets = self._trade_date.trade_date_sets_ago(start_date, end_date, count)
    for trade_date in trade_date_sets:
        print('当前交易日: %s' % trade_date)
        self.calculate(trade_date)
        print('----->')
# @app.task()
def factor_calculate(**kwargs):
    """Task entry: pull the cached valuation data for a date and run `calculate`."""
    print("scale_kwargs: {}".format(kwargs))
    session_id = kwargs['session']
    date_index = kwargs['date_index']
    # The name must match the table name created on the client side,
    # otherwise storage will fail.
    scale_calculator = PerShareIndicators('factor_scale')
    cached = cache_data.get_cache(session_id, date_index)
    valuation_frame = json_normalize(json.loads(str(cached, encoding='utf8')))
    print("len_total_growth_data {}".format(len(valuation_frame)))
    calculate(date_index, valuation_frame, scale_calculator)
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
@version: 0.1
@author: li
@file: historical_value.py
@time: 2019-01-28 11:33
"""
import sys
from datetime import datetime
sys.path.append("..")
import math
import numpy as np
from vision.fm.signletion_engine import *
from factor.utillities.calc_tools import CalcTools
import json
from pandas.io.json import json_normalize
from factor import app
from factor.factor_base import FactorBase
from factor.ttm_fundamental import *
from vision.fm.signletion_engine import *
from factor.utillities import trade_date as td
from ultron.cluster.invoke.cache_data import cache_data
class HistoricalValue(FactorBase):
    """Historical valuation factors (PS/PE/PB industry z-scores, PEG, etc.)."""

    def __init__(self, name):
        # `name` is the destination table name used by the FactorBase storage layer.
        super(HistoricalValue, self).__init__(name)
    # 构建因子表 -- build the destination factor table
    def create_dest_tables(self):
        """
        创建数据库表 -- drop and re-create the destination MySQL table; one row
        per (id, trade_date, symbol).
        :return: None
        """
        drop_sql = """drop table if exists `{0}`""".format(self._name)
        create_sql = """create table `{0}`(
                    `id` varchar(32) NOT NULL,
                    `symbol` varchar(24) NOT NULL,
                    `trade_date` date NOT NULL,
                    `PSIndu` decimal(19,4) NOT NULL,
                    `EarnToPrice` decimal(19,4),
                    `PEIndu` decimal(19,4),
                    `PEG3YChgTTM` decimal(19,4),
                    `PEG5YChgTTM` decimal(19, 4),
                    `PBIndu` decimal(19,4),
                    `historical_value_lcap_latest` decimal(19,4),
                    `historical_value_lflo_latest` decimal(19,4),
                    `historical_value_nlsize_latest` decimal(19,4),
                    `PCFIndu` decimal(19,4),
                    `CEToPTTM` decimal(19,4),
                    `historical_value_ctop_latest` decimal(19,4),
                    PRIMARY KEY(`id`,`trade_date`,`symbol`)
                    )ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self._name)
        # FactorBase executes the drop first, then the create.
        super(HistoricalValue, self)._create_tables(create_sql, drop_sql)
def ps_indu(self, tp_historical_value, factor_historical_value):
    """
    PS industry z-score: (PS - industry mean of PS) / industry std of PS.

    :param tp_historical_value: frame with ['symbol', 'ps', 'isymbol'
                                (industry code)]
    :param factor_historical_value: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'PSIndu' added
    """
    # 行业均值,行业标准差 -- industry mean and standard deviation per 'isymbol'.
    columns_lists = ['symbol', 'ps', 'isymbol']
    historical_value = tp_historical_value.loc[:, columns_lists]
    historical_value_grouped = historical_value.groupby('isymbol')
    # NOTE(review): mean()/std() over a frame that still contains the string
    # 'symbol' column relies on old pandas silently dropping non-numeric
    # columns; pandas >= 2.0 raises here unless numeric_only=True -- confirm
    # the pinned pandas version.
    historical_value_mean = historical_value_grouped.mean()
    historical_value_std = historical_value_grouped.std()
    historical_value_std = historical_value_std.rename(columns={"ps": "ps_std"}).reset_index()
    historical_value_mean = historical_value_mean.rename(columns={"ps": "ps_mean"}).reset_index()
    historical_value = historical_value.merge(historical_value_std, on='isymbol')
    historical_value = historical_value.merge(historical_value_mean, on='isymbol')
    # Z-score per row against its industry statistics.
    historical_value['PSIndu'] = (historical_value['ps'] - historical_value['ps_mean']) / historical_value["ps_std"]
    historical_value = historical_value.drop(columns=['ps', 'isymbol', 'ps_mean', 'ps_std'], axis=1)
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
def etop(self, tp_historical_value, factor_historical_value):
    """
    收益市值比 -- earnings-to-price: net profit / total market cap
    (0 when the market cap is treated as zero by CalcTools.is_zero).

    :param tp_historical_value: frame with ['symbol', 'net_profit', 'market_cap']
    :param factor_historical_value: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'EarnToPrice' added
    """
    frame = tp_historical_value.loc[:, ['symbol', 'net_profit', 'market_cap']]
    # Zero-ish market caps would blow up the division; map those rows to 0.
    zero_cap = CalcTools.is_zero(frame['market_cap'])
    frame['EarnToPrice'] = np.where(zero_cap, 0,
                                    frame['net_profit'] / frame['market_cap'])
    frame = frame.drop(columns=['net_profit', 'market_cap'], axis=1)
    return pd.merge(factor_historical_value, frame, on="symbol")
# 5-year average earnings-to-price = 5y net profit / 5y total market cap
def etp5(self, tp_historical_value, factor_historical_value):
    """
    Five-year earnings-to-price. Uses the 5y circulating market cap as the
    denominator when usable, otherwise falls back to the 5y total market cap;
    None when neither denominator is usable.
    """
    cols = ['symbol', 'net_profit_5', 'circulating_market_cap_5', 'market_cap_5']
    frame = tp_historical_value.loc[:, cols]

    def ratio(row):
        profit, circ_cap, total_cap = row[0], row[1], row[2]
        if circ_cap is not None and circ_cap != 0:
            return profit / circ_cap
        if total_cap is not None and total_cap != 0:
            return profit / total_cap
        return None

    frame['historical_value_etp5_ttm'] = frame[
        ['net_profit_5', 'circulating_market_cap_5', 'market_cap_5']].apply(ratio, axis=1)
    frame = frame.drop(columns=['net_profit_5', 'circulating_market_cap_5', 'market_cap_5'], axis=1)
    return pd.merge(factor_historical_value, frame, on="symbol")
def pe_indu(self, tp_historical_value, factor_historical_value):
    """
    PE industry z-score: (PE - industry mean of PE) / industry std of PE.

    :param tp_historical_value: frame with ['symbol', 'pe', 'isymbol'
                                (industry code)]
    :param factor_historical_value: accumulated factor frame (merged on 'symbol')
    :return: factor frame with 'PEIndu' added
    """
    columns_lists = ['symbol', 'pe', 'isymbol']
    historical_value = tp_historical_value.loc[:, columns_lists]
    historical_value_grouped = historical_value.groupby('isymbol')
    # NOTE(review): relies on old pandas dropping the non-numeric 'symbol'
    # column in mean()/std(); pandas >= 2.0 raises unless numeric_only=True.
    historical_value_mean = historical_value_grouped.mean()
    historical_value_std = historical_value_grouped.std()
    historical_value_std = historical_value_std.rename(columns={"pe": "pe_std"}).reset_index()
    historical_value_mean = historical_value_mean.rename(columns={"pe": "pe_mean"}).reset_index()
    historical_value = historical_value.merge(historical_value_std, on='isymbol')
    historical_value = historical_value.merge(historical_value_mean, on='isymbol')
    # Z-score per row against its industry statistics.
    historical_value['PEIndu'] = (historical_value['pe'] - historical_value['pe_mean']) / historical_value["pe_std"]
    historical_value = historical_value.drop(columns=['pe', 'isymbol', 'pe_mean', 'pe_std'], axis=1)
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
def peg_3y(self, tp_historical_value, factor_historical_value):
    """
    Signed 3-year compound growth of net profit attributable to the parent
    company, minus 1.

    NOTE(review): despite the original comment (PE / 3-year growth), 'pe' is
    selected but never used in the computation -- confirm intent. A zero base
    period maps the ratio to 0, which then yields NaN via 0/abs(0) (original
    behaviour, preserved).
    """
    cols = ['symbol', 'pe', 'np_parent_company_owners', 'np_parent_company_owners_3']
    frame = tp_historical_value.loc[:, cols]
    growth_ratio = np.where(CalcTools.is_zero(frame['np_parent_company_owners_3']), 0,
                            (frame['np_parent_company_owners'] / frame['np_parent_company_owners_3']))
    # sign(growth_ratio) * |growth_ratio|^(1/3) - 1
    sign = growth_ratio / abs(growth_ratio)
    frame['PEG3YChgTTM'] = sign * pow(abs(growth_ratio), 1 / 3.0) - 1
    frame = frame.drop(
        columns=['pe', 'np_parent_company_owners', 'np_parent_company_owners_3'], axis=1)
    return pd.merge(factor_historical_value, frame, on="symbol")
def peg_5y(self, tp_historical_value, factor_historical_value):
    """
    PEG5YChgTTM: PE relative to the 5-year compound growth of net profit
    attributable to parent-company owners.

    :param tp_historical_value: DataFrame with ['symbol', 'pe',
        'np_parent_company_owners', 'np_parent_company_owners_5'].
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with a new 'PEG5YChgTTM' column.
    """
    cols = ['symbol', 'pe', 'np_parent_company_owners', 'np_parent_company_owners_5']
    historical_value = tp_historical_value.loc[:, cols]
    base = historical_value['np_parent_company_owners_5']
    ratio = np.where(CalcTools.is_zero(base), 0,
                     (historical_value['np_parent_company_owners'] / base))
    # sign(ratio) * |ratio|^(1/5) - 1; a zero ratio deliberately yields
    # NaN (0/0), matching the original behaviour.
    historical_value['PEG5YChgTTM'] = ratio / abs(ratio) * pow(abs(ratio), 1 / 5.0) - 1
    historical_value = historical_value.drop(columns=cols[1:], axis=1)
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
def pb_indu(self, tp_historical_value, factor_historical_value):
    """
    PBIndu: (PB - industry mean of PB) / industry std of PB.

    :param tp_historical_value: DataFrame with at least
        ['symbol', 'pb', 'isymbol'] where 'isymbol' is the industry code.
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with a new 'PBIndu' column.
    """
    columns_lists = ['symbol', 'pb', 'isymbol']
    # Industry mean and industry standard deviation of PB.
    historical_value = tp_historical_value.loc[:, columns_lists]
    # Aggregate only the 'pb' column: calling .mean()/.std() on the whole
    # frame relied on non-numeric columns (e.g. 'symbol') being silently
    # dropped, which raises a TypeError on pandas >= 2.0.
    historical_value_grouped = historical_value.groupby('isymbol')['pb']
    historical_value_std = historical_value_grouped.std().rename('pb_std').reset_index()
    historical_value_mean = historical_value_grouped.mean().rename('pb_mean').reset_index()
    historical_value = historical_value.merge(historical_value_std, on='isymbol')
    historical_value = historical_value.merge(historical_value_mean, on='isymbol')
    historical_value['PBIndu'] = (historical_value['pb'] - historical_value['pb_mean']) / historical_value["pb_std"]
    historical_value = historical_value.drop(columns=['pb', 'isymbol', 'pb_mean', 'pb_std'])
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
def lcap(self, tp_historical_value, factor_historical_value):
    """
    historical_value_lcap_latest: natural log of total market cap.

    The absolute value is taken before the log so that a (pathological)
    negative market cap does not raise a math domain error.

    :param tp_historical_value: DataFrame with ['symbol', 'market_cap'].
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with 'historical_value_lcap_latest'.
    """
    selected = tp_historical_value.loc[:, ['symbol', 'market_cap']]
    selected['historical_value_lcap_latest'] = selected['market_cap'].map(
        lambda cap: math.log(abs(cap)))
    selected = selected.drop(columns=['market_cap'])
    return pd.merge(factor_historical_value, selected, on="symbol")
def lflo(self, tp_historical_value, factor_historical_value):
    """
    historical_value_lflo_latest: natural log of circulating market cap.

    The absolute value is taken before the log so that a (pathological)
    negative value does not raise a math domain error.

    :param tp_historical_value: DataFrame with ['symbol', 'circulating_market_cap'].
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with 'historical_value_lflo_latest'.
    """
    selected = tp_historical_value.loc[:, ['symbol', 'circulating_market_cap']]
    selected['historical_value_lflo_latest'] = selected['circulating_market_cap'].map(
        lambda cap: math.log(abs(cap)))
    selected = selected.drop(columns=['circulating_market_cap'])
    return pd.merge(factor_historical_value, selected, on="symbol")
def nlsize(self, tp_historical_value, factor_historical_value):
    """
    historical_value_nlsize_latest: cube root of the log market cap
    (non-linear size).

    The input column 'historical_value_lcap_latest' already holds
    log(market_cap) (produced by lcap), so the cube root is applied to it
    directly.  The previous implementation applied math.log a second time,
    computing (log(log(cap)))**(1/3) instead of the documented
    (log(cap))**(1/3).

    :param tp_historical_value: DataFrame with
        ['symbol', 'historical_value_lcap_latest'].
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with 'historical_value_nlsize_latest'.
    """
    columns_lists = ['symbol', 'historical_value_lcap_latest']  # log market cap
    historical_value = tp_historical_value.loc[:, columns_lists]
    historical_value['historical_value_nlsize_latest'] = \
        historical_value['historical_value_lcap_latest'].map(lambda x: pow(abs(x), 1 / 3.0))
    historical_value = historical_value.drop(columns=['historical_value_lcap_latest'])
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
def pcf_indu(self, tp_historical_value, factor_historical_value):
    """
    PCFIndu: (PCF - industry mean of PCF) / industry std of PCF.

    :param tp_historical_value: DataFrame with at least
        ['symbol', 'pcf', 'isymbol'] where 'isymbol' is the industry code.
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with a new 'PCFIndu' column.
    """
    columns_lists = ['symbol', 'pcf', 'isymbol']
    # Industry mean and industry standard deviation of PCF.
    historical_value = tp_historical_value.loc[:, columns_lists]
    # Aggregate only the 'pcf' column: calling .mean()/.std() on the whole
    # frame relied on non-numeric columns (e.g. 'symbol') being silently
    # dropped, which raises a TypeError on pandas >= 2.0.
    historical_value_grouped = historical_value.groupby('isymbol')['pcf']
    historical_value_std = historical_value_grouped.std().rename('pcf_std').reset_index()
    historical_value_mean = historical_value_grouped.mean().rename('pcf_mean').reset_index()
    historical_value = historical_value.merge(historical_value_std, on='isymbol')
    historical_value = historical_value.merge(historical_value_mean, on='isymbol')
    historical_value['PCFIndu'] = (historical_value['pcf'] - historical_value['pcf_mean']) / historical_value[
        "pcf_std"]
    historical_value = historical_value.drop(columns=['pcf', 'isymbol', 'pcf_mean', 'pcf_std'])
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
def cetop(self, tp_historical_value, factor_historical_value):
    """
    CEToPTTM: net operating cash flow divided by total market cap
    (0 when the market cap is zero).

    :param tp_historical_value: DataFrame with
        ['symbol', 'net_operate_cash_flow', 'market_cap'].
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with a new 'CEToPTTM' column.
    """
    cols = ['symbol', 'net_operate_cash_flow', 'market_cap']
    historical_value = tp_historical_value.loc[:, cols]
    # Guard against a zero denominator with CalcTools.is_zero.
    historical_value['CEToPTTM'] = np.where(
        CalcTools.is_zero(historical_value['market_cap']),
        0,
        historical_value['net_operate_cash_flow'] / historical_value['market_cap'])
    historical_value = historical_value.drop(columns=cols[1:], axis=1)
    return pd.merge(factor_historical_value, historical_value, on="symbol")
# Cash-to-price = dividend per share * pre-dividend total shares / market cap
def ctop(self, tp_historical_value, factor_historical_value):
    """
    historical_value_ctop_latest: pcd * sbd divided by the circulating
    market cap, falling back to the total market cap when the former is
    missing or zero; None when both are unusable.

    Row values are accessed by label: the previous positional Series
    indexing (x[0], x[1], ...) is deprecated since pandas 2.1 and removed
    in pandas 3.0.

    :param tp_historical_value: DataFrame with
        ['symbol', 'pcd', 'sbd', 'circulating_market_cap', 'market_cap'].
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with 'historical_value_ctop_latest'.
    """
    columns_lists = ['symbol', 'pcd', 'sbd', 'circulating_market_cap', 'market_cap']
    historical_value = tp_historical_value.loc[:, columns_lists]

    def _ratio(row):
        numerator = row['pcd'] * row['sbd']
        if row['circulating_market_cap'] is not None and row['circulating_market_cap'] != 0:
            return numerator / row['circulating_market_cap']
        if row['market_cap'] is not None and row['market_cap'] != 0:
            return numerator / row['market_cap']
        return None

    historical_value['historical_value_ctop_latest'] = historical_value[
        ['pcd', 'sbd', 'circulating_market_cap', 'market_cap']].apply(_ratio, axis=1)
    historical_value = historical_value.drop(columns=['pcd', 'sbd', 'circulating_market_cap', 'market_cap'])
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
# 5-year average cash-to-price = 5-year dividend per share * pre-dividend total shares / 5-year average market cap
def ctop5(self, tp_historical_value, factor_historical_value):
    """
    historical_value_ctop5_latest: pcd * sbd divided by the 5-year
    circulating market cap, falling back to the 5-year total market cap
    when the former is missing or zero; None when both are unusable.

    Row values are accessed by label: the previous positional Series
    indexing (x[0], x[1], ...) is deprecated since pandas 2.1 and removed
    in pandas 3.0.

    :param tp_historical_value: DataFrame with
        ['symbol', 'pcd', 'sbd', 'circulating_market_cap_5', 'market_cap_5'].
    :param factor_historical_value: factor DataFrame, merged on 'symbol'.
    :return: factor_historical_value with 'historical_value_ctop5_latest'.
    """
    columns_lists = ['symbol', 'pcd', 'sbd', 'circulating_market_cap_5', 'market_cap_5']
    historical_value = tp_historical_value.loc[:, columns_lists]

    def _ratio(row):
        numerator = row['pcd'] * row['sbd']
        if row['circulating_market_cap_5'] is not None and row['circulating_market_cap_5'] != 0:
            return numerator / row['circulating_market_cap_5']
        if row['market_cap_5'] is not None and row['market_cap_5'] != 0:
            return numerator / row['market_cap_5']
        return None

    historical_value['historical_value_ctop5_latest'] = historical_value[
        ['pcd', 'sbd', 'circulating_market_cap_5', 'market_cap_5']].apply(_ratio, axis=1)
    historical_value = historical_value.drop(columns=['pcd', 'sbd', 'circulating_market_cap_5', 'market_cap_5'])
    factor_historical_value = pd.merge(factor_historical_value, historical_value, on="symbol")
    return factor_historical_value
def calculate(trade_date, valuation_sets, historical_value):
    """
    Compute every historical-value factor for one trade date and store the
    result.

    :param trade_date: trade date the factors are computed for.
    :param valuation_sets: merged per-symbol input data (valuation plus the
        income/TTM/cash-flow fields the individual factor methods read).
    :param historical_value: object providing the factor methods
        (ps_indu, etop, ...) and _storage_data; see factor_calculate below.
    :return: None.  Returns early (stores nothing) when valuation_sets is empty.
    """
    # valuation_sets, ttm_factor_sets, cash_flow_sets, income_sets = self.get_basic_data(trade_date)
    # valuation_sets = pd.merge(valuation_sets, income_sets, on='symbol')
    # valuation_sets = pd.merge(valuation_sets, ttm_factor_sets, on='symbol')
    # valuation_sets = pd.merge(valuation_sets, cash_flow_sets, on='symbol')
    if len(valuation_sets) <= 0:
        print("%s has no data" % trade_date)
        return
    # psindu: seeds factor_historical_value from valuation_sets, so the
    # frame below still carries every raw valuation column alongside the
    # computed factors.
    factor_historical_value = historical_value.ps_indu(valuation_sets, valuation_sets)
    factor_historical_value = historical_value.etop(valuation_sets, factor_historical_value)
    # factor_historical_value = historical_value.etp5(valuation_sets, factor_historical_value)
    factor_historical_value = historical_value.pe_indu(valuation_sets, factor_historical_value)
    factor_historical_value = historical_value.peg_3y(valuation_sets, factor_historical_value)
    factor_historical_value = historical_value.peg_5y(valuation_sets, factor_historical_value)
    factor_historical_value = historical_value.pb_indu(valuation_sets, factor_historical_value)
    factor_historical_value = historical_value.lcap(valuation_sets, factor_historical_value)
    # lflo/cetop receive factor_historical_value as the data frame; this
    # works because it still contains the raw valuation columns (see the
    # ps_indu call above).  nlsize must receive it, since it consumes the
    # 'historical_value_lcap_latest' column produced by lcap.
    factor_historical_value = historical_value.lflo(factor_historical_value, factor_historical_value)
    factor_historical_value = historical_value.nlsize(factor_historical_value, factor_historical_value)
    factor_historical_value = historical_value.pcf_indu(valuation_sets, factor_historical_value)
    factor_historical_value = historical_value.cetop(factor_historical_value, factor_historical_value)
    factor_historical_value = historical_value.ctop(valuation_sets, factor_historical_value)
    # factor_historical_value = historical_value.ctop5(valuation_sets, factor_historical_value)
    # The etp5 factor is not extracted; to enable that block the database
    # schema needs the corresponding columns added first.
    # factor_historical_value = factor_historical_value[['symbol', 'PSIndu',
    #                                                    'historical_value_etp5_ttm',
    #                                                    'EarnToPrice',
    #                                                    'PEIndu', 'PEG3YChgTTM',
    #                                                    'PEG5YChgTTM', 'PBIndu',
    #                                                    'historical_value_lcap_latest','historical_value_lflo_latest',
    #                                                    'historical_value_nlsize_latest',
    #                                                    'PCFIndu',
    #                                                    'CEToPTTM',
    #                                                    'historical_value_ctop_latest',
    #                                                    'historical_value_ctop5_latest']]
    # Keep only the computed factor columns (drops the raw valuation data).
    factor_historical_value = factor_historical_value[['symbol',
                                                       'PSIndu',
                                                       'EarnToPrice',
                                                       'PEIndu',
                                                       'PEG3YChgTTM',
                                                       'PEG5YChgTTM',
                                                       'PBIndu',
                                                       'historical_value_lcap_latest',
                                                       'historical_value_lflo_latest',
                                                       'historical_value_nlsize_latest',
                                                       'PCFIndu',
                                                       'CEToPTTM',
                                                       'historical_value_ctop_latest']]
    # Synthetic primary key: symbol + trade date.
    factor_historical_value['id'] = factor_historical_value['symbol'] + str(trade_date)
    factor_historical_value['trade_date'] = str(trade_date)
    historical_value._storage_data(factor_historical_value, trade_date)
def do_update(self, start_date, end_date, count):
    """
    Recompute factors for every trade date in the requested window.

    :param start_date: window start.
    :param end_date: window end.
    :param count: number of trade dates to look back (passed through to
        trade_date_sets_ago).
    """
    # Load trade dates from the local calendar.
    trade_date_sets = self._trade_date.trade_date_sets_ago(start_date, end_date, count)
    for trade_date in trade_date_sets:
        print('因子计算日期: %s' % trade_date)
        # NOTE(review): the module-level calculate() defined above takes
        # (trade_date, valuation_sets, historical_value); this call passes a
        # single argument -- confirm which calculate() is intended here.
        self.calculate(trade_date)
        print('----->')
@app.task()
def factor_calculate(**kwargs):
    """
    Celery task entry point: pull the cached input frame for one session /
    date and run the historical-value factor calculation on it.

    Expected kwargs: 'date_index' (trade date key) and 'session' (cache
    session id used by cache_data).
    """
    print("history_value_kwargs: {}".format(kwargs))
    date_index = kwargs['date_index']
    session = kwargs['session']
    # NOTE: the name must match the table name created on the client side,
    # otherwise an error is raised.
    historical_value = HistoricalValue('factor_historical_value')
    content = cache_data.get_cache(session, date_index)
    # Cached payload is UTF-8 JSON records; flatten into a DataFrame.
    total_history_data = json_normalize(json.loads(str(content, encoding='utf8')))
    print("len_history_value_data {}".format(len(total_history_data)))
    calculate(date_index, total_history_data, historical_value)
# -*- coding: utf-8 -*-
from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text, Boolean, text, JSON,TIMESTAMP
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
metadata = Base.metadata
class Growth(Base):
    """
    ORM model for the 'growth' factor table: one row of growth-factor
    values per security per trade date (composite primary key
    trade_date + code).
    """
    __tablename__ = 'growth'
    trade_date = Column(DateTime, primary_key=True, nullable=False)
    code = Column(Integer, primary_key=True, nullable=False)
    # Latest-report growth rates.
    net_asset_grow_rate_latest = Column(Float(53))
    total_asset_grow_rate_latest = Column(Float(53))
    # TTM growth rates of revenue / profit lines.
    operating_revenue_grow_rate_ttm = Column(Float(53))
    operating_profit_grow_rate_ttm = Column(Float(53))
    total_profit_grow_rate_ttm = Column(Float(53))
    net_profit_grow_rate_ttm = Column(Float(53))
    np_parent_company_grow_rate = Column(Float(53))
    # Multi-year (3y / 5y) compound growth rates.
    net_profit_grow_rate_3y_ttm = Column(Float(53))
    net_profit_grow_rate_5y_ttm = Column(Float(53))
    operating_revenue_grow_rate_3y_ttm = Column(Float(53))
    operating_revenue_grow_rate_5y_ttm = Column(Float(53))
    net_cash_flow_grow_rate_ttm = Column(Float(53))
    np_parent_company_cut_yoy_ttm = Column(Float(53))
    # Composite growth factors (EGRO / SUE / SUOI / SGRO).
    growth_egro_ttm = Column(Float(53))
    growth_sue_ttm = Column(Float(53))
    growth_suoi_ttm = Column(Float(53))
    # Cash-flow growth rates by activity (financing / investing / operating).
    financing_cash_grow_rate_ttm = Column(Float(53))
    invest_cash_grow_rate_ttm = Column(Float(53))
    oper_cash_grow_rate_ttm = Column(Float(53))
    growth_sgro_ttm = Column(Float(53))
import sys
from tm_import_utils import TmImportUtils
sys.path.append('..')
import config
import sqlalchemy as sa
import pandas as pd
from sqlalchemy.orm import sessionmaker
class BaseSync(object):
    """
    Base class for table-sync jobs.

    Connects the source MSSQL database and the destination MySQL database
    (both configured via the `config` module) and provides generic helpers
    to (re)create the destination table, delete a trade date, and upsert
    rows.

    NOTE(review): table names and values are interpolated directly into
    SQL strings throughout this class; acceptable only because both names
    and data come from trusted internal sources.
    """

    def __init__(self, dest_table):
        """
        :param dest_table: name of the destination (MySQL) table.
        """
        source_db = '''mssql+pymssql://{0}:{1}@{2}:{3}/{4}'''.format(config.source_db_user, config.source_db_pwd,
                                                                     config.source_db_host, config.source_db_port,
                                                                     config.source_db_database)
        destination_db = '''mysql+mysqlconnector://{0}:{1}@{2}:{3}/{4}'''.format(config.destination_db_user,
                                                                                 config.destination_db_pwd,
                                                                                 config.destination_db_host,
                                                                                 config.destination_db_port,
                                                                                 config.destination_db_database)
        # Source database.
        self.source = sa.create_engine(source_db)
        # Destination database.
        self.destination = sa.create_engine(destination_db)
        # Destination session factory.
        self.dest_session = sessionmaker(bind=self.destination, autocommit=False, autoflush=True)
        self.dest_table = dest_table

    def get_start_date(self):
        """
        Return the latest trade_date in the destination table as a
        'YYYYMMDD' string, or '20070101' when the table is empty.
        """
        sql = """select max(trade_date) as trade_date from `{0}`;""".format(self.dest_table)
        trades_sets = pd.read_sql(sql, self.destination)
        td = 20070101
        # max() always returns one row, so also guard against a NULL value:
        # previously an empty table made this method return the string 'None'.
        if not trades_sets.empty and pd.notnull(trades_sets['trade_date'][0]):
            td = trades_sets['trade_date'][0]
        td = str(td).replace('-', '')
        return td

    def delete_trade_data(self, trade_date):
        """Delete all rows of the destination table for one trade date."""
        session = self.dest_session()
        session.execute('''delete from `{0}` where trade_date={1}'''.format(self.dest_table, trade_date))
        session.commit()

    def create_table(self, create_sql):
        """
        Drop and recreate the destination table from `create_sql`, then add
        the bookkeeping timestamp columns (`creat_time` kept as-is for
        backward compatibility with existing consumers).
        """
        drop_sql = """drop table if exists `{0}`;""".format(self.dest_table)
        session = self.dest_session()
        session.execute(drop_sql)
        session.execute(create_sql)
        session.execute(
            '''alter table `{0}` add `creat_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP;'''.format(self.dest_table))
        session.execute(
            '''alter table `{0}` add `update_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP;'''.format(
                self.dest_table))
        session.commit()
        session.close()

    def insert_or_update(self, datas):
        """
        Upsert each row of `datas` into the destination table with
        INSERT ... ON DUPLICATE KEY UPDATE.

        :param datas: DataFrame whose column names match the table columns.
        """
        session = self.dest_session()
        for i in range(datas.shape[0]):
            data = datas.iloc[i]
            values = ''
            update = ''
            title = ''
            # Iterate by label: positional Series access (data[j]) is
            # deprecated since pandas 2.1.
            for index, raw in data.items():
                value = str(raw).replace("'", "\\'").replace("%", "\\%")
                title += """`{0}`,""".format(index)
                values += """'{0}',""".format(value)
                update += """`{0}`='{1}',""".format(index, value)
            sql = '''insert into {0} ({1}) values({2}) ON DUPLICATE KEY UPDATE {3}'''.format(self.dest_table,
                                                                                             title[0:-1],
                                                                                             values[0:-1],
                                                                                             update[0:-1]
                                                                                             )
            # Stringified missing values become SQL NULLs.
            sql = sql.replace("'nan'", 'Null').replace("'None'", 'Null')
            session.execute(sql)
        session.commit()
        session.close()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkBalanceSheetParent(BaseSync):
    """
    Sync job copying the parent-company balance sheet
    (TQ_FIN_PROBALSHEETNEW) from the source MSSQL database into the MySQL
    table `stk_balance_sheet_parent`, keyed on (symbol, end_date,
    report_type).
    """

    def __init__(self, source=None, destination=None):
        # `source`/`destination` accepted for interface compatibility;
        # BaseSync builds both engines from the config module.
        self.source_table = 'TQ_FIN_PROBALSHEETNEW'
        self.dest_table = 'stk_balance_sheet_parent'
        super(SyncStkBalanceSheetParent, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table (drops any existing one).
    def create_dest_tables(self):
        """Reset the update log and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
(
id VARCHAR(12) NOT NULL,
company_id VARCHAR(20) NOT NULL,
company_name VARCHAR(100) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
symbol VARCHAR(12) NOT NULL,
pub_date Date NOT NULL,
end_date Date NOT NULL,
report_type VARCHAR(10) NOT NULL,
report_date VARCHAR(2) NOT NULL,
source VARCHAR(10) NOT NULL,
cash_equivalents NUMERIC(26,2) DEFAULT NULL,
trading_assets NUMERIC(26,2) DEFAULT NULL,
bill_receivable NUMERIC(26,2) DEFAULT NULL,
account_receivable NUMERIC(26,2) DEFAULT NULL,
advance_payment NUMERIC(26,2) DEFAULT NULL,
other_receivable NUMERIC(26,2) DEFAULT NULL,
interest_receivable NUMERIC(26,2) DEFAULT NULL,
dividend_receivable NUMERIC(26,2) DEFAULT NULL,
inventories NUMERIC(26,2) DEFAULT NULL,
non_current_asset_in_one_year NUMERIC(26,2) DEFAULT NULL,
total_current_assets NUMERIC(26,2) DEFAULT NULL,
hold_for_sale_assets NUMERIC(26,2) DEFAULT NULL,
hold_to_maturity_investments NUMERIC(26,2) DEFAULT NULL,
longterm_receivable_account NUMERIC(26,2) DEFAULT NULL,
longterm_equity_invest NUMERIC(26,2) DEFAULT NULL,
investment_property NUMERIC(26,2) DEFAULT NULL,
fixed_assets NUMERIC(26,2) DEFAULT NULL,
constru_in_process NUMERIC(26,2) DEFAULT NULL,
construction_materials NUMERIC(26,2) DEFAULT NULL,
fixed_assets_liquidation NUMERIC(26,2) DEFAULT NULL,
biological_assets NUMERIC(26,2) DEFAULT NULL,
oil_gas_assets NUMERIC(26,2) DEFAULT NULL,
intangible_assets NUMERIC(26,2) DEFAULT NULL,
development_expenditure NUMERIC(26,2) DEFAULT NULL,
good_will NUMERIC(26,2) DEFAULT NULL,
long_deferred_expense NUMERIC(26,2) DEFAULT NULL,
deferred_tax_assets NUMERIC(26,2) DEFAULT NULL,
total_non_current_assets NUMERIC(26,2) DEFAULT NULL,
total_assets NUMERIC(26,2) DEFAULT NULL,
shortterm_loan NUMERIC(26,2) DEFAULT NULL,
trading_liability NUMERIC(26,2) DEFAULT NULL,
notes_payable NUMERIC(26,2) DEFAULT NULL,
accounts_payable NUMERIC(26,2) DEFAULT NULL,
advance_peceipts NUMERIC(26,2) DEFAULT NULL,
salaries_payable NUMERIC(26,2) DEFAULT NULL,
taxs_payable NUMERIC(26,2) DEFAULT NULL,
interest_payable NUMERIC(26,2) DEFAULT NULL,
dividend_payable NUMERIC(26,2) DEFAULT NULL,
other_payable NUMERIC(26,2) DEFAULT NULL,
non_current_liability_in_one_year NUMERIC(26,2) DEFAULT NULL,
total_current_liability NUMERIC(26,2) DEFAULT NULL,
longterm_loan NUMERIC(26,2) DEFAULT NULL,
bonds_payable NUMERIC(26,2) DEFAULT NULL,
longterm_account_payable NUMERIC(26,2) DEFAULT NULL,
specific_account_payable NUMERIC(26,2) DEFAULT NULL,
estimate_liability NUMERIC(26,2) DEFAULT NULL,
deferred_tax_liability NUMERIC(26,2) DEFAULT NULL,
total_non_current_liability NUMERIC(26,2) DEFAULT NULL,
total_liability NUMERIC(26,2) DEFAULT NULL,
paidin_capital NUMERIC(26,2) DEFAULT NULL,
capital_reserve_fund NUMERIC(26,2) DEFAULT NULL,
specific_reserves NUMERIC(26,2) DEFAULT NULL,
surplus_reserve_fund NUMERIC(26,2) DEFAULT NULL,
treasury_stock NUMERIC(26,2) DEFAULT NULL,
retained_profit NUMERIC(26,2) DEFAULT NULL,
equities_parent_company_owners NUMERIC(26,2) DEFAULT NULL,
minority_interests NUMERIC(26,2) DEFAULT NULL,
foreign_currency_report_conv_diff NUMERIC(26,2) DEFAULT NULL,
total_owner_equities NUMERIC(26,2) DEFAULT NULL,
total_sheet_owner_equities NUMERIC(26,2) DEFAULT NULL,
other_comprehesive_income NUMERIC(26,2) DEFAULT NULL,
deferred_earning NUMERIC(26,2) DEFAULT NULL,
settlement_provi NUMERIC(26,2) DEFAULT NULL,
lend_capital NUMERIC(26,2) DEFAULT NULL,
loan_and_advance_current_assets NUMERIC(26,2) DEFAULT NULL,
insurance_receivables NUMERIC(26,2) DEFAULT NULL,
reinsurance_receivables NUMERIC(26,2) DEFAULT NULL,
reinsurance_contract_reserves_receivable NUMERIC(26,2) DEFAULT NULL,
bought_sellback_assets NUMERIC(26,2) DEFAULT NULL,
hold_sale_asset NUMERIC(26,2) DEFAULT NULL,
loan_and_advance_noncurrent_assets NUMERIC(26,2) DEFAULT NULL,
borrowing_from_centralbank NUMERIC(26,2) DEFAULT NULL,
deposit_in_interbank NUMERIC(26,2) DEFAULT NULL,
borrowing_capital NUMERIC(26,2) DEFAULT NULL,
derivative_financial_liability NUMERIC(26,2) DEFAULT NULL,
sold_buyback_secu_proceeds NUMERIC(26,2) DEFAULT NULL,
commission_payable NUMERIC(26,2) DEFAULT NULL,
reinsurance_payables NUMERIC(26,2) DEFAULT NULL,
insurance_contract_reserves NUMERIC(26,2) DEFAULT NULL,
proxy_secu_proceeds NUMERIC(26,2) DEFAULT NULL,
receivings_from_vicariously_sold_securities NUMERIC(26,2) DEFAULT NULL,
hold_sale_liability NUMERIC(26,2) DEFAULT NULL,
estimate_liability_current NUMERIC(26,2) DEFAULT NULL,
preferred_shares_noncurrent NUMERIC(26,2) DEFAULT NULL,
pepertual_liability_noncurrent NUMERIC(26,2) DEFAULT NULL,
longterm_salaries_payable NUMERIC(26,2) DEFAULT NULL,
other_equity_tools NUMERIC(26,2) DEFAULT NULL,
tmstamp bigint not null,
PRIMARY KEY(`symbol`,`end_date`,`report_type`)
)
ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """
        Build the source SELECT statement.

        :param type: 'report' -> fully formatted statement for report mode;
            'update' -> template with {0} (TOP clause), {1} (source table)
            and {2} (tmstamp lower bound) placeholders, ordered by tmstamp.
        :return: SQL string (None for an unknown type, as before).
        """
        sql = """select {0}
a.ID as id,
a.COMPCODE as company_id,
b.Sname as company_name,
b.Symbol as code,
a.PUBLISHDATE as pub_date,
a.ENDDATE as end_date,
a.REPORTDATETYPE as report_date,
a.REPORTTYPE as report_type,
a.DATASOURCE as source,
a.CURFDS as cash_equivalents,
a.TRADFINASSET as trading_assets,
a.NOTESRECE as bill_receivable,
a.ACCORECE as account_receivable,
a.PREP as advance_payment,
a.OTHERRECE as other_receivable,
a.INTERECE as interest_receivable,
a.DIVIDRECE as dividend_receivable,
a.INVE as inventories,
a.EXPINONCURRASSET as non_current_asset_in_one_year,
a.TOTCURRASSET as total_current_assets,
a.AVAISELLASSE as hold_for_sale_assets,
a.HOLDINVEDUE as hold_to_maturity_investments,
a.LONGRECE as longterm_receivable_account,
a.EQUIINVE as longterm_equity_invest,
a.INVEPROP as investment_property,
a.FIXEDASSEIMMO as fixed_assets,
a.CONSPROG as constru_in_process,
a.ENGIMATE as construction_materials,
a.FIXEDASSECLEA as fixed_assets_liquidation,
a.PRODASSE as biological_assets,
a.HYDRASSET as oil_gas_assets,
a.INTAASSET as intangible_assets,
a.DEVEEXPE as development_expenditure,
a.GOODWILL as good_will,
a.LOGPREPEXPE as long_deferred_expense,
a.DEFETAXASSET as deferred_tax_assets,
a.TOTALNONCASSETS as total_non_current_assets,
a.TOTASSET as total_assets,
a.SHORTTERMBORR as shortterm_loan,
a.TRADFINLIAB as trading_liability,
a.NOTESPAYA as notes_payable,
a.ACCOPAYA as accounts_payable,
a.ADVAPAYM as advance_peceipts,
a.COPEWORKERSAL as salaries_payable,
a.TAXESPAYA as taxs_payable,
a.INTEPAYA as interest_payable,
a.DIVIPAYA as dividend_payable,
a.OTHERFEEPAYA as other_payable,
a.DUENONCLIAB as non_current_liability_in_one_year,
a.TOTALCURRLIAB as total_current_liability,
a.LONGBORR as longterm_loan,
a.BDSPAYA as bonds_payable,
a.LONGPAYA as longterm_account_payable,
a.SPECPAYA as specific_account_payable,
a.EXPECURRLIAB+EXPENONCLIAB as estimate_liability,
a.DEFEINCOTAXLIAB as deferred_tax_liability,
a.TOTALNONCLIAB as total_non_current_liability,
a.TOTLIAB as total_liability,
a.PAIDINCAPI as paidin_capital,
a.CAPISURP as capital_reserve_fund,
a.SPECRESE as specific_reserves,
a.RESE as surplus_reserve_fund,
a.TREASTK as treasury_stock,
a.UNDIPROF as retained_profit,
a.PARESHARRIGH as equities_parent_company_owners,
a.MINYSHARRIGH as minority_interests,
a.CURTRANDIFF as foreign_currency_report_conv_diff,
a.RIGHAGGR as total_owner_equities,
a.TOTLIABSHAREQUI as total_sheet_owner_equities,
a.OCL as other_comprehesive_income,
a.DEFEREVE as deferred_earning,
a.SETTRESEDEPO as settlement_provi,
a.PLAC as lend_capital,
a.LENDANDLOAN as loan_and_advance_current_assets,
a.PREMRECE as insurance_receivables,
a.REINRECE as reinsurance_receivables,
a.REINCONTRESE as reinsurance_contract_reserves_receivable,
a.PURCRESAASSET as bought_sellback_assets,
a.ACCHELDFORS as hold_sale_asset,
a.LENDANDLOAN as loan_and_advance_noncurrent_assets,
a.CENBANKBORR as borrowing_from_centralbank,
a.DEPOSIT as deposit_in_interbank,
a.FDSBORR as borrowing_capital,
a.DERILIAB as derivative_financial_liability,
a.SELLREPASSE as sold_buyback_secu_proceeds,
a.COPEPOUN as commission_payable,
a.COPEWITHREINRECE as reinsurance_payables,
a.INSUCONTRESE as insurance_contract_reserves,
a.ACTITRADSECU as proxy_secu_proceeds,
a.ACTIUNDESECU as receivings_from_vicariously_sold_securities,
a.LIABHELDFORS as hold_sale_liability,
a.EXPECURRLIAB as estimate_liability_current,
a.PREST as preferred_shares_noncurrent,
a.PERBOND as pepertual_liability_noncurrent,
a.LCOPEWORKERSAL as longterm_salaries_payable,
a.OTHEQUIN as other_equity_tools,
b.Exchange,
cast(a.tmstamp as bigint) as tmstamp
from {1} a
left join FCDB.dbo.SecurityCode as b
on b.CompanyCode = a.COMPCODE
where REPORTTYPE in ('2','4') and b.SType ='EQA' and b.Enabled=0 and b.status=0 and a.ACCSTACODE=11002"""
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            # Leading space is required: the base statement ends right after
            # 'a.ACCSTACODE=11002', so appending without it produced
            # '...=11002and cast(...)' -- invalid T-SQL.
            sql += ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql

    def get_datas(self, tm):
        """Fetch the next batch (TOP 10000, tmstamp > tm) from the source."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """
        Incrementally pull batches newer than `tm`, write them to the
        destination table, and advance the update log until the source is
        drained.
        """
        while True:
            result_list = self.get_datas(tm)
            if not result_list.empty:
                # Map the raw exchange code onto the suffixed symbol convention.
                result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                                 result_list['code'] + '.XSHG',
                                                 result_list['code'] + '.XSHE')
                result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
                try:
                    result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as e:
                    # Bulk insert failed (typically duplicate keys): fall
                    # back to row-by-row upsert.
                    print(e.orig.msg)  # assumes a mysql-connector error -- TODO confirm
                    self.insert_or_update(result_list)
                # Rows are ordered by tmstamp, so the last row holds the
                # batch maximum.
                max_tm = result_list['tmstamp'].iloc[-1]
                self.utils.update_update_log(max_tm)
                tm = max_tm
            else:
                break

    def do_update(self):
        """Sync everything newer than the last logged timestamp, if any."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Regenerate report-mode data for `count` periods up to `end_date`."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    # argparse's type=bool treats ANY non-empty string (including 'False')
    # as True; parse the flag text explicitly so '--rebuild False' works.
    def _str2bool(value):
        """Interpret common truthy strings; everything else is False."""
        return str(value).lower() in ('true', '1', 'yes', 'y')

    parser = argparse.ArgumentParser()
    parser.add_argument('--end_date', type=int, default=0)
    parser.add_argument('--count', type=int, default=1)
    parser.add_argument('--rebuild', type=_str2bool, default=False)
    parser.add_argument('--update', type=_str2bool, default=False)
    parser.add_argument('--report', type=_str2bool, default=False)
    args = parser.parse_args()
    # Default end_date: today as YYYYMMDD.
    if args.end_date == 0:
        end_date = int(datetime.now().date().strftime('%Y%m%d'))
    else:
        end_date = args.end_date
    if args.rebuild:
        # Full rebuild: recreate the destination table, then sync.
        processor = SyncStkBalanceSheetParent()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        # Incremental sync from the last logged timestamp.
        processor = SyncStkBalanceSheetParent()
        processor.do_update()
    elif args.report:
        # Report-mode regeneration.
        processor = SyncStkBalanceSheetParent()
        processor.update_report(args.count, end_date)
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
import config
class SyncStkCapitalChange(BaseSync):
    """
    Sync job copying share-structure changes (TQ_SK_SHARESTRUCHG) from the
    source MSSQL database into the MySQL table `stk_capital_change`, keyed
    on (symbol, begin_date, exdividend_date).
    """
    def __init__(self, source=None, destination=None):
        # `source`/`destination` are accepted for interface compatibility;
        # BaseSync builds both engines from the config module.
        self.source_table = 'TQ_SK_SHARESTRUCHG'
        self.dest_table = 'stk_capital_change'
        super(SyncStkCapitalChange, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)
    # Create the destination table (drops any existing one).
    def create_dest_tables(self):
        """Reset the update log and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
(
id INT,
symbol VARCHAR(20),
company_id VARCHAR(10),
pub_date Date,
begin_date Date,
end_date Date,
total_shares NUMERIC(15,6),
floating_shares NUMERIC(15,6),
floating_ashares NUMERIC(15,6),
floating_bshares NUMERIC(15,6),
floating_hshares NUMERIC(15,6),
other_floating_shares NUMERIC(15,6),
restrict_floating_shares NUMERIC(15,6),
restrict_floating_ashares NUMERIC(15,6),
non_floating_ashares NUMERIC(15,6),
free_floating_shares NUMERIC(15,6),
b_shares NUMERIC(15,6),
exdividend_date Date,
`explain` VARCHAR(100),
change_type VARCHAR(2),
change_reason VARCHAR(1000),
is_valid INT,
entry_date DATE,
entry_time VARCHAR(8),
tmstamp bigint not null,
PRIMARY KEY(`symbol`,`begin_date`,`exdividend_date`)
)
ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)
    def get_sql(self, type):
        """
        Build the source SELECT statement.

        :param type: 'report' -> fully formatted statement for report mode;
            'update' -> template with {0} (TOP clause), {1} (source table)
            and {2} (tmstamp lower bound) placeholders, ordered by tmstamp.
        :return: SQL string (None for an unknown type).
        """
        sql = """select {0}
a.ID as id,
a.COMPCODE as company_id,
a.PUBLISHDATE as pub_date,
a.BEGINDATE as begin_date,
a.ENDDATE as end_date,
a.TOTALSHARE as total_shares,
a.CIRCSKAMT as floating_shares,
a.CIRCAAMT as floating_ashares,
a.CIRCBAMT as floating_bshares,
a.CIRCHAMT as floating_hshares,
a.OTHERCIRCAMT as other_floating_shares,
a.LIMSKAMT as restrict_floating_shares,
a.RECIRCAAMT as restrict_floating_ashares,
a.NCIRCAMT as non_floating_ashares,
a.FCIRCAAMT as free_floating_shares,
a.BSK as b_shares,
a.EXRIGHTDATE as exdividend_date,
a.EXRIGHTEXP as explain,
a.SKCHGTYPE as change_type,
a.SHCHGRSN as change_reason,
a.ISVALID as is_valid,
a.ENTRYDATE as entry_date,
a.ENTRYTIME as entry_time,
cast(a.tmstamp as bigint) as tmstamp,
b.Symbol as code,
b.Exchange
from {1} a
left join FCDB.dbo.SecurityCode as b
on b.CompanyCode = a.COMPCODE
where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            # The base statement ends with a trailing space, so appending
            # the extra predicate here keeps the SQL well-formed.
            sql += 'and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql
    def get_datas(self, tm):
        """Fetch the next batch (TOP 2000, tmstamp > tm) from the source."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 2000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets
    def update_table_data(self, tm):
        """
        Incrementally pull batches newer than `tm`, write them to the
        destination table, and advance the update log until the source is
        drained.
        """
        while True:
            result_list = self.get_datas(tm)
            if not result_list.empty:
                # Map the raw exchange code onto the suffixed symbol convention.
                result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                                 result_list['code'] + '.XSHG',
                                                 result_list['code'] + '.XSHE')
                result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
                try:
                    result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as e:
                    # Bulk insert failed (typically duplicate keys): fall
                    # back to row-by-row upsert.
                    print(e.orig.msg)
                    self.insert_or_update(result_list)
                # Rows are ordered by tmstamp, so the last row holds the
                # batch maximum.
                max_tm = result_list['tmstamp'][result_list['tmstamp'].size - 1]
                self.utils.update_update_log(max_tm)
                tm = max_tm
            else:
                break
    def do_update(self):
        """Sync everything newer than the last logged timestamp, if any."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)
    def update_report(self, count, end_date):
        """Regenerate report-mode data for `count` periods up to `end_date`."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    # argparse's type=bool treats ANY non-empty string (including 'False')
    # as True; parse the flag text explicitly so '--rebuild False' works.
    def _str2bool(value):
        """Interpret common truthy strings; everything else is False."""
        return str(value).lower() in ('true', '1', 'yes', 'y')

    parser = argparse.ArgumentParser()
    parser.add_argument('--rebuild', type=_str2bool, default=False)
    parser.add_argument('--update', type=_str2bool, default=False)
    args = parser.parse_args()
    if args.rebuild:
        # Full rebuild: recreate the destination table, then sync.
        processor = SyncStkCapitalChange()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        # Incremental sync from the last logged timestamp.
        processor = SyncStkCapitalChange()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkCashflowStatementParent(BaseSync):
    """Sync parent-company cash-flow statements (TQ_FIN_PROCFSTATEMENTNEW)
    from the MSSQL source into the local ``stk_cashflow_statement_parent``
    table, with incremental updates driven by the source ``tmstamp`` column.
    """

    def __init__(self, source=None, destination=None):
        self.source_table = 'TQ_FIN_PROCFSTATEMENTNEW'
        self.dest_table = 'stk_cashflow_statement_parent'
        super(SyncStkCashflowStatementParent, self).__init__(self.dest_table)
        # Source database (overrides whatever engine BaseSync configured).
        self.source = sa.create_engine("mssql+pymssql://read:read@192.168.100.64:1433/QADB?charset=GBK")
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table.
    def create_dest_tables(self):
        """Reset the incremental-update log to 0 and create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
            (
            id VARCHAR(12) NOT NULL,
            company_id VARCHAR(20) NOT NULL,
            company_name VARCHAR(20) NOT NULL,
            symbol VARCHAR(12) NOT NULL,
            pub_date Date NOT NULL,
            start_date Date NOT NULL,
            end_date Date NOT NULL,
            report_type VARCHAR(10) NOT NULL,
            report_date VARCHAR(10) NOT NULL,
            source VARCHAR(10) NOT NULL,
            goods_sale_and_service_render_cash NUMERIC(26,2) DEFAULT NULL,
            tax_levy_refund NUMERIC(26,2) DEFAULT NULL,
            subtotal_operate_cash_inflow NUMERIC(26,2) DEFAULT NULL,
            goods_and_services_cash_paid NUMERIC(26,2) DEFAULT NULL,
            staff_behalf_paid NUMERIC(26,2) DEFAULT NULL,
            tax_payments NUMERIC(26,2) DEFAULT NULL,
            subtotal_operate_cash_outflow NUMERIC(26,2) DEFAULT NULL,
            net_operate_cash_flow NUMERIC(26,2) DEFAULT NULL,
            invest_withdrawal_cash NUMERIC(26,2) DEFAULT NULL,
            invest_proceeds NUMERIC(26,2) DEFAULT NULL,
            fix_intan_other_asset_dispo_cash NUMERIC(26,2) DEFAULT NULL,
            net_cash_deal_subcompany NUMERIC(26,2) DEFAULT NULL,
            subtotal_invest_cash_inflow NUMERIC(26,2) DEFAULT NULL,
            fix_intan_other_asset_acqui_cash NUMERIC(26,2) DEFAULT NULL,
            invest_cash_paid NUMERIC(26,2) DEFAULT NULL,
            impawned_loan_net_increase NUMERIC(26,2) DEFAULT NULL,
            net_cash_from_sub_company NUMERIC(26,2) DEFAULT NULL,
            subtotal_invest_cash_outflow NUMERIC(26,2) DEFAULT NULL,
            net_invest_cash_flow NUMERIC(26,2) DEFAULT NULL,
            cash_from_invest NUMERIC(26,2) DEFAULT NULL,
            cash_from_borrowing NUMERIC(26,2) DEFAULT NULL,
            cash_from_bonds_issue NUMERIC(26,2) DEFAULT NULL,
            subtotal_finance_cash_inflow NUMERIC(26,2) DEFAULT NULL,
            borrowing_repayment NUMERIC(26,2) DEFAULT NULL,
            dividend_interest_payment NUMERIC(26,2) DEFAULT NULL,
            subtotal_finance_cash_outflow NUMERIC(26,2) DEFAULT NULL,
            net_finance_cash_flow NUMERIC(26,2) DEFAULT NULL,
            exchange_rate_change_effect NUMERIC(26,2) DEFAULT NULL,
            cash_equivalent_increase NUMERIC(26,2) DEFAULT NULL,
            cash_equivalents_at_beginning NUMERIC(26,2) DEFAULT NULL,
            cash_and_equivalents_at_end NUMERIC(26,2) DEFAULT NULL,
            net_profit NUMERIC(26,2) DEFAULT NULL,
            assets_depreciation_reserves NUMERIC(26,2) DEFAULT NULL,
            fixed_assets_depreciation NUMERIC(26,2) DEFAULT NULL,
            intangible_assets_amortization NUMERIC(26,2) DEFAULT NULL,
            defferred_expense_amortization NUMERIC(26,2) DEFAULT NULL,
            fix_intan_other_asset_dispo_loss NUMERIC(26,2) DEFAULT NULL,
            fixed_asset_scrap_loss NUMERIC(26,2) DEFAULT NULL,
            fair_value_change_loss NUMERIC(26,2) DEFAULT NULL,
            financial_cost NUMERIC(26,2) DEFAULT NULL,
            invest_loss NUMERIC(26,2) DEFAULT NULL,
            deffered_tax_asset_decrease NUMERIC(26,2) DEFAULT NULL,
            deffered_tax_liability_increase NUMERIC(26,2) DEFAULT NULL,
            inventory_decrease NUMERIC(26,2) DEFAULT NULL,
            operate_receivables_decrease NUMERIC(26,2) DEFAULT NULL,
            operate_payable_increase NUMERIC(26,2) DEFAULT NULL,
            others NUMERIC(26,2) DEFAULT NULL,
            net_operate_cash_flow_indirect NUMERIC(26,2) DEFAULT NULL,
            debt_to_capital NUMERIC(26,2) DEFAULT NULL,
            cbs_expiring_in_one_year NUMERIC(26,2) DEFAULT NULL,
            financial_lease_fixed_assets NUMERIC(26,2) DEFAULT NULL,
            cash_at_end NUMERIC(26,2) DEFAULT NULL,
            cash_at_beginning NUMERIC(26,2) DEFAULT NULL,
            equivalents_at_end NUMERIC(26,2) DEFAULT NULL,
            equivalents_at_beginning NUMERIC(26,2) DEFAULT NULL,
            cash_equivalent_increase_indirect NUMERIC(26,2) DEFAULT NULL,
            net_deposit_increase NUMERIC(26,2) DEFAULT NULL,
            net_borrowing_from_central_bank NUMERIC(26,2) DEFAULT NULL,
            net_borrowing_from_finance_co NUMERIC(26,2) DEFAULT NULL,
            net_original_insurance_cash NUMERIC(26,2) DEFAULT NULL,
            net_cash_received_from_reinsurance_business NUMERIC(26,2) DEFAULT NULL,
            net_insurer_deposit_investment NUMERIC(26,2) DEFAULT NULL,
            net_deal_trading_assets NUMERIC(26,2) DEFAULT NULL,
            interest_and_commission_cashin NUMERIC(26,2) DEFAULT NULL,
            net_increase_in_placements NUMERIC(26,2) DEFAULT NULL,
            net_buyback NUMERIC(26,2) DEFAULT NULL,
            net_loan_and_advance_increase NUMERIC(26,2) DEFAULT NULL,
            net_deposit_in_cb_and_ib NUMERIC(26,2) DEFAULT NULL,
            original_compensation_paid NUMERIC(26,2) DEFAULT NULL,
            handling_charges_and_commission NUMERIC(26,2) DEFAULT NULL,
            policy_dividend_cash_paid NUMERIC(26,2) DEFAULT NULL,
            cash_from_mino_s_invest_sub NUMERIC(26,2) DEFAULT NULL,
            proceeds_from_sub_to_mino_s NUMERIC(26,2) DEFAULT NULL,
            investment_property_depreciation NUMERIC(26,2) DEFAULT NULL,
            tmstamp bigint not null,
            PRIMARY KEY(`symbol`,`start_date`,`end_date`,`report_type`)
            )
            ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """Build the extraction SQL.

        ``type`` is ``'report'`` (fully formatted statement) or ``'update'``
        (template with ``{0}``=row limit, ``{1}``=source table, ``{2}``=tmstamp
        floor).  NOTE: the parameter name shadows the builtin ``type``; it is
        kept for caller compatibility.
        """
        # NOTE(review): the finance-activity aliases below look shifted by one
        # column (FINCASHINFL -> borrowing_repayment, DEBTPAYCASH ->
        # dividend_interest_payment, ...); confirm against the
        # TQ_FIN_PROCFSTATEMENTNEW schema before relying on those fields.
        sql = """select {0}
            a.ID as id,
            a.COMPCODE as company_id,
            b.Sname as company_name,
            b.Symbol as code,
            b.Exchange,
            a.PUBLISHDATE as pub_date,
            a.BEGINDATE as start_date,
            a.ENDDATE as end_date,
            a.REPORTDATETYPE as report_date,
            a.REPORTTYPE as report_type,
            a.DATASOURCE as source,
            a.LABORGETCASH as goods_sale_and_service_render_cash,
            a.TAXREFD as tax_levy_refund,
            a.BIZCASHINFL as subtotal_operate_cash_inflow,
            a.LABOPAYC as goods_and_services_cash_paid,
            a.PAYWORKCASH as staff_behalf_paid,
            a.PAYTAX as tax_payments,
            a.BIZCASHOUTF as subtotal_operate_cash_outflow,
            a.BIZNETCFLOW as net_operate_cash_flow,
            a.WITHINVGETCASH as invest_withdrawal_cash,
            a.INVERETUGETCASH as invest_proceeds,
            a.FIXEDASSETNETC as fix_intan_other_asset_dispo_cash,
            a.SUBSNETC as net_cash_deal_subcompany,
            a.INVCASHINFL as subtotal_invest_cash_inflow,
            a.ACQUASSETCASH as fix_intan_other_asset_acqui_cash,
            a.INVPAYC as invest_cash_paid,
            a.LOANNETR as impawned_loan_net_increase,
            a.SUBSPAYNETCASH as net_cash_from_sub_company,
            a.INVCASHOUTF as subtotal_invest_cash_outflow,
            a.INVNETCASHFLOW as net_invest_cash_flow,
            a.INVRECECASH as cash_from_invest,
            a.RECEFROMLOAN as cash_from_borrowing,
            a.ISSBDRECECASH as cash_from_bonds_issue,
            a.RECEFINCASH as subtotal_finance_cash_inflow,
            a.FINCASHINFL as borrowing_repayment,
            a.DEBTPAYCASH as dividend_interest_payment,
            a.DIVIPROFPAYCASH as subtotal_finance_cash_outflow,
            a.FINNETCFLOW as net_finance_cash_flow,
            a.CHGEXCHGCHGS as exchange_rate_change_effect,
            a.CASHNETI as cash_equivalent_increase,
            a.EQUOPENBALA as cash_equivalents_at_beginning,
            a.EQUFINALBALA as cash_and_equivalents_at_end,
            a.NETPROFIT as net_profit,
            a.ASSEIMPA as assets_depreciation_reserves,
            a.ASSEDEPR as fixed_assets_depreciation,
            a.INTAASSEAMOR as intangible_assets_amortization,
            a.LONGDEFEEXPENAMOR as defferred_expense_amortization,
            a.DISPFIXEDASSETLOSS as fix_intan_other_asset_dispo_loss,
            a.FIXEDASSESCRALOSS as fixed_asset_scrap_loss,
            a.VALUECHGLOSS as fair_value_change_loss,
            a.FINEXPE as financial_cost,
            a.INVELOSS as invest_loss,
            a.DEFETAXASSETDECR as deffered_tax_asset_decrease,
            a.DEFETAXLIABINCR as deffered_tax_liability_increase,
            a.INVEREDU as inventory_decrease,
            a.RECEREDU as operate_receivables_decrease,
            a.PAYAINCR as operate_payable_increase,
            a.OTHER as others,
            a.BIZNETCFLOW as net_operate_cash_flow_indirect,
            a.DEBTINTOCAPI as debt_to_capital,
            a.EXPICONVBD as cbs_expiring_in_one_year,
            a.FINFIXEDASSET as financial_lease_fixed_assets,
            a.CASHFINALBALA as cash_at_end,
            a.CASHOPENBALA as cash_at_beginning,
            a.EQUFINALBALA as equivalents_at_end,
            a.EQUOPENBALA as equivalents_at_beginning,
            a.CASHNETI as cash_equivalent_increase_indirect,
            a.DEPONETR as net_deposit_increase,
            a.BANKLOANNETINCR as net_borrowing_from_central_bank,
            a.FININSTNETR as net_borrowing_from_finance_co,
            a.INSPREMCASH as net_original_insurance_cash,
            a.INSNETC as net_cash_received_from_reinsurance_business,
            a.SAVINETR as net_insurer_deposit_investment,
            a.DISPTRADNETINCR as net_deal_trading_assets,
            a.CHARINTECASH as interest_and_commission_cashin,
            a.FDSBORRNETR as net_increase_in_placements,
            a.REPNETINCR as net_buyback,
            a.LOANSNETR as net_loan_and_advance_increase,
            a.TRADEPAYMNETR as net_deposit_in_cb_and_ib,
            a.PAYCOMPGOLD as original_compensation_paid,
            a.PAYINTECASH as handling_charges_and_commission,
            a.PAYDIVICASH as policy_dividend_cash_paid,
            a.SUBSRECECASH as cash_from_mino_s_invest_sub,
            a.SUBSPAYDIVID as proceeds_from_sub_to_mino_s,
            a.REALESTADEP as investment_property_depreciation,
            cast(a.tmstamp as bigint) as tmstamp
            from {1} a
            left join FCDB.dbo.SecurityCode as b
            on b.CompanyCode = a.COMPCODE
            where REPORTTYPE in ('2','4') and b.SType ='EQA' and b.Enabled=0 and b.status=0 and a.ACCSTACODE=11002"""
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            # BUG FIX: the base statement ends right after "a.ACCSTACODE=11002"
            # with no trailing whitespace, so the appended clause previously
            # produced invalid SQL ("...11002and cast(..."). The leading space
            # here is required.
            sql += ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql

    def get_datas(self, tm):
        """Fetch (at most) the next 10000 source rows with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """Page through source rows newer than ``tm`` and load them in batches."""
        while True:
            result_list = self.get_datas(tm)
            if result_list.empty:
                break
            # Derive the exchange-suffixed symbol, then drop the raw columns.
            result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                             result_list['code'] + '.XSHG',
                                             result_list['code'] + '.XSHE')
            result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
            try:
                result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
            except Exception as e:
                # Bulk append failed (typically duplicate keys); report and fall
                # back to a row-wise upsert.  Not every exception carries
                # ``.orig.msg`` (only SQLAlchemy DBAPI errors do), so degrade
                # gracefully instead of raising AttributeError here.
                print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
                self.insert_or_update(result_list)
            # Rows are ordered by tmstamp, so the last row carries the batch maximum.
            max_tm = result_list['tmstamp'].iloc[-1]
            self.utils.update_update_log(max_tm)
            tm = max_tm

    def do_update(self):
        """Run an incremental sync if the source is ahead of the update log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Refresh report-period data via the import utilities."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    def _parse_bool(text):
        # argparse's type=bool treats ANY non-empty string (including "False")
        # as True, so "--update False" used to trigger an update.  Parse the
        # text explicitly; "--update True" keeps working as before.
        return text.lower() in ('true', '1', 'yes')

    parser = argparse.ArgumentParser()
    parser.add_argument('--end_date', type=int, default=0)
    parser.add_argument('--count', type=int, default=1)
    parser.add_argument('--rebuild', type=_parse_bool, default=False)
    parser.add_argument('--update', type=_parse_bool, default=False)
    parser.add_argument('--report', type=_parse_bool, default=False)
    args = parser.parse_args()
    # Default the report window end to today, formatted as YYYYMMDD.
    if args.end_date == 0:
        end_date = int(datetime.now().date().strftime('%Y%m%d'))
    else:
        end_date = args.end_date
    if args.rebuild:
        processor = SyncStkCashflowStatementParent()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkCashflowStatementParent()
        processor.do_update()
    elif args.report:
        processor = SyncStkCashflowStatementParent()
        processor.update_report(args.count, end_date)
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncCompanyInfo(BaseSync):
    """Sync company master data (TQ_COMP_INFO) into ``stk_company_info``."""
    def __init__(self, source=None, destination=None):
        self.source_table = 'TQ_COMP_INFO'
        self.dest_table = 'stk_company_info'
        super(SyncCompanyInfo, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)
    # Create the destination table.
    def create_dest_tables(self):
        """Reset the incremental-update log to 0 and create ``stk_company_info``."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
            (
            id INT NOT NULL,
            symbol VARCHAR(15) NOT NULL,
            pub_date Date DEFAULT NULL,
            company_id VARCHAR(10) NOT NULL,
            full_name VARCHAR(200) DEFAULT NULL,
            short_name VARCHAR(100) DEFAULT NULL,
            english_name_full VARCHAR(300) DEFAULT NULL,
            english_name VARCHAR(300) DEFAULT NULL,
            type1 VARCHAR(10) DEFAULT NULL,
            type2 VARCHAR(10) DEFAULT NULL,
            islist INT DEFAULT NULL,
            isbranche INT DEFAULT NULL,
            establish_date Date DEFAULT NULL,
            type VARCHAR(10) DEFAULT NULL,
            reg_capital NUMERIC(19,2) DEFAULT NULL,
            auth_share NUMERIC(19,0) DEFAULT NULL,
            currency VARCHAR(10) DEFAULT NULL,
            org_code VARCHAR(20) DEFAULT NULL,
            region VARCHAR(10) DEFAULT NULL,
            country VARCHAR(10) DEFAULT NULL,
            chairman VARCHAR(100) DEFAULT NULL,
            ceo VARCHAR(100) DEFAULT NULL,
            leger VARCHAR(100) DEFAULT NULL,
            secretary VARCHAR(50) DEFAULT NULL,
            secretary_phone VARCHAR(100) DEFAULT NULL,
            secretary_email VARCHAR(100) DEFAULT NULL,
            security_representative VARCHAR(50) DEFAULT NULL,
            lawfirm VARCHAR(100) DEFAULT NULL,
            cpafirm VARCHAR(100) DEFAULT NULL,
            business_scale VARCHAR(10) DEFAULT NULL,
            register_location VARCHAR(200) DEFAULT NULL,
            zipcode VARCHAR(20) DEFAULT NULL,
            office VARCHAR(200) DEFAULT NULL,
            telephone VARCHAR(100) DEFAULT NULL,
            fax VARCHAR(100) DEFAULT NULL,
            email VARCHAR(100) DEFAULT NULL,
            website VARCHAR(100) DEFAULT NULL,
            pub_url VARCHAR(100) DEFAULT NULL,
            description TEXT DEFAULT NULL,
            business_scope TEXT DEFAULT NULL,
            main_business TEXT DEFAULT NULL,
            license_number VARCHAR(50) DEFAULT NULL,
            live_status VARCHAR(10) DEFAULT NULL,
            live_begindate Date DEFAULT NULL,
            live_enddate Date DEFAULT NULL,
            is_valid INT NOT NULL,
            entry_date DATE NOT NULL,
            entry_time VARCHAR(8) NOT NULL,
            total_employees INT DEFAULT NULL,
            tmstamp bigint not null,
            PRIMARY KEY(`symbol`)
            )
            ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)
    def get_sql(self, type):
        """Build the extraction SQL; ``type`` is 'report' (formatted) or
        'update' (template with {0}=limit, {1}=table, {2}=tmstamp floor).
        NOTE: the parameter name shadows the builtin ``type``."""
        sql = """select {0}
            a.ID as id,
            b.Symbol as code,
            b.Exchange,
            a.PUBLISHDATE as pub_date,
            a.COMPCODE as company_id,
            a.COMPNAME as full_name,
            a.COMPSNAME as short_name,
            a.ENGNAME as english_name_full,
            a.COMPSNAME as english_name,
            a.COMPTYPE1 as type1,
            a.COMPTYPE2 as type2,
            a.ISLIST as islist,
            a.ISBRANCH as isbranche,
            a.FOUNDDATE as establish_date,
            a.ORGTYPE as type,
            a.REGCAPITAL as reg_capital,
            a.AUTHCAPSK as auth_share,
            a.CUR as currency,
            a.ORGCODE as org_code,
            a.REGION as region,
            a.COUNTRY as country,
            a.CHAIRMAN as chairman,
            a.MANAGER as ceo,
            a.LEGREP as leger,
            a.BSECRETARY as secretary,
            a.BSECRETARYTEL as secretary_phone,
            a.BSECRETARYMAIL as secretary_email,
            a.SEAFFREPR as security_representative,
            a.LECONSTANT as lawfirm,
            a.ACCFIRM as cpafirm,
            a.BIZSCALE as business_scale,
            a.REGADDR as register_location,
            a.REGPTCODE as zipcode,
            a.OFFICEADDR as office,
            a.COMPTEL as telephone,
            a.COMPFAX as fax,
            a.COMPEMAIL as email,
            a.COMPURL as website,
            a.DISURL as pub_url,
            a.COMPINTRO as description,
            a.BIZSCOPE as business_scope,
            a.MAJORBIZ as main_business,
            a.BIZLICENSENO as license_number,
            a.COMPSTATUS as live_status,
            a.EXISTBEGDATE as live_begindate,
            a.EXISTENDDATE as live_enddate,
            a.ISVALID as is_valid,
            a.ENTRYDATE as entry_date,
            a.ENTRYTIME as entry_time,
            a.WORKFORCE as total_employees,
            cast(a.tmstamp as bigint) as tmstamp
            from {1} a
            left join FCDB.dbo.SecurityCode as b
            on b.CompanyCode = a.COMPCODE
            where b.SType ='EQA' and b.Enabled=0 and b.status=0 """
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            sql += 'and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql
    def get_datas(self, tm):
        """Fetch (at most) the next 10000 source rows with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets
    def update_table_data(self, tm):
        """Page through source rows newer than ``tm`` and load them in batches."""
        while True:
            result_list = self.get_datas(tm)
            if not result_list.empty:
                # Derive the exchange-suffixed symbol, then drop the raw columns.
                result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                                 result_list['code'] + '.XSHG',
                                                 result_list['code'] + '.XSHE')
                result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
                try:
                    result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as e:
                    # NOTE(review): assumes a SQLAlchemy DBAPI error; e.orig.msg
                    # raises AttributeError for other exception types — verify.
                    print(e.orig.msg)
                    self.insert_or_update(result_list)
                # Last row holds the batch's max tmstamp (rows are ordered by tmstamp).
                max_tm = result_list['tmstamp'][result_list['tmstamp'].size - 1]
                self.utils.update_update_log(max_tm)
                tm = max_tm
            else:
                break
    def do_update(self):
        """Run an incremental sync if the source is ahead of the update log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)
    def update_report(self, count, end_date):
        """Refresh report-period data via the import utilities."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    def _parse_bool(text):
        # argparse's type=bool treats ANY non-empty string (including "False")
        # as True; parse the flag text explicitly instead.
        return text.lower() in ('true', '1', 'yes')

    parser = argparse.ArgumentParser()
    parser.add_argument('--rebuild', type=_parse_bool, default=False)
    parser.add_argument('--update', type=_parse_bool, default=False)
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncCompanyInfo()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncCompanyInfo()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkEmployeeInfo(BaseSync):
    """Sync employee headcount data (TQ_COMP_EMPLOYEE) into ``stk_employee_info``."""
    def __init__(self, source=None, destination=None):
        self.source_table = 'TQ_COMP_EMPLOYEE'
        self.dest_table = 'stk_employee_info'
        super(SyncStkEmployeeInfo, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)
    # Create the destination table.
    def create_dest_tables(self):
        """Reset the incremental-update log to 0 and create ``stk_employee_info``."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
            (
            id   int NOT NULL,
            symbol VARCHAR(20) NOT NULL,
            company_name   VARCHAR(200) NOT NULL,
            company_id   varchar(10) NOT NULL,
            end_date   Date NOT NULL,
            pub_date   Date DEFAULT NULL,
            work_dorce   int DEFAULT NULL,
            product_num   int DEFAULT NULL,
            sales_num   int DEFAULT NULL,
            financial_num   int DEFAULT NULL,
            tech_num   int DEFAULT NULL,
            reasearch_num   int DEFAULT NULL,
            admin_nunm   int DEFAULT NULL,
            retire_num   int DEFAULT NULL,
            other_num   int DEFAULT NULL,
            doctor_num   int DEFAULT NULL,
            posrg_num   int DEFAULT NULL,
            tmstamp bigint not null,
            PRIMARY KEY(`symbol`,`end_date`)
            )
            ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        # Collapse all runs of whitespace (the literal above may contain
        # irregular spacing) into single spaces.
        create_sql = " ".join(create_sql.split())
        self.create_table(create_sql)
    def get_sql(self, type):
        """Build the extraction SQL; ``type`` is 'report' (formatted) or
        'update' (template with {0}=limit, {1}=table, {2}=tmstamp floor).
        NOTE: the parameter name shadows the builtin ``type``."""
        sql = """select {0}
            a.ID as id,
            a.COMPNAME as company_name,
            a.COMPCODE as company_id,
            a.ENDDATE as end_date,
            a.DECLAREDATE as pub_date,
            a.WORKFORCE as work_dorce,
            a.PRODUCTIONSTAFF as product_num,
            a.SALESPERSONS as sales_num,
            a.FINANCIALSTAFF as financial_num,
            a.TECHNICALSTAFF as tech_num,
            a.RESEARCHSTAFF as reasearch_num,
            a.ADMTRATIVESTAFF as admin_nunm,
            a.RETIREESTAFF as retire_num,
            a.OTHERSTAFF as other_num,
            a.DRNUM as doctor_num,
            a.POSTGRAD as posrg_num,
            cast(a.tmstamp as bigint) as tmstamp,
            b.Symbol as code,
            b.Exchange
            from {1} a
            left join FCDB.dbo.SecurityCode as b
            on b.CompanyCode = a.COMPCODE
            where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            sql += 'and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql
    def get_datas(self, tm):
        """Fetch (at most) the next 10000 source rows with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets
    def update_table_data(self, tm):
        """Page through source rows newer than ``tm`` and load them in batches."""
        while True:
            result_list = self.get_datas(tm)
            if not result_list.empty:
                # Derive the exchange-suffixed symbol, then drop the raw columns.
                result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                                 result_list['code'] + '.XSHG',
                                                 result_list['code'] + '.XSHE')
                result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
                try:
                    result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as e:
                    # NOTE(review): assumes a SQLAlchemy DBAPI error; e.orig.msg
                    # raises AttributeError for other exception types — verify.
                    print(e.orig.msg)
                    self.insert_or_update(result_list)
                # Last row holds the batch's max tmstamp (rows are ordered by tmstamp).
                max_tm = result_list['tmstamp'][result_list['tmstamp'].size - 1]
                self.utils.update_update_log(max_tm)
                tm = max_tm
            else:
                break
    def do_update(self):
        """Run an incremental sync if the source is ahead of the update log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)
    def update_report(self, count, end_date):
        """Refresh report-period data via the import utilities."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    def _parse_bool(text):
        # argparse's type=bool treats ANY non-empty string (including "False")
        # as True; parse the flag text explicitly instead.
        return text.lower() in ('true', '1', 'yes')

    parser = argparse.ArgumentParser()
    parser.add_argument('--rebuild', type=_parse_bool, default=False)
    parser.add_argument('--update', type=_parse_bool, default=False)
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkEmployeeInfo()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkEmployeeInfo()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkExd(BaseSync):
    """Sync ex-dividend factor data (TQ_SK_XDRY) into ``stk_exd``."""
    def __init__(self, source=None, destination=None):
        self.dest_table = 'stk_exd'
        self.source_table = 'TQ_SK_XDRY'
        super(SyncStkExd,self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)
    # Create the destination table.
    def create_dest_tables(self):
        """Reset the incremental-update log to 0 and create ``stk_exd``."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
            (
            id   INT NOT NULL,
            symbol   VARCHAR(20) NOT NULL,
            begin_date   DATE NOT NULL,
            end_date   DATE NOT NULL,
            exd_factor   NUMERIC(32,19) DEFAULT NULL,
            back_exd   NUMERIC(29,16) DEFAULT NULL,
            direct_exd   NUMERIC(29,16) DEFAULT NULL,
            is_valid   INT NOT NULL,
            tmstamp bigint not null,
            PRIMARY KEY(`symbol`,`begin_date`)
            )
            ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        # Collapse all runs of whitespace into single spaces.
        create_sql = " ".join(create_sql.split())
        self.create_table(create_sql)
    def get_sql(self, type):
        """Build the extraction SQL; ``type`` is 'report' (formatted) or
        'update' (template with {0}=limit, {2}=tmstamp floor).
        NOTE(review): unlike the sibling classes, the table names below are
        hard-coded rather than substituted via {1} — the {1} placeholder is
        unused here; confirm this is intentional.
        NOTE: the parameter name shadows the builtin ``type``."""
        sql = """select {0}
            a.ID as id,
            a.BEGINDATE as begin_date,
            a.ENDDATE as end_date,
            a.XDY as exd_factor,
            a.LTDXDY as back_exd,
            a.THELTDXDY as direct_exd,
            a.ISVALID as is_valid,
            cast(a.tmstamp as bigint) as tmstamp,
            b.Exchange,
            b.SYMBOL as code
            from TQ_SK_XDRY a
            left join TQ_OA_STCODE as b
            on b.SECODE = a.SECODE
            where b.ISVALID=1 and b.LISTSTATUS=1 """
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            sql += 'and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql
    def get_datas(self, tm):
        """Fetch (at most) the next 10000 source rows with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets
    def update_table_data(self, tm):
        """Page through source rows newer than ``tm`` and load them in batches."""
        while True:
            result_list = self.get_datas(tm)
            if not result_list.empty:
                # Derive the exchange-suffixed symbol, then drop the raw columns.
                result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                                 result_list['code'] + '.XSHG',
                                                 result_list['code'] + '.XSHE')
                result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
                try:
                    result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as e:
                    # NOTE(review): assumes a SQLAlchemy DBAPI error; e.orig.msg
                    # raises AttributeError for other exception types — verify.
                    print(e.orig.msg)
                    self.insert_or_update(result_list)
                # Last row holds the batch's max tmstamp (rows are ordered by tmstamp).
                max_tm = result_list['tmstamp'][result_list['tmstamp'].size - 1]
                self.utils.update_update_log(max_tm)
                tm = max_tm
            else:
                break
    def do_update(self):
        """Run an incremental sync if the source is ahead of the update log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)
    def update_report(self, count, end_date):
        """Refresh report-period data via the import utilities."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    def _parse_bool(text):
        # argparse's type=bool treats ANY non-empty string (including "False")
        # as True; parse the flag text explicitly instead.
        return text.lower() in ('true', '1', 'yes')

    parser = argparse.ArgumentParser()
    parser.add_argument('--rebuild', type=_parse_bool, default=False)
    parser.add_argument('--update', type=_parse_bool, default=False)
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkExd()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkExd()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkFinForcast(BaseSync):
    """Sync earnings-forecast data (TQ_SK_EXPTPERFORMANCE) into ``stk_fin_forcast``."""
    def __init__(self, source=None, destination=None):
        self.source_table = 'TQ_SK_EXPTPERFORMANCE'
        self.dest_table = 'stk_fin_forcast'
        super(SyncStkFinForcast, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)
    # Create the destination table.
    def create_dest_tables(self):
        """Reset the incremental-update log to 0 and create ``stk_fin_forcast``."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
            (
            id INT NOT NULL,
            symbol VARCHAR(20) NOT NULL,
            company_id VARCHAR(10) NOT NULL,
            pub_date DATE NOT NULL,
            source VARCHAR(10) NOT NULL,
            begin_date DATE NOT NULL,
            end_date DATE NOT NULL,
            base_begin_date DATE NOT NULL,
            base_end_date DATE NOT NULL,
            operating_income_estimate NUMERIC(18,6) DEFAULT NULL,
            operating_income_increas_estimate NUMERIC(15,6) DEFAULT NULL,
            operating_income_text VARCHAR(400) DEFAULT NULL,
            operating_income_mark VARCHAR(10) DEFAULT NULL,
            operating_profit_estimate NUMERIC(18,6) DEFAULT NULL,
            operating_profit_increase_estimate NUMERIC(15,6) DEFAULT NULL,
            operating_profit_text VARCHAR(400) DEFAULT NULL,
            operating_profit_mark VARCHAR(10) DEFAULT NULL,
            net_profit_top NUMERIC(18,6) DEFAULT NULL,
            net_profit_bottom NUMERIC(18,6) DEFAULT NULL,
            net_profit_increas_top NUMERIC(18,6) DEFAULT NULL,
            net_profit_increas_bottom NUMERIC(18,6) DEFAULT NULL,
            net_profit_estimate_top VARCHAR(10) DEFAULT NULL,
            net_profit_estimate_bottom VARCHAR(10) DEFAULT NULL,
            net_profit_estimate_text VARCHAR(400) DEFAULT NULL,
            eps_top NUMERIC(15,6) DEFAULT NULL,
            eps_bottom NUMERIC(15,6) DEFAULT NULL,
            eps_estimate_top VARCHAR(10) DEFAULT NULL,
            eps_estimate_bottom VARCHAR(10) DEFAULT NULL,
            isvalid INT DEFAULT NULL,
            entry_date DATE NOT NULL,
            entry_time VARCHAR(8) NOT NULL,
            currency VARCHAR(10) DEFAULT NULL,
            eps_increase_estimate_top NUMERIC(15,6) DEFAULT NULL,
            eps_increase_estimate_bottom NUMERIC(15,6) DEFAULT NULL,
            exp_year VARCHAR(4) DEFAULT NULL,
            exp_type VARCHAR(10) DEFAULT NULL,
            report_type VARCHAR(10) DEFAULT NULL,
            estimate_origin_text TEXT DEFAULT NULL,
            tmstamp bigint not null,
            PRIMARY KEY(`symbol`,`pub_date`,`source`,`begin_date`,`end_date`,`base_begin_date`,`base_end_date`)
            )
            ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)
    def get_sql(self, type):
        """Build the extraction SQL; ``type`` is 'report' (formatted) or
        'update' (template with {0}=limit, {1}=table, {2}=tmstamp floor).
        NOTE: the parameter name shadows the builtin ``type``."""
        sql = """select {0}
            a.ID as id,
            a.COMPCODE as company_id,
            a.PUBLISHDATE as pub_date,
            a.DATASOURCE as source,
            a.SESSIONBEGDATE as begin_date,
            a.SESSIONENDDATE as end_date,
            a.BASESSIONBEGDATE as base_begin_date,
            a.BASESSIONENDDATE as base_end_date,
            a.OPERMINCOME as operating_income_estimate,
            a.OPERMINCOMEINC as operating_income_increas_estimate,
            a.OPERMINCOMEDES as operating_income_text,
            a.OPERMINCOMEMK as operating_income_mark,
            a.OPERMPROFIT as operating_profit_estimate,
            a.OPERMPROFITINC as operating_profit_increase_estimate,
            a.OPERMPROFITDES as operating_profit_text,
            a.OPERMPROFITMK as operating_profit_mark,
            a.RETAMAXPROFITS as net_profit_top,
            a.RETAMINPROFITS as net_profit_bottom,
            a.RETAMAXPROFITSINC as net_profit_increas_top,
            a.RETAMINPROFITSINC as net_profit_increas_bottom,
            a.RETAMAXPROFITSMK as net_profit_estimate_top,
            a.RETAMINPROFITSMK as net_profit_estimate_bottom,
            a.RETAPROFITSDES as net_profit_estimate_text,
            a.EPSMAXFORE as eps_top,
            a.EPSMINFORE as eps_bottom,
            a.EPSMAXFOREMK as eps_estimate_top,
            a.EPSMINFOREMK as eps_estimate_bottom,
            a.ISVALID as isvalid,
            a.ENTRYDATE as entry_date,
            a.ENTRYTIME as entry_time,
            a.CUR as currency,
            a.EPSMAXFOREINC as eps_increase_estimate_top,
            a.EPSMINFOREINC as eps_increase_estimate_bottom,
            a.EXPTYEAR as exp_year,
            a.EXPTTYPE as exp_type,
            a.GLOBALEXPTMOD as report_type,
            a.EXPTORIGTEXT as estimate_origin_text,
            cast(a.tmstamp as bigint) as tmstamp,
            b.Symbol as code,
            b.Exchange
            from {1} a
            left join FCDB.dbo.SecurityCode as b
            on b.CompanyCode = a.COMPCODE
            where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            sql += 'and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql
    def get_datas(self, tm):
        """Fetch (at most) the next 10000 source rows with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets
    def update_table_data(self, tm):
        """Page through source rows newer than ``tm`` and load them in batches."""
        while True:
            result_list = self.get_datas(tm)
            if not result_list.empty:
                # Derive the exchange-suffixed symbol, then drop the raw columns.
                result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                                 result_list['code'] + '.XSHG',
                                                 result_list['code'] + '.XSHE')
                result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
                try:
                    result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as e:
                    # NOTE(review): assumes a SQLAlchemy DBAPI error; e.orig.msg
                    # raises AttributeError for other exception types — verify.
                    print(e.orig.msg)
                    self.insert_or_update(result_list)
                # Last row holds the batch's max tmstamp (rows are ordered by tmstamp).
                max_tm = result_list['tmstamp'][result_list['tmstamp'].size - 1]
                self.utils.update_update_log(max_tm)
                tm = max_tm
            else:
                break
    def do_update(self):
        """Run an incremental sync if the source is ahead of the update log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)
    def update_report(self, count, end_date):
        """Refresh report-period data via the import utilities."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    def _parse_bool(text):
        # argparse's type=bool treats ANY non-empty string (including "False")
        # as True; parse the flag text explicitly instead.
        return text.lower() in ('true', '1', 'yes')

    parser = argparse.ArgumentParser()
    parser.add_argument('--rebuild', type=_parse_bool, default=False)
    parser.add_argument('--update', type=_parse_bool, default=False)
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkFinForcast()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkFinForcast()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkHolderNum(BaseSync):
    """Sync shareholder-count data (TQ_SK_SHAREHOLDERNUM) into ``stk_holder_num``."""
    def __init__(self, source=None, destination=None):
        self.source_table = 'TQ_SK_SHAREHOLDERNUM'
        self.dest_table = 'stk_holder_num'
        super(SyncStkHolderNum, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)
    # Create the destination table.
    def create_dest_tables(self):
        """Reset the incremental-update log to 0 and create ``stk_holder_num``."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
            (
            id INT NOT NULL,
            symbol VARCHAR(15) NOT NULL,
            company_id VARCHAR(10) NOT NULL,
            end_date DATE NOT NULL,
            pub_date DATE DEFAULT NULL,
            share_holders NUMERIC(10,0) DEFAULT NULL,
            total_share NUMERIC(19,0) DEFAULT NULL,
            share_holders_ave NUMERIC(19,4) DEFAULT NULL,
            share_holders_ave_ratio NUMERIC(10,6) DEFAULT NULL,
            a_share_holders NUMERIC(10,0) DEFAULT NULL,
            a_share NUMERIC(19,0) DEFAULT NULL,
            a_share_holders_ave NUMERIC(19,4) DEFAULT NULL,
            a_share_holders_ave_ratio NUMERIC(10,6) DEFAULT NULL,
            b_share_holders NUMERIC(10,0) DEFAULT NULL,
            h_share_holders NUMERIC(10,0) DEFAULT NULL,
            memo VARCHAR(400) DEFAULT NULL,
            is_valid INT DEFAULT NULL,
            entry_date DATE NOT NULL,
            entry_time VARCHAR(8) NOT NULL,
            tmstamp bigint not null,
            PRIMARY KEY(`symbol`,`end_date`)
            )
            ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)
    def get_sql(self, type):
        """Build the extraction SQL; ``type`` is 'report' (formatted) or
        'update' (template with {0}=limit, {1}=table, {2}=tmstamp floor).
        NOTE: the parameter name shadows the builtin ``type``."""
        sql = """select {0}
            a.ID as id,
            b.Symbol as code,
            b.Exchange,
            a.COMPCODE as company_id,
            a.ENDDATE as end_date,
            a.PUBLISHDATE as pub_date,
            a.TOTALSHAMT as share_holders,
            a.TOTALSHARE as total_share,
            a.KAVGSH as share_holders_ave,
            a.HOLDPROPORTIONPACC as share_holders_ave_ratio,
            a.ASKSHAMT as a_share_holders,
            a.ASK as a_share,
            a.ASKAVGSH as a_share_holders_ave,
            a.AHOLDPROPORTIONPACC as a_share_holders_ave_ratio,
            a.BSKSHAMT as b_share_holders,
            a.HSKSHAMT as h_share_holders,
            a.MEMO as memo,
            a.ISVALID as is_valid,
            a.ENTRYDATE as entry_date,
            a.ENTRYTIME as entry_time,
            cast(a.tmstamp as bigint) as tmstamp
            from {1} a
            left join FCDB.dbo.SecurityCode as b
            on b.CompanyCode = a.COMPCODE
            where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            sql = sql.format('', self.source_table)
            return sql
        elif type == 'update':
            sql += 'and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
            return sql
    def get_datas(self, tm):
        """Fetch (at most) the next 10000 source rows with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets
    def update_table_data(self, tm):
        """Page through source rows newer than ``tm`` and load them in batches."""
        while True:
            result_list = self.get_datas(tm)
            if not result_list.empty:
                # Derive the exchange-suffixed symbol, then drop the raw columns.
                result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                                 result_list['code'] + '.XSHG',
                                                 result_list['code'] + '.XSHE')
                result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
                try:
                    result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as e:
                    # NOTE(review): assumes a SQLAlchemy DBAPI error; e.orig.msg
                    # raises AttributeError for other exception types — verify.
                    print(e.orig.msg)
                    self.insert_or_update(result_list)
                # Last row holds the batch's max tmstamp (rows are ordered by tmstamp).
                max_tm = result_list['tmstamp'][result_list['tmstamp'].size - 1]
                self.utils.update_update_log(max_tm)
                tm = max_tm
            else:
                break
    def do_update(self):
        """Run an incremental sync if the source is ahead of the update log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)
    def update_report(self, count, end_date):
        """Refresh report-period data via the import utilities."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # `type=bool` is an argparse pitfall: bool('False') is True, so any
    # supplied value enabled the flag.  Presence flags are what is meant.
    parser.add_argument('--rebuild', action='store_true')
    parser.add_argument('--update', action='store_true')
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkHolderNum()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkHolderNum()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkIncomeStatementParent(BaseSync):
    """Sync parent-company income statements (TQ_FIN_PROINCSTATEMENTNEW)
    into the local ``stk_income_statement_parent`` table.

    The sync is driven by the source table's ``tmstamp``: the last imported
    value is persisted through ``TmImportUtils`` and each run pulls only
    rows with a larger timestamp, in 10000-row batches.
    """

    def __init__(self, source=None, destination=None):
        # NOTE(review): the source/destination parameters are ignored;
        # BaseSync presumably establishes self.source / self.destination.
        self.source_table = 'TQ_FIN_PROINCSTATEMENTNEW'
        self.dest_table = 'stk_income_statement_parent'
        super(SyncStkIncomeStatementParent, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table.
    def create_dest_tables(self):
        """Reset the import log to 0 and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
                    (
                    id VARCHAR(12) NOT NULL,
                    company_id VARCHAR(12) NOT NULL,
                    company_name VARCHAR(50) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
                    symbol VARCHAR(12) NOT NULL,
                    report_type VARCHAR(2) NOT NULL,
                    report_date VARCHAR(10) NOT NULL,
                    pub_date DATE NOT NULL,
                    start_date DATE NOT NULL,
                    end_date DATE NOT NULL,
                    source VARCHAR(10) NOT NULL,
                    total_operating_revenue NUMERIC(26,2) DEFAULT NULL,
                    operating_revenue NUMERIC(26,2) DEFAULT NULL,
                    total_operating_cost NUMERIC(26,2) DEFAULT NULL,
                    operating_cost NUMERIC(26,2) DEFAULT NULL,
                    operating_tax_surcharges NUMERIC(26,2) DEFAULT NULL,
                    sale_expense NUMERIC(26,2) DEFAULT NULL,
                    administration_expense NUMERIC(26,2) DEFAULT NULL,
                    financial_expense NUMERIC(26,2) DEFAULT NULL,
                    asset_impairment_loss NUMERIC(26,2) DEFAULT NULL,
                    fair_value_variable_income NUMERIC(26,2) DEFAULT NULL,
                    investment_income NUMERIC(26,2) DEFAULT NULL,
                    invest_income_associates NUMERIC(26,2) DEFAULT NULL,
                    exchange_income NUMERIC(26,2) DEFAULT NULL,
                    operating_profit NUMERIC(26,2) DEFAULT NULL,
                    subsidy_income NUMERIC(26,2) DEFAULT NULL,
                    non_operating_revenue NUMERIC(26,2) DEFAULT NULL,
                    non_operating_expense NUMERIC(26,2) DEFAULT NULL,
                    disposal_loss_non_current_liability NUMERIC(26,2) DEFAULT NULL,
                    total_profit NUMERIC(26,2) DEFAULT NULL,
                    income_tax NUMERIC(26,2) DEFAULT NULL,
                    net_profit NUMERIC(26,2) DEFAULT NULL,
                    np_parent_company_owners NUMERIC(26,2) DEFAULT NULL,
                    minority_profit NUMERIC(26,2) DEFAULT NULL,
                    basic_eps NUMERIC(30,6) DEFAULT NULL,
                    diluted_eps NUMERIC(30,6) DEFAULT NULL,
                    other_composite_income NUMERIC(26,2) DEFAULT NULL,
                    total_composite_income NUMERIC(26,2) DEFAULT NULL,
                    ci_parent_company_owners NUMERIC(26,2) DEFAULT NULL,
                    ci_minority_owners NUMERIC(26,2) DEFAULT NULL,
                    interest_income NUMERIC(26,2) DEFAULT NULL,
                    premiums_earned NUMERIC(26,2) DEFAULT NULL,
                    commission_income NUMERIC(26,2) DEFAULT NULL,
                    interest_expense NUMERIC(26,2) DEFAULT NULL,
                    commission_expense NUMERIC(26,2) DEFAULT NULL,
                    refunded_premiums NUMERIC(26,2) DEFAULT NULL,
                    net_pay_insurance_claims NUMERIC(26,2) DEFAULT NULL,
                    withdraw_insurance_contract_reserve NUMERIC(26,2) DEFAULT NULL,
                    policy_dividend_payout NUMERIC(26,2) DEFAULT NULL,
                    reinsurance_cost NUMERIC(26,2) DEFAULT NULL,
                    non_current_asset_disposed NUMERIC(26,2) DEFAULT NULL,
                    other_earnings NUMERIC(26,2) DEFAULT NULL,
                    tmstamp bigint not null,
                    PRIMARY KEY(`symbol`,`start_date`,`end_date`,`report_type`)
                    )
                    ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """Build extraction SQL; 'report' fills the slots, 'update' keeps
        {0}/{1}/{2} templates for the caller.

        Raises:
            ValueError: for an unknown ``type`` (was a silent None).
        """
        sql = """select {0}
                a.ID as id,
                a.COMPCODE as company_id,
                b.SName as company_name,
                b.Symbol as code,
                b.Exchange,
                a.PUBLISHDATE as pub_date,
                a.BEGINDATE as start_date,
                a.ENDDATE as end_date,
                a.REPORTDATETYPE as report_date,
                a.REPORTTYPE as report_type,
                a.DATASOURCE as source,
                a.BIZTOTINCO as total_operating_revenue,
                a.BIZINCO as operating_revenue,
                a.BIZTOTCOST as total_operating_cost,
                a.BIZCOST as operating_cost,
                a.BIZTAX as operating_tax_surcharges,
                a.SALESEXPE as sale_expense,
                a.MANAEXPE as administration_expense,
                a.FINEXPE as financial_expense,
                a.ASSEIMPALOSS as asset_impairment_loss,
                a.VALUECHGLOSS as fair_value_variable_income,
                a.INVEINCO as investment_income,
                a.ASSOINVEPROF as invest_income_associates,
                a.EXCHGGAIN as exchange_income,
                a.PERPROFIT as operating_profit,
                a.SUBSIDYINCOME as subsidy_income,
                a.NONOREVE as non_operating_revenue,
                a.NONOEXPE as non_operating_expense,
                a.NONCASSETSDISL as disposal_loss_non_current_liability,
                a.TOTPROFIT as total_profit,
                a.INCOTAXEXPE as income_tax,
                a.NETPROFIT as net_profit,
                a.PARENETP as np_parent_company_owners,
                a.MINYSHARRIGH as minority_profit,
                a.BASICEPS as basic_eps,
                a.DILUTEDEPS as diluted_eps,
                a.OTHERCOMPINCO as other_composite_income,
                a.COMPINCOAMT as total_composite_income,
                a.PARECOMPINCOAMT as ci_parent_company_owners,
                a.MINYSHARINCOAMT as ci_minority_owners,
                a.INTEINCO as interest_income,
                a.EARNPREM as premiums_earned,
                a.POUNINCO as commission_income,
                a.INTEEXPE as interest_expense,
                a.POUNEXPE as commission_expense,
                a.SURRGOLD as refunded_premiums,
                a.COMPNETEXPE as net_pay_insurance_claims,
                a.CONTRESS as withdraw_insurance_contract_reserve,
                a.POLIDIVIEXPE as policy_dividend_payout,
                a.REINEXPE as reinsurance_cost,
                a.NONCASSETSDISI as non_current_asset_disposed,
                a.OTHERINCO as other_earnings,
                cast(a.tmstamp as bigint) as tmstamp
                from {1} a
                left join FCDB.dbo.SecurityCode as b
                on b.CompanyCode = a.COMPCODE
                where REPORTTYPE in ('2','4') and b.SType ='EQA' and b.Enabled=0 and b.status=0 and a.ACCSTACODE=11002"""
        if type == 'report':
            return sql.format('', self.source_table)
        elif type == 'update':
            # BUG FIX: the base string ends with '...ACCSTACODE=11002' (no
            # trailing space), so the old `sql += 'and cast(...)'` produced
            # the invalid predicate '11002and cast(...)'.  The explicit
            # leading space repairs the generated SQL.
            return sql + ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
        raise ValueError("type must be 'report' or 'update', got %r" % (type,))

    def get_datas(self, tm):
        """Fetch the next batch (top 10000 rows) with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """Copy batches with tmstamp > ``tm`` until the source is drained."""
        while True:
            result_list = self.get_datas(tm)
            if result_list.empty:
                break
            # CNSESH -> Shanghai suffix, everything else -> Shenzhen.
            result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                             result_list['code'] + '.XSHG',
                                             result_list['code'] + '.XSHE')
            result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
            try:
                result_list.to_sql(name=self.dest_table, con=self.destination,
                                   if_exists='append', index=False)
            except Exception as e:
                # `.orig.msg` exists only on SQLAlchemy DBAPI errors; guard so
                # the fallback upsert still runs for any exception type.
                print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
                self.insert_or_update(result_list)
            # Batch is ordered by tmstamp; take the last row positionally.
            max_tm = result_list['tmstamp'].iloc[-1]
            self.utils.update_update_log(max_tm)
            tm = max_tm

    def do_update(self):
        """Run an incremental sync when the source is ahead of the log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Refresh the last ``count`` report periods ending at ``end_date``."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--end_date', type=int, default=0)
    parser.add_argument('--count', type=int, default=1)
    # store_true instead of `type=bool`: bool('False') is truthy, so the old
    # definitions treated any supplied value as True.
    parser.add_argument('--rebuild', action='store_true')
    parser.add_argument('--update', action='store_true')
    parser.add_argument('--report', action='store_true')
    args = parser.parse_args()
    if args.end_date == 0:
        # Default the report cut-off to today, formatted as yyyymmdd.
        end_date = int(datetime.now().date().strftime('%Y%m%d'))
    else:
        end_date = args.end_date
    if args.rebuild:
        processor = SyncStkIncomeStatementParent()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkIncomeStatementParent()
        processor.do_update()
    elif args.report:
        processor = SyncStkIncomeStatementParent()
        processor.update_report(args.count, end_date)
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkManagementInfo(BaseSync):
    """Sync company-management records (TQ_COMP_MANAGER) into the local
    ``stk_management_info`` table, incrementally by source ``tmstamp``."""

    def __init__(self, source=None, destination=None):
        # NOTE(review): the source/destination parameters are ignored;
        # BaseSync presumably establishes self.source / self.destination.
        self.source_table = 'TQ_COMP_MANAGER'
        self.dest_table = 'stk_management_info'
        super(SyncStkManagementInfo, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table.
    def create_dest_tables(self):
        """Reset the import log to 0 and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
                    (
                    id INT NOT NULL,
                    symbol VARCHAR(15) NOT NULL,
                    update_date DATE NOT NULL,
                    company_code VARCHAR(20) NOT NULL,
                    Job_attribute VARCHAR(10) NOT NULL,
                    job_code VARCHAR(10) NOT NULL,
                    job_mode VARCHAR(10) NOT NULL,
                    job_name VARCHAR(100) NOT NULL,
                    person_code VARCHAR(20) NOT NULL,
                    name VARCHAR(100) NOT NULL,
                    board_session INT DEFAULT NULL,
                    employment_session INT DEFAULT NULL,
                    status VARCHAR(10) DEFAULT NULL,
                    begin_date DATE NOT NULL,
                    end_date DATE NOT NULL,
                    dimission_reason VARCHAR(10) DEFAULT NULL,
                    is_dimission INT DEFAULT NULL,
                    memo VARCHAR(1000) DEFAULT NULL,
                    is_valid INT NOT NULL,
                    entry_date DATE NOT NULL,
                    entry_time VARCHAR(8) NOT NULL,
                    tmstamp bigint not null,
                    PRIMARY KEY(`symbol`,`person_code`,`job_name`,`begin_date`)
                    )
                    ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """Build extraction SQL; 'report' fills the slots, 'update' keeps
        {0}/{1}/{2} templates for the caller.

        Raises:
            ValueError: for an unknown ``type`` (was a silent None).
        """
        sql = """select {0}
                a.ID as id,
                a.UPDATEDATE as update_date,
                a.COMPCODE as company_code,
                a.POSTYPE as Job_attribute,
                a.DUTYCODE as job_code,
                a.DUTYMOD as job_mode,
                a.ACTDUTYNAME as job_name,
                a.PERSONALCODE as person_code,
                a.CNAME as name,
                a.MGENTRYS as board_session,
                a.DENTRYS as employment_session,
                a.NOWSTATUS as status,
                a.BEGINDATE as begin_date,
                a.ENDDATE as end_date,
                a.DIMREASON as dimission_reason,
                a.ISRELDIM as is_dimission,
                a.MEMO as memo,
                a.ISVALID as is_valid,
                a.ENTRYDATE as entry_date,
                a.ENTRYTIME as entry_time,
                cast(a.tmstamp as bigint) as tmstamp,
                b.Symbol as code,
                b.Exchange
                from {1} a
                left join FCDB.dbo.SecurityCode as b
                on b.CompanyCode = a.COMPCODE
                where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            return sql.format('', self.source_table)
        elif type == 'update':
            # Explicit leading space so the predicate never fuses with the
            # end of the base WHERE clause.
            return sql + ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
        raise ValueError("type must be 'report' or 'update', got %r" % (type,))

    def get_datas(self, tm):
        """Fetch the next batch (top 10000 rows) with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """Copy batches with tmstamp > ``tm`` until the source is drained."""
        while True:
            result_list = self.get_datas(tm)
            if result_list.empty:
                break
            # CNSESH -> Shanghai suffix, everything else -> Shenzhen.
            result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                             result_list['code'] + '.XSHG',
                                             result_list['code'] + '.XSHE')
            result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
            try:
                result_list.to_sql(name=self.dest_table, con=self.destination,
                                   if_exists='append', index=False)
            except Exception as e:
                # `.orig.msg` exists only on SQLAlchemy DBAPI errors; guard so
                # the fallback upsert still runs for any exception type.
                print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
                self.insert_or_update(result_list)
            # Batch is ordered by tmstamp; take the last row positionally.
            max_tm = result_list['tmstamp'].iloc[-1]
            self.utils.update_update_log(max_tm)
            tm = max_tm

    def do_update(self):
        """Run an incremental sync when the source is ahead of the log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Refresh the last ``count`` report periods ending at ``end_date``."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # store_true: `type=bool` coerced any non-empty value (even "False")
    # to True, making the flags impossible to disable explicitly.
    parser.add_argument('--rebuild', action='store_true')
    parser.add_argument('--update', action='store_true')
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkManagementInfo()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkManagementInfo()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncSecurityInfo(BaseSync):
    """Sync listed-security master data (TQ_SK_BASICINFO) into the local
    ``stk_security_info`` table, incrementally by source ``tmstamp``."""

    def __init__(self, source=None, destination=None):
        # NOTE(review): the source/destination parameters are ignored;
        # BaseSync presumably establishes self.source / self.destination.
        self.source_table = 'TQ_SK_BASICINFO'
        self.dest_table = 'stk_security_info'
        super(SyncSecurityInfo, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table.
    def create_dest_tables(self):
        """Reset the import log to 0 and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
                    (
                    id INT NOT NULL,
                    company_id VARCHAR(20) NOT NULL,
                    symbol VARCHAR(20) NOT NULL,
                    exchange VARCHAR(10) NOT NULL,
                    security_type VARCHAR(10) NOT NULL,
                    short_name VARCHAR(100) NOT NULL,
                    english_name VARCHAR(100) DEFAULT NULL,
                    decnum INT DEFAULT NULL,
                    currency VARCHAR(10) NOT NULL,
                    isin_code VARCHAR(20) DEFAULT NULL,
                    sedol_code VARCHAR(20) DEFAULT NULL,
                    pairvalue NUMERIC(19,2) DEFAULT NULL,
                    total_shares NUMERIC(15,6) DEFAULT NULL,
                    lists_tatus VARCHAR(10) NOT NULL,
                    list_date DATE NOT NULL,
                    ipo_price NUMERIC(9,4) DEFAULT NULL,
                    delist_date DATE NOT NULL,
                    delist_price NUMERIC(9,4) DEFAULT NULL,
                    sfc_industry1_code VARCHAR(10) DEFAULT NULL,
                    sfc_industry1_name VARCHAR(100) DEFAULT NULL,
                    sfc_industry2_code VARCHAR(10) DEFAULT NULL,
                    sfc_industry2_name VARCHAR(100) DEFAULT NULL,
                    gics_industry1_code VARCHAR(10) DEFAULT NULL,
                    gics_industry1_name VARCHAR(100) DEFAULT NULL,
                    gics_industry2_code VARCHAR(10) DEFAULT NULL,
                    gics_industry2_name VARCHAR(100) DEFAULT NULL,
                    sw_industry1_code VARCHAR(10) DEFAULT NULL,
                    sw_industry1_name VARCHAR(100) DEFAULT NULL,
                    sw_industry2_code VARCHAR(10) DEFAULT NULL,
                    sw_industry2_name VARCHAR(100) DEFAULT NULL,
                    csi_industry1_code VARCHAR(10) DEFAULT NULL,
                    csi_industry1_name VARCHAR(100) DEFAULT NULL,
                    csi_industry2_code VARCHAR(10) DEFAULT NULL,
                    csi_industry2_name VARCHAR(100) DEFAULT NULL,
                    province_code VARCHAR(10) DEFAULT NULL,
                    province_name VARCHAR(100) DEFAULT NULL,
                    city_code VARCHAR(10) DEFAULT NULL,
                    city_name VARCHAR(100) DEFAULT NULL,
                    isvalid INT DEFAULT NULL,
                    entry_date DATE NOT NULL,
                    entry_time VARCHAR(8) DEFAULT NULL,
                    value_currency VARCHAR(10) DEFAULT NULL,
                    tmstamp bigint not null,
                    PRIMARY KEY(`symbol`)
                    )
                    ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """Build extraction SQL; 'report' fills the slots, 'update' keeps
        {0}/{1}/{2} templates for the caller.

        Raises:
            ValueError: for an unknown ``type`` (was a silent None).
        """
        sql = """select {0}
                a.ID as id,
                a.COMPCODE as company_id,
                a.SYMBOL as code,
                a.EXCHANGE as exchange,
                a.SETYPE as security_type,
                a.SESNAME as short_name,
                a.SEENGNAME as english_name,
                a.DECNUM as decnum,
                a.CUR as currency,
                a.ISINCODE as isin_code,
                a.SEDOLCODE as sedol_code,
                a.PARVALUE as pairvalue,
                a.TOTALSHARE as total_shares,
                a.LISTSTATUS as lists_tatus,
                a.LISTDATE as list_date,
                a.LISTOPRICE as ipo_price,
                a.DELISTDATE as delist_date,
                a.DELISTCPRICE as delist_price,
                a.CSRCLEVEL1CODE as sfc_industry1_code,
                a.CSRCLEVEL1NAME as sfc_industry1_name,
                a.CSRCLEVEL2CODE as sfc_industry2_code,
                a.CSRCLEVEL2NAME as sfc_industry2_name,
                a.GICSLEVEL1CODE as gics_industry1_code,
                a.GICSLEVEL1NAME as gics_industry1_name,
                a.GICSLEVEL2CODE as gics_industry2_code,
                a.GICSLEVEL2NAME as gics_industry2_name,
                a.SWLEVEL1CODE as sw_industry1_code,
                a.SWLEVEL1NAME as sw_industry1_name,
                a.SWLEVEL2CODE as sw_industry2_code,
                a.SWLEVEL2NAME as sw_industry2_name,
                a.CSILEVEL1CODE as csi_industry1_code,
                a.CSILEVEL1NAME as csi_industry1_name,
                a.CSILEVEL2CODE as csi_industry2_code,
                a.CSILEVEL2NAME as csi_industry2_name,
                a.PROVINCECODE as province_code,
                a.PROVINCENAME as province_name,
                a.CITYCODE as city_code,
                a.CITYNAME as city_name,
                a.ISVALID as isvalid,
                a.ENTRYDATE as entry_date,
                a.ENTRYTIME as entry_time,
                a.VALUECUR as value_currency,
                cast(a.tmstamp as bigint) as tmstamp
                from {1} a
                where a.LISTSTATUS=1 and (a.EXCHANGE='001002' or a.EXCHANGE='001003') """
        if type == 'report':
            return sql.format('', self.source_table)
        elif type == 'update':
            # Explicit leading space so the predicate never fuses with the
            # end of the base WHERE clause.
            return sql + ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
        raise ValueError("type must be 'report' or 'update', got %r" % (type,))

    def get_datas(self, tm):
        """Fetch the next batch (top 10000 rows) with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """Copy batches with tmstamp > ``tm`` until the source is drained."""
        while True:
            result_list = self.get_datas(tm)
            if result_list.empty:
                break
            # Exchange code '001002' -> Shanghai, otherwise Shenzhen.
            result_list['symbol'] = np.where(result_list['exchange'] == '001002',
                                             result_list['code'] + '.XSHG',
                                             result_list['code'] + '.XSHE')
            result_list.drop(['code'], axis=1, inplace=True)
            try:
                result_list.to_sql(name=self.dest_table, con=self.destination,
                                   if_exists='append', index=False)
            except Exception as e:
                # `.orig.msg` exists only on SQLAlchemy DBAPI errors; guard so
                # the fallback upsert still runs for any exception type.
                print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
                self.insert_or_update(result_list)
            # Batch is ordered by tmstamp; take the last row positionally.
            max_tm = result_list['tmstamp'].iloc[-1]
            self.utils.update_update_log(max_tm)
            tm = max_tm

    def do_update(self):
        """Run an incremental sync when the source is ahead of the log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Refresh the last ``count`` report periods ending at ``end_date``."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # store_true: `type=bool` coerced any non-empty value (even "False")
    # to True, making the flags impossible to disable explicitly.
    parser.add_argument('--rebuild', action='store_true')
    parser.add_argument('--update', action='store_true')
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncSecurityInfo()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncSecurityInfo()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkShareholderFloatingTop10(BaseSync):
    """Sync top-10 floating shareholders (TQ_SK_OTSHOLDER) into the local
    ``stk_shareholder_floating_top10`` table, incrementally by ``tmstamp``."""

    def __init__(self, source=None, destination=None):
        # NOTE(review): the source/destination parameters are ignored;
        # BaseSync presumably establishes self.source / self.destination.
        self.source_table = 'TQ_SK_OTSHOLDER'
        self.dest_table = 'stk_shareholder_floating_top10'
        super(SyncStkShareholderFloatingTop10, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table.
    def create_dest_tables(self):
        """Reset the import log to 0 and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
                    (
                    id INT NOT NULL,
                    symbol VARCHAR(20) NOT NULL,
                    pub_date DATE NOT NULL,
                    end_date DATE NOT NULL,
                    company_id VARCHAR(10) NOT NULL,
                    shareholder_id VARCHAR(10) DEFAULT NULL,
                    shareholder_name VARCHAR(200) NOT NULL,
                    shareholder_class VARCHAR(10) DEFAULT NULL,
                    sharesnature VARCHAR(10) DEFAULT NULL,
                    shareholder_rank NUMERIC(10,0) NOT NULL,
                    share_number NUMERIC(26,2) DEFAULT NULL,
                    total_share_ratio NUMERIC(12,6) DEFAULT NULL,
                    a_share_ratio NUMERIC(8,4) DEFAULT NULL,
                    a_share_number NUMERIC(26,2) DEFAULT NULL,
                    b_share_number NUMERIC(26,2) DEFAULT NULL,
                    h_share_number NUMERIC(26,2) DEFAULT NULL,
                    share_number_change NUMERIC(16,0) DEFAULT NULL,
                    share_pledge NUMERIC(16,0) DEFAULT NULL,
                    share_freeze NUMERIC(16,0) DEFAULT NULL,
                    freeze_reason VARCHAR(200) DEFAULT NULL,
                    a1_share_ratio NUMERIC(12,6) DEFAULT NULL,
                    is_history INT DEFAULT NULL,
                    update_date DATE DEFAULT NULL,
                    tmstamp bigint not null,
                    PRIMARY KEY(`symbol`,`end_date`,`shareholder_name`,`shareholder_rank`)
                    )
                    ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """Build extraction SQL; 'report' fills the slots, 'update' keeps
        {0}/{1}/{2} templates for the caller.

        Raises:
            ValueError: for an unknown ``type`` (was a silent None).
        """
        sql = """select {0}
                a.ID as id,
                a.PUBLISHDATE as pub_date,
                a.ENDDATE as end_date,
                a.COMPCODE as company_id,
                a.SHHOLDERCODE as shareholder_id,
                a.SHHOLDERNAME as shareholder_name,
                a.SHHOLDERTYPE as shareholder_class,
                a.SHHOLDERNATURE as sharesnature,
                a.RANK as shareholder_rank,
                a.HOLDERAMT as share_number,
                a.PCTOFFLOATSHARES as total_share_ratio,
                a.HOLDERRTO as a_share_ratio,
                a.HOLDERANUM as a_share_number,
                a.HOLDERBNUM as b_share_number,
                a.HOLDERHNUM as h_share_number,
                a.HOLDERSUMCHG as share_number_change,
                a.PLEDGEINVOLVEDSUM as share_pledge,
                a.FREEZEINVOLVEDSUM as share_freeze,
                a.PFSTATEMENT as freeze_reason,
                a.PCTOFFLOTSHARES as a1_share_ratio,
                a.ISHIS as is_history,
                a.UPDATEDATE as update_date,
                cast(a.tmstamp as bigint) as tmstamp,
                b.Symbol as code,
                b.Exchange
                from {1} a
                left join FCDB.dbo.SecurityCode as b
                on b.CompanyCode = a.COMPCODE
                where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            return sql.format('', self.source_table)
        elif type == 'update':
            # Explicit leading space so the predicate never fuses with the
            # end of the base WHERE clause.
            return sql + ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
        raise ValueError("type must be 'report' or 'update', got %r" % (type,))

    def get_datas(self, tm):
        """Fetch the next batch (top 10000 rows) with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """Copy batches with tmstamp > ``tm`` until the source is drained."""
        while True:
            result_list = self.get_datas(tm)
            if result_list.empty:
                break
            # CNSESH -> Shanghai suffix, everything else -> Shenzhen.
            result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                             result_list['code'] + '.XSHG',
                                             result_list['code'] + '.XSHE')
            result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
            try:
                result_list.to_sql(name=self.dest_table, con=self.destination,
                                   if_exists='append', index=False)
            except Exception as e:
                # `.orig.msg` exists only on SQLAlchemy DBAPI errors; guard so
                # the fallback upsert still runs for any exception type.
                print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
                self.insert_or_update(result_list)
            # Batch is ordered by tmstamp; take the last row positionally.
            max_tm = result_list['tmstamp'].iloc[-1]
            self.utils.update_update_log(max_tm)
            tm = max_tm

    def do_update(self):
        """Run an incremental sync when the source is ahead of the log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Refresh the last ``count`` report periods ending at ``end_date``."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # store_true: `type=bool` coerced any non-empty value (even "False")
    # to True, making the flags impossible to disable explicitly.
    parser.add_argument('--rebuild', action='store_true')
    parser.add_argument('--update', action='store_true')
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkShareholderFloatingTop10()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkShareholderFloatingTop10()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkShareholderTop10(BaseSync):
    """Sync top-10 shareholders (TQ_SK_SHAREHOLDER) into the local
    ``stk_shareholder_top10`` table, incrementally by source ``tmstamp``."""

    def __init__(self, source=None, destination=None):
        # NOTE(review): the source/destination parameters are ignored;
        # BaseSync presumably establishes self.source / self.destination.
        self.source_table = 'TQ_SK_SHAREHOLDER'
        self.dest_table = 'stk_shareholder_top10'
        super(SyncStkShareholderTop10, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table.
    def create_dest_tables(self):
        """Reset the import log to 0 and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
                    (
                    id INT NOT NULL,
                    symbol VARCHAR(20) NOT NULL,
                    pub_date DATE NOT NULL,
                    end_date DATE NOT NULL,
                    company_id VARCHAR(10) NOT NULL,
                    shareholder_id VARCHAR(10) DEFAULT NULL,
                    shareholder_name VARCHAR(200) NOT NULL,
                    shareholder_class VARCHAR(10) NOT NULL,
                    sharesnature VARCHAR(10) NOT NULL,
                    shareholder_rank NUMERIC(10,0) NOT NULL,
                    sharesnature_id VARCHAR(100) DEFAULT NULL,
                    share_number NUMERIC(26,2) DEFAULT NULL,
                    share_ratio NUMERIC(8,4) DEFAULT NULL,
                    share_pledge_freeze NUMERIC(26,2) DEFAULT NULL,
                    share_change NUMERIC(26,2) DEFAULT NULL,
                    is_history INT DEFAULT NULL,
                    update_date DATE DEFAULT NULL,
                    tmstamp bigint not null,
                    PRIMARY KEY(`symbol`,`end_date`,`shareholder_name`,`shareholder_rank`)
                    )
                    ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """Build extraction SQL; 'report' fills the slots, 'update' keeps
        {0}/{1}/{2} templates for the caller.

        Raises:
            ValueError: for an unknown ``type`` (was a silent None).
        """
        sql = """select {0}
                a.ID as id,
                a.PUBLISHDATE as pub_date,
                a.ENDDATE as end_date,
                a.COMPCODE as company_id,
                a.SHHOLDERCODE as shareholder_id,
                a.SHHOLDERNAME as shareholder_name,
                a.SHHOLDERTYPE as shareholder_class,
                a.SHHOLDERNATURE as sharesnature,
                a.RANK as shareholder_rank,
                a.SHARESTYPE as sharesnature_id,
                a.HOLDERAMT as share_number,
                a.HOLDERRTO as share_ratio,
                a.PFHOLDERAMT as share_pledge_freeze,
                a.CURCHG as share_change,
                a.ISHIS as is_history,
                a.UPDATEDATE as update_date,
                cast(a.tmstamp as bigint) as tmstamp,
                b.Symbol as code,
                b.Exchange
                from {1} a
                left join FCDB.dbo.SecurityCode as b
                on b.CompanyCode = a.COMPCODE
                where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            return sql.format('', self.source_table)
        elif type == 'update':
            # Explicit leading space so the predicate never fuses with the
            # end of the base WHERE clause.
            return sql + ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
        raise ValueError("type must be 'report' or 'update', got %r" % (type,))

    def get_datas(self, tm):
        """Fetch the next batch (top 10000 rows) with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """Copy batches with tmstamp > ``tm`` until the source is drained."""
        while True:
            result_list = self.get_datas(tm)
            if result_list.empty:
                break
            # CNSESH -> Shanghai suffix, everything else -> Shenzhen.
            result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                             result_list['code'] + '.XSHG',
                                             result_list['code'] + '.XSHE')
            result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
            try:
                result_list.to_sql(name=self.dest_table, con=self.destination,
                                   if_exists='append', index=False)
            except Exception as e:
                # `.orig.msg` exists only on SQLAlchemy DBAPI errors; guard so
                # the fallback upsert still runs for any exception type.
                print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
                self.insert_or_update(result_list)
            # Batch is ordered by tmstamp; take the last row positionally.
            max_tm = result_list['tmstamp'].iloc[-1]
            self.utils.update_update_log(max_tm)
            tm = max_tm

    def do_update(self):
        """Run an incremental sync when the source is ahead of the log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Refresh the last ``count`` report periods ending at ``end_date``."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # store_true: `type=bool` coerced any non-empty value (even "False")
    # to True, making the flags impossible to disable explicitly.
    parser.add_argument('--rebuild', action='store_true')
    parser.add_argument('--update', action='store_true')
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkShareholderTop10()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkShareholderTop10()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
from datetime import datetime
import pdb
import sqlalchemy as sa
import numpy as np
import pandas as pd
from sqlalchemy.orm import sessionmaker
import sys
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkStatusChange(BaseSync):
    """Sync company info-change records (TQ_COMP_INFOCHG) into the local
    ``stk_status_change`` table, incrementally by source ``tmstamp``."""

    def __init__(self, source=None, destination=None):
        # NOTE(review): the source/destination parameters are ignored;
        # BaseSync presumably establishes self.source / self.destination.
        self.source_table = 'TQ_COMP_INFOCHG'
        self.dest_table = 'stk_status_change'
        super(SyncStkStatusChange, self).__init__(self.dest_table)
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)

    # Create the destination table.
    def create_dest_tables(self):
        """Reset the import log to 0 and (re)create the destination table."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
                    (
                    id INT NOT NULL,
                    symbol VARCHAR(15) NOT NULL,
                    company_id VARCHAR(10),
                    pub_date DATE,
                    change_type_id VARCHAR(10),
                    begin_change DATE,
                    end_change DATE,
                    change_before TEXT DEFAULT NULL,
                    change_after TEXT DEFAULT NULL,
                    change_reason TEXT DEFAULT NULL,
                    isvalid INT,
                    entry_date DATE NOT NULL,
                    entry_time VARCHAR(8) DEFAULT NULL,
                    tmstamp bigint not null,
                    PRIMARY KEY(`symbol`,`pub_date`,`change_type_id`,`begin_change`)
                    )
                    ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)

    def get_sql(self, type):
        """Build extraction SQL; 'report' fills the slots, 'update' keeps
        {0}/{1}/{2} templates for the caller.

        Raises:
            ValueError: for an unknown ``type`` (was a silent None).
        """
        sql = """select {0}
                a.ID as id,
                a.COMPCODE as company_id,
                a.PUBLISHDATE as pub_date,
                a.CHGTYPE as change_type_id,
                a.BEGINDATE as begin_change,
                a.ENDDATE as end_change,
                a.BECHG as change_before,
                a.AFCHG as change_after,
                a.CHGEXP as change_reason,
                a.ISVALID as isvalid,
                a.ENTRYDATE as entry_date,
                a.ENTRYTIME as entry_time,
                cast(a.tmstamp as bigint) as tmstamp,
                b.Symbol as code,
                b.Exchange
                from {1} a
                left join FCDB.dbo.SecurityCode as b
                on b.CompanyCode = a.COMPCODE
                where b.SType ='EQA' and b.Enabled=0 and b.Status=0 """
        if type == 'report':
            return sql.format('', self.source_table)
        elif type == 'update':
            # Explicit leading space so the predicate never fuses with the
            # end of the base WHERE clause.
            return sql + ' and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
        raise ValueError("type must be 'report' or 'update', got %r" % (type,))

    def get_datas(self, tm):
        """Fetch the next batch (top 10000 rows) with tmstamp > ``tm``."""
        print('正在查询', self.source_table, '表大于', tm, '的数据')
        sql = self.get_sql('update')
        sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def update_table_data(self, tm):
        """Copy batches with tmstamp > ``tm`` until the source is drained."""
        while True:
            result_list = self.get_datas(tm)
            if result_list.empty:
                break
            # CNSESH -> Shanghai suffix, everything else -> Shenzhen.
            result_list['symbol'] = np.where(result_list['Exchange'] == 'CNSESH',
                                             result_list['code'] + '.XSHG',
                                             result_list['code'] + '.XSHE')
            result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
            try:
                result_list.to_sql(name=self.dest_table, con=self.destination,
                                   if_exists='append', index=False)
            except Exception as e:
                # `.orig.msg` exists only on SQLAlchemy DBAPI errors; guard so
                # the fallback upsert still runs for any exception type.
                print(getattr(getattr(e, 'orig', e), 'msg', str(e)))
                self.insert_or_update(result_list)
            # Batch is ordered by tmstamp; take the last row positionally.
            max_tm = result_list['tmstamp'].iloc[-1]
            self.utils.update_update_log(max_tm)
            tm = max_tm

    def do_update(self):
        """Run an incremental sync when the source is ahead of the log."""
        max_tm = self.utils.get_max_tm_source()
        log_tm = self.utils.get_max_tm_log()
        if max_tm > log_tm:
            self.update_table_data(log_tm)

    def update_report(self, count, end_date):
        """Refresh the last ``count`` report periods ending at ``end_date``."""
        self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # store_true: `type=bool` coerced any non-empty value (even "False")
    # to True, making the flags impossible to disable explicitly.
    parser.add_argument('--rebuild', action='store_true')
    parser.add_argument('--update', action='store_true')
    args = parser.parse_args()
    if args.rebuild:
        processor = SyncStkStatusChange()
        processor.create_dest_tables()
        processor.do_update()
    elif args.update:
        processor = SyncStkStatusChange()
        processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import argparse
import sys
import numpy as np
import pandas as pd
sys.path.append('..')
sys.path.append('../..')
from sync.base_sync import BaseSync
from sync.tm_import_utils import TmImportUtils
class SyncStkXrXd(BaseSync):
    def __init__(self, source=None, destination=None):
        """Wire up the TQ_SK_PRORIGHTS -> stk_xr_xd sync pair.

        NOTE(review): the source/destination parameters are ignored;
        BaseSync presumably establishes self.source / self.destination.
        """
        # Table names must be set before the super() call, which receives
        # dest_table.
        self.source_table = 'TQ_SK_PRORIGHTS'
        self.dest_table = 'stk_xr_xd'
        super(SyncStkXrXd, self).__init__(self.dest_table)
        # Helper that tracks the per-table incremental tmstamp bookkeeping.
        self.utils = TmImportUtils(self.source, self.destination, self.source_table, self.dest_table)
    # Create the destination table.
    def create_dest_tables(self):
        """Reset the import log to 0 and (re)create the ex-rights/ex-dividend
        destination table (MySQL DDL, newlines stripped before execution)."""
        self.utils.update_update_log(0)
        create_sql = """create table {0}
                    (
                    id INT NOT NULL,
                    pub_date DATE,
                    update_date DATE,
                    sec_code VARCHAR(20) NOT NULL,
                    symbol VARCHAR(20) NOT NULL,
                    company_id VARCHAR(20) NOT NULL,
                    divdence_year VARCHAR(20) NOT NULL,
                    date_type VARCHAR(10) NOT NULL,
                    divdence_type VARCHAR(10) NOT NULL,
                    rank_num INT NOT NULL,
                    issue_object_type VARCHAR(10) NOT NULL,
                    issue_object VARCHAR(400) DEFAULT NULL,
                    project_type VARCHAR(10) NOT NULL,
                    currency VARCHAR(10) NOT NULL,
                    equity_base_date DATE,
                    equity_base NUMERIC(19,0) DEFAULT NULL,
                    record_date DATE,
                    xdr_date DATE,
                    lasttrade_date DATE,
                    aftertax_earning NUMERIC(19,6) DEFAULT NULL,
                    qfii_aftertax_earning NUMERIC(19,6) DEFAULT NULL,
                    cash_begindate DATE,
                    cash_enddate DATE,
                    share_deliveryratio NUMERIC(19,10) DEFAULT NULL,
                    capital_transferratio NUMERIC(19,10) DEFAULT NULL,
                    share_donationratio NUMERIC(19,10) DEFAULT NULL,
                    share_arrivaldate DATE,
                    list_date DATE,
                    buyback_date DATE,
                    buyback_deadline date,
                    sharereform_date DATE,
                    meeting_pubdate DATE,
                    is_newplan INT NOT NULL,
                    xdr_statement VARCHAR(2000) DEFAULT NULL,
                    is_valid INT DEFAULT NULL,
                    entry_date DATE,
                    entry_time VARCHAR(8) DEFAULT NULL,
                    tmstamp bigint not null,
                    PRIMARY KEY(`symbol`,`sec_code`,`divdence_year`,`date_type`,`divdence_type`,`rank_num`,`project_type`)
                    )
                    ENGINE=InnoDB DEFAULT CHARSET=utf8;""".format(self.dest_table)
        create_sql = create_sql.replace('\n', '')
        self.create_table(create_sql)
def get_sql(self, type):
sql = """select {0}
a.ID as id,
b.Exchange,
a.PUBLISHDATE as pub_date,
a.UPDATEDATE as update_date,
a.SECODE as sec_code,
a.SYMBOL as code,
a.COMPCODE as company_id,
a.DIVIYEAR as divdence_year,
a.DATETYPE as date_type,
a.DIVITYPE as divdence_type,
a.RANKNUM as rank_num,
a.GRAOBJTYPE as issue_object_type,
a.GRAOBJ as issue_object,
a.PROJECTTYPE as project_type,
a.CUR as currency,
a.SHCAPBASEDATE as equity_base_date,
a.SHCAPBASEQTY as equity_base,
a.EQURECORDDATE as record_date,
a.XDRDATE as xdr_date,
a.LASTTRADDAE as lasttrade_date,
a.AFTTAXCASHDVCNY as aftertax_earning,
a.AFTTAXCASHDVCNYQFII as qfii_aftertax_earning,
a.CASHDVARRBEGDATE as cash_begindate,
a.CASHDVARRENDDATE as cash_enddate,
a.PROBONUSRT as share_deliveryratio,
a.TRANADDRT as capital_transferratio,
a.BONUSRT as share_donationratio,
a.SHARRDATE as share_arrivaldate,
a.LISTDATE as list_date,
a.REPUBEGDATE as buyback_date,
a.REPUENDDATE as buyback_deadline,
a.ASSREPLACDATE as sharereform_date,
a.SHHDMEETRESPUBDATE as meeting_pubdate,
a.ISNEWEST as is_newplan,
a.DIVIEXPMEMO as xdr_statement,
a.ISVALID as is_valid,
a.ENTRYDATE as entry_date,
a.ENTRYTIME as entry_time,
cast(a.tmstamp as bigint) as tmstamp
from {1} a
left join FCDB.dbo.SecurityCode as b
on b.CompanyCode = a.COMPCODE
where b.SType ='EQA' and b.Enabled=0 and b.status=0 """
if type == 'report':
sql = sql.format('', self.source_table)
return sql
elif type == 'update':
sql += 'and cast(a.tmstamp as bigint) > {2} order by a.tmstamp'
return sql
def get_datas(self, tm):
print('正在查询', self.source_table, '表大于', tm, '的数据')
sql = self.get_sql('update')
sql = sql.format('top 10000', self.source_table, tm).replace('\n', '')
trades_sets = pd.read_sql(sql, self.source)
return trades_sets
def update_table_data(self, tm):
while True:
result_list = self.get_datas(tm)
if not result_list.empty:
result_list['symbol'] = np.where(result_list['Exchange'] == '001002',
result_list['code'] + '.XSHG',
result_list['code'] + '.XSHE')
result_list.drop(['Exchange', 'code'], axis=1, inplace=True)
try:
result_list.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
except Exception as e:
print(e.orig.msg)
self.insert_or_update(result_list)
max_tm = result_list['tmstamp'][result_list['tmstamp'].size - 1]
self.utils.update_update_log(max_tm)
tm = max_tm
else:
break
def do_update(self):
max_tm = self.utils.get_max_tm_source()
log_tm = self.utils.get_max_tm_log()
if max_tm > log_tm:
self.update_table_data(log_tm)
def update_report(self, count, end_date):
self.utils.update_report(count, end_date, self.get_sql('report'))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--rebuild', type=bool, default=False)
parser.add_argument('--update', type=bool, default=False)
args = parser.parse_args()
if args.rebuild:
processor = SyncStkXrXd()
processor.create_dest_tables()
processor.do_update()
elif args.update:
processor = SyncStkXrXd()
processor.do_update()
#!/usr/bin/env python
# coding=utf-8
import pdb
import sys
import os
import sqlalchemy as sa
import pandas as pd
import numpy as np
import collections
import argparse
from base_sync import BaseSync
sys.path.append('..')
from datetime import datetime, date
from sqlalchemy.orm import sessionmaker
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import config
from utillities.sync_util import SyncUtil
class SyncIndex(BaseSync):
    """Sync index constituent weights (FCDB.dbo.issweight) into the `index`
    table, which is range-partitioned by month of trade_date.

    Each trade date is processed independently: the data is snapshotted to a
    local CSV, the month partition is ensured via a stored procedure, and the
    rows are replaced in the destination table.
    """

    def __init__(self):
        self.sync_util = SyncUtil()
        super(SyncIndex, self).__init__('index')
        # NOTE(review): connection string with credentials is hard-coded here;
        # consider moving it into config alongside RECORD_BASE_DIR.
        self.source = sa.create_engine("mssql+pymssql://read:read@192.168.100.87:1433/FCDB")
        self.dir = config.RECORD_BASE_DIR + self.dest_table + '/'

    def create_dest_tables(self):
        """Create the partitioned destination table, its indexes and the
        monthly history partitions back to 2000."""
        create_sql = """create table `{0}`(
                            `id` varchar(128) NOT NULL,
                            `isymbol` varchar(24) NOT NULL,
                            `trade_date` date NOT NULL,
                            `iname` varchar(128) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
                            `symbol` varchar(32) NOT NULL,
                            `sname` varchar(128) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
                            `weighing` decimal(8,2) DEFAULT NULL,
                            PRIMARY KEY(`id`,`trade_date`,`isymbol`)
                        )ENGINE=InnoDB DEFAULT CHARSET=utf8
                        PARTITION BY RANGE (to_days(trade_date))
                        (PARTITION p0 VALUES LESS THAN (TO_DAYS('20000101')) ENGINE = InnoDB);
                        """.format(self.dest_table)
        self.create_table(create_sql)
        self.create_index()
        self.build_history_partion(2000)

    def create_index(self):
        """Create the (trade_date, isymbol) and (trade_date, symbol) indexes."""
        session = self.dest_session()
        indexs = [
            'CREATE INDEX index_trade_date_isymbol_index ON `index` (trade_date, isymbol);',
            'CREATE INDEX index_trade_date_symbol_index ON `index` (trade_date, symbol);'
        ]
        for sql in indexs:
            session.execute(sql)
        session.commit()
        session.close()

    def build_history_partion(self, start_year):
        """Create one partition per month from start_year up to (but not
        including) the current month, via SP_TABLE_PARTITION_AUTO.

        (Method name kept as-is — "partion" — since callers may rely on it.)
        """
        print('正在生成', start_year, '年之后的历史分区表')
        current_year = datetime.now().year
        current_month = datetime.now().month
        session = self.dest_session()
        # Full years: every month of every year before the current one.
        for year in range(start_year, current_year):
            print(year)
            for month in range(1, 13):
                session.execute(
                    '''call SP_TABLE_PARTITION_AUTO('index','{0}','par_index');'''
                    .format('{0}{1:02d}'.format(year, month))
                )
        # Current year: only the months that have already started.
        for month in range(1, current_month):
            session.execute(
                '''call SP_TABLE_PARTITION_AUTO('index','{0}','par_index');'''
                .format('{0}{1:02d}'.format(current_year, month))
            )

    def get_index_sets(self, trade_date):
        """Fetch constituent weights for the tracked index universe on one trade date."""
        sql = """SELECT Isymbol as icode, Iexchange as iexchange, Iname as iname, Tdate as trade_date,
                 Symbol as code ,Exchange , Sname as sname, Weighing as weighing from FCDB.dbo.issweight
                 where Isymbol in ('000300','000906','000985','399005','399006','000852','000905','399102','000016')
                 and Tdate = '{0}';""".format(trade_date)
        return pd.read_sql(sql, self.source)

    def do_update(self, start_date, end_date, count, order='DESC'):
        """Sync constituent weights for every trade date in the window."""
        # Read the trading calendar ('001002' = Shanghai exchange calendar).
        trade_sets = self.sync_util.get_trades_ago('001002', start_date, end_date, count, order)
        trade_list = list(trade_sets['TRADEDATE'])
        session = self.dest_session()
        for trade_date in trade_list:
            print(trade_date)
            index_sets = self.get_index_sets(trade_date)
            if index_sets.empty:
                continue
            try:
                # 'CNSESH' marks Shanghai listings (.XSHG); others map to .XSHE.
                index_sets['symbol'] = np.where(index_sets['Exchange'] == 'CNSESH',
                                                index_sets['code'] + '.XSHG',
                                                index_sets['code'] + '.XSHE')
                index_sets['isymbol'] = np.where(index_sets['iexchange'] == 'CNSESH',
                                                 index_sets['icode'] + '.XSHG',
                                                 index_sets['icode'] + '.XSHE')
                index_sets['id'] = index_sets['symbol'] + str(trade_date) + index_sets['iexchange'] + index_sets[
                    'isymbol']
                index_sets.drop(['iexchange', 'Exchange', 'code', 'icode'], axis=1, inplace=True)
                # Local CSV snapshot (overwrite any previous file for the date).
                if not os.path.exists(self.dir):
                    os.makedirs(self.dir)
                file_name = self.dir + str(trade_date) + '.csv'
                if os.path.exists(str(file_name)):
                    os.remove(str(file_name))
                index_sets.to_csv(file_name, encoding='UTF-8')
                # Database save: make sure the month partition exists first.
                session.execute('''call SP_TABLE_PARTITION_AUTO('index','{0}','par_index');'''.format(trade_date))
                try:
                    self.delete_trade_data(trade_date)
                    index_sets.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as sql_err:
                    # Fall back to a row-wise upsert; don't assume the exception
                    # is a DBAPI error exposing .orig.msg — a plain exception
                    # here used to raise AttributeError and skip the fallback.
                    print(getattr(getattr(sql_err, 'orig', sql_err), 'msg', sql_err))
                    self.insert_or_update(index_sets)
            except Exception as e:
                # Best-effort per-date sync: log and continue with the next date.
                print(e)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--start_date', type=int, default=20070101)
parser.add_argument('--end_date', type=int, default=0)
parser.add_argument('--count', type=int, default=2)
parser.add_argument('--rebuild', type=bool, default=False)
parser.add_argument('--update', type=bool, default=False)
parser.add_argument('--schedule', type=bool, default=False)
args = parser.parse_args()
if args.end_date == 0:
end_date = int(datetime.now().date().strftime('%Y%m%d'))
else:
end_date = args.end_date
if args.rebuild:
processor = SyncIndex()
processor.create_dest_tables()
processor.do_update(args.start_date, end_date, args.count)
if args.update:
processor = SyncIndex()
processor.do_update(args.start_date, end_date, args.count)
if args.schedule:
processor = SyncIndex()
start_date = processor.get_start_date()
print('running schedule task, start date:', start_date, ';end date:', end_date)
processor.do_update(start_date, end_date, -1, '')
#!/usr/bin/env python
# coding=utf-8
import pdb
import sys
import os
import sqlalchemy as sa
import pandas as pd
import numpy as np
import collections
import argparse
from datetime import datetime, date
from sqlalchemy.orm import sessionmaker
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from base_sync import BaseSync
sys.path.append('..')
import config
from utillities.sync_util import SyncUtil
class SyncIndexDailyPrice(BaseSync):
    """Sync index daily quotes (QADB TQ_QT_INDEX) into index_daily_price.

    Each trade date is processed independently: data is snapshotted to a local
    CSV, then the date's rows are replaced in the destination table.
    """

    def __init__(self):
        self.sync_util = SyncUtil()
        super(SyncIndexDailyPrice, self).__init__('index_daily_price')
        self.dir = config.RECORD_BASE_DIR + self.dest_table + '/'

    def create_dest_tables(self):
        """Create the destination table for index daily quotes."""
        create_sql = """create table `{0}`(
                            `id` varchar(32) NOT NULL,
                            `symbol` varchar(32) NOT NULL,
                            `trade_date` date NOT NULL,
                            `name` varchar(50) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
                            `pre_close` decimal(15,6) DEFAULT NULL,
                            `open` decimal(15,6) DEFAULT NULL,
                            `close` decimal(15,6) DEFAULT NULL,
                            `high` decimal(15,6) DEFAULT NULL,
                            `low` decimal(15,6) DEFAULT NULL,
                            `volume` decimal(20,2) DEFAULT NULL,
                            `money` decimal(18,3) DEFAULT NULL,
                            `deals` decimal(10,0) DEFAULT NULL,
                            `change` decimal(9,4) DEFAULT NULL,
                            `change_pct` decimal(8,4) DEFAULT NULL,
                            `tot_mkt_cap` decimal(18,4) DEFAULT NULL,
                            PRIMARY KEY(`id`,`trade_date`,`symbol`)
                        )ENGINE=InnoDB DEFAULT CHARSET=utf8;
                        """.format(self.dest_table)
        self.create_table(create_sql)

    def get_index_sets(self, trade_date):
        """Fetch valid SH/SZ index quotes for one trade date."""
        sql = """SELECT TRADEDATE as trade_date,
                 i.exchange as Exchange,
                 s.symbol as code,
                 INDEXNAME as name,
                 LCLOSE as pre_close,
                 TOPEN as 'open',
                 TCLOSE as 'close',
                 THIGH as high,
                 TLOW as low,
                 VOL as volume,
                 AMOUNT as money,
                 DEALS as deals,
                 CHANGE as change,
                 PCHG as change_pct,
                 TOTMKTCAP as tot_mkt_cap
                 from QADB.dbo.TQ_QT_INDEX i
                 left join TQ_OA_STCODE s on i.SECODE = s.secode
                 where (i.exchange = '001002' or i.exchange = '001003') and i.ISVALID = 1 and s.ISVALID = 1 and TRADEDATE = '{0}';""".format(
            trade_date)
        return pd.read_sql(sql, self.source)

    def do_update(self, start_date, end_date, count, order='DESC'):
        """Sync index daily quotes for every trade date in the window."""
        # Read the trading calendar ('001002' = Shanghai exchange calendar).
        trade_sets = self.sync_util.get_trades_ago('001002', start_date, end_date, count, order)
        trade_list = list(trade_sets['TRADEDATE'])
        for trade_date in trade_list:
            print(trade_date)
            index_sets = self.get_index_sets(trade_date)
            if index_sets.empty:
                continue
            try:
                # '001002' marks Shanghai listings (.XSHG); others map to .XSHE.
                index_sets['symbol'] = np.where(index_sets['Exchange'] == '001002',
                                                index_sets['code'] + '.XSHG',
                                                index_sets['code'] + '.XSHE')
                index_sets['id'] = index_sets['symbol'] + str(trade_date)
                index_sets.drop(['Exchange', 'code'], axis=1, inplace=True)
                # Local CSV snapshot (overwrite any previous file for the date).
                if not os.path.exists(self.dir):
                    os.makedirs(self.dir)
                file_name = self.dir + str(trade_date) + '.csv'
                if os.path.exists(str(file_name)):
                    os.remove(str(file_name))
                index_sets.to_csv(file_name, encoding='UTF-8')
                # Database save: delete-then-append, with row-wise fallback.
                try:
                    self.delete_trade_data(trade_date)
                    index_sets.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as sql_err:
                    # Don't assume the exception is a DBAPI error exposing
                    # .orig.msg — a plain exception here used to raise
                    # AttributeError and skip the insert_or_update fallback.
                    print(getattr(getattr(sql_err, 'orig', sql_err), 'msg', sql_err))
                    self.insert_or_update(index_sets)
            except Exception as e:
                # Best-effort per-date sync: log and continue with the next date.
                print(e)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--start_date', type=int, default=20070101)
parser.add_argument('--end_date', type=int, default=0)
parser.add_argument('--count', type=int, default=2)
parser.add_argument('--rebuild', type=bool, default=False)
parser.add_argument('--update', type=bool, default=False)
parser.add_argument('--schedule', type=bool, default=False)
args = parser.parse_args()
if args.end_date == 0:
end_date = int(datetime.now().date().strftime('%Y%m%d'))
else:
end_date = args.end_date
if args.rebuild:
processor = SyncIndexDailyPrice()
processor.create_dest_tables()
processor.do_update(args.start_date, end_date, args.count)
if args.update:
processor = SyncIndexDailyPrice()
processor.do_update(args.start_date, end_date, args.count)
if args.schedule:
processor = SyncIndexDailyPrice()
start_date = processor.get_start_date()
print('running schedule task, start date:', start_date, ';end date:', end_date)
processor.do_update(start_date, end_date, -1, '')
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
@version: ??
@author: li
@file: __init__.py.py
@time: 2019-07-02 16:09
"""
\ No newline at end of file
#!/usr/bin/env python
# coding=utf-8
import pdb
import collections
import numpy as np
from datetime import datetime
import sqlalchemy as sa
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import argparse
from sync_fundamentals import SyncFundamentals
class SyncBalance(object):
    """Sync balance-sheet fundamentals (QADB TQ_FIN_PROBALSHEETNEW) via
    SyncFundamentals.

    This class only supplies the destination column mapping, the extraction
    SQL and a couple of derived columns; the actual sync/report machinery
    lives in SyncFundamentals.
    """

    def __init__(self):
        # Engines are left as None; SyncFundamentals resolves its own sources.
        self._sync_fun = SyncFundamentals(None, None, 'balance')
        # Register the hook that fills in the derived columns after extraction.
        self._sync_fun.secondary_sets(self.secondary_sets)

    def create_dest_tables(self):
        """Create the trade-date-aligned destination table 'balance'."""
        self._sync_fun.create_dest_tables('balance')

    def create_dest_report_tables(self):
        """Create the report-date-aligned destination table 'balance_report'."""
        self._sync_fun.create_dest_report_tables('balance_report')

    def create_columns(self):
        """Declare destination columns, extraction SQL and post-processing hooks
        on the underlying SyncFundamentals instance. Must be called before any
        do_update/update_report."""
        columns_list = collections.OrderedDict()
        columns_list['cash_equivalents'] = 'decimal(19,4)'
        columns_list['settlement_provi'] = 'decimal(19,4)'
        columns_list['lend_capital'] = 'decimal(19,4)'
        columns_list['trading_assets'] = 'decimal(19,4)'
        columns_list['bill_receivable'] = 'decimal(19,4)'
        columns_list['account_receivable'] = 'decimal(19,4)'
        columns_list['advance_payment'] = 'decimal(19,4)'
        columns_list['insurance_receivables'] = 'decimal(19,4)'
        columns_list['reinsurance_receivables'] = 'decimal(19,4)'
        columns_list['reinsurance_contract_reserves_receivable'] = 'decimal(19,4)'
        columns_list['interest_receivable'] = 'decimal(19,4)'
        columns_list['dividend_receivable'] = 'decimal(19,4)'
        columns_list['other_receivable'] = 'decimal(19,4)'
        columns_list['bought_sellback_assets'] = 'decimal(19,4)'
        columns_list['inventories'] = 'decimal(19,4)'
        columns_list['non_current_asset_in_one_year'] = 'decimal(19,4)'
        columns_list['other_current_assets'] = 'decimal(19,4)'
        columns_list['total_current_assets'] = 'decimal(19,4)'
        columns_list['loan_and_advance'] = 'decimal(19,4)'
        columns_list['hold_for_sale_assets'] = 'decimal(19,4)'
        columns_list['hold_to_maturity_investments'] = 'decimal(19,4)'
        columns_list['longterm_receivable_account'] = 'decimal(19,4)'
        columns_list['longterm_equity_invest'] = 'decimal(19,4)'
        columns_list['investment_property'] = 'decimal(19,4)'
        columns_list['fixed_assets'] = 'decimal(19,4)'
        columns_list['constru_in_process'] = 'decimal(19,4)'
        columns_list['construction_materials'] = 'decimal(19,4)'
        columns_list['fixed_assets_liquidation'] = 'decimal(19,4)'
        columns_list['biological_assets'] = 'decimal(19,4)'
        columns_list['oil_gas_assets'] = 'decimal(19,4)'
        columns_list['intangible_assets'] = 'decimal(19,4)'
        columns_list['development_expenditure'] = 'decimal(19,4)'
        columns_list['good_will'] = 'decimal(19,4)'
        columns_list['long_deferred_expense'] = 'decimal(19,4)'
        columns_list['deferred_tax_assets'] = 'decimal(19,4)'
        columns_list['other_non_current_assets'] = 'decimal(19,4)'
        columns_list['total_non_current_assets'] = 'decimal(19,4)'
        columns_list['total_assets'] = 'decimal(19,4)'
        columns_list['shortterm_loan'] = 'decimal(19,4)'
        columns_list['borrowing_from_centralbank'] = 'decimal(19,4)'
        columns_list['deposit_in_interbank'] = 'decimal(19,4)'
        columns_list['borrowing_capital'] = 'decimal(19,4)'
        columns_list['trading_liability'] = 'decimal(19,4)'
        columns_list['notes_payable'] = 'decimal(19,4)'
        columns_list['accounts_payable'] = 'decimal(19,4)'
        columns_list['advance_peceipts'] = 'decimal(19,4)'
        columns_list['sold_buyback_secu_proceeds'] = 'decimal(19,4)'
        columns_list['commission_payable'] = 'decimal(19,4)'
        columns_list['salaries_payable'] = 'decimal(19,4)'
        columns_list['taxs_payable'] = 'decimal(19,4)'
        columns_list['interest_payable'] = 'decimal(19,4)'
        columns_list['dividend_payable'] = 'decimal(19,4)'
        columns_list['other_payable'] = 'decimal(19,4)'
        columns_list['reinsurance_payables'] = 'decimal(19,4)'
        columns_list['insurance_contract_reserves'] = 'decimal(19,4)'
        columns_list['proxy_secu_proceeds'] = 'decimal(19,4)'
        columns_list['receivings_from_vicariously_sold_securities'] = 'decimal(19,4)'
        columns_list['non_current_liability_in_one_year'] = 'decimal(19,4)'
        columns_list['other_current_liability'] = 'decimal(19,4)'
        columns_list['total_current_liability'] = 'decimal(19,4)'
        columns_list['longterm_loan'] = 'decimal(19,4)'
        columns_list['bonds_payable'] = 'decimal(19,4)'
        columns_list['longterm_account_payable'] = 'decimal(19,4)'
        columns_list['specific_account_payable'] = 'decimal(19,4)'
        columns_list['estimate_liability'] = 'decimal(19,4)'
        columns_list['deferred_tax_liability'] = 'decimal(19,4)'
        columns_list['other_non_current_liability'] = 'decimal(19,4)'
        columns_list['total_non_current_liability'] = 'decimal(19,4)'
        columns_list['total_liability'] = 'decimal(19,4)'
        columns_list['paidin_capital'] = 'decimal(19,4)'
        columns_list['capital_reserve_fund'] = 'decimal(19,4)'
        columns_list['treasury_stock'] = 'decimal(19,4)'
        columns_list['specific_reserves'] = 'decimal(19,4)'
        columns_list['surplus_reserve_fund'] = 'decimal(19,4)'
        columns_list['ordinary_risk_reserve_fund'] = 'decimal(19,4)'
        columns_list['retained_profit'] = 'decimal(19,4)'
        columns_list['foreign_currency_report_conv_diff'] = 'decimal(19,4)'
        columns_list['equities_parent_company_owners'] = 'decimal(19,4)'
        columns_list['minority_interests'] = 'decimal(19,4)'
        columns_list['total_owner_equities'] = 'decimal(19,4)'
        columns_list['total_sheet_owner_equities'] = 'decimal(19,4)'
        # Derived columns — not read from the source SQL; they are computed
        # by secondary_sets() after extraction.
        columns_list['net_liability'] = 'decimal(19,4)'
        columns_list['interest_bearing_liability'] = 'decimal(19,4)'
        columns_list = collections.OrderedDict(sorted(columns_list.items(), key=lambda t: t[0]))
        time_columns = 'P.ENDDATE'
        del_columns = ['code', 'EXCHANGE', 'SType', 'ReportStyle', 'year']
        sub_columns = []  # columns to convert to single-quarter values (none for balance sheet — it is a point-in-time statement)
        self._sync_fun.set_columns(columns_list, self.create_sql(), time_columns, del_columns,
                                   sub_columns)
        self._sync_fun.set_change_symbol(self.change_symbol)

    def create_sql(self):
        """Build the extraction SQL for TQ_FIN_PROBALSHEETNEW, restricted to
        A-share equities (SType='EQA') and consolidated reports (REPORTTYPE=1).
        The trailing 'and ' is completed by SyncFundamentals with a date filter."""
        sql = """select S.Symbol AS code,S.Exchange AS EXCHANGE, S.SType, P.PublishDate AS pub_date,
                 P.ENDDATE AS report_date,P.REPORTTYPE AS ReportStyle, REPORTYEAR as year,
                 P.CURFDS as cash_equivalents,
                 P.SETTRESEDEPO as settlement_provi,
                 P.PLAC as lend_capital,
                 P.TRADFINASSET as trading_assets,
                 P.NOTESRECE as bill_receivable,
                 P.ACCORECE as account_receivable,
                 P.PREP as advance_payment,
                 P.PREMRECE as insurance_receivables,
                 P.REINRECE as reinsurance_receivables,
                 P.REINCONTRESE as reinsurance_contract_reserves_receivable,
                 P.INTERECE as interest_receivable,
                 P.DIVIDRECE as dividend_receivable,
                 P.OTHERRECE as other_receivable,
                 P.PURCRESAASSET as bought_sellback_assets,
                 P.INVE as inventories,
                 P.EXPINONCURRASSET as non_current_asset_in_one_year,
                 P.OTHERCURRASSE as other_current_assets,
                 P.TOTCURRASSET as total_current_assets,
                 P.LENDANDLOAN as loan_and_advance,
                 P.AVAISELLASSE as hold_for_sale_assets,
                 P.HOLDINVEDUE as hold_to_maturity_investments,
                 P.LONGRECE as longterm_receivable_account,
                 P.EQUIINVE as longterm_equity_invest,
                 P.INVEPROP as investment_property,
                 P.FIXEDASSEIMMO as fixed_assets,
                 P.CONSPROG as constru_in_process,
                 P.ENGIMATE as construction_materials,
                 P.FIXEDASSECLEA as fixed_assets_liquidation,
                 P.PRODASSE as biological_assets,
                 P.HYDRASSET as oil_gas_assets,
                 P.INTAASSET as intangible_assets,
                 P.DEVEEXPE as development_expenditure,
                 P.GOODWILL as good_will,
                 P.LOGPREPEXPE as long_deferred_expense,
                 P.DEFETAXASSET as deferred_tax_assets,
                 P.OTHERNONCASSE as other_non_current_assets,
                 P.TOTALNONCASSETS as total_non_current_assets,
                 P.TOTASSET as total_assets,
                 P.SHORTTERMBORR as shortterm_loan,
                 P.CENBANKBORR as borrowing_from_centralbank,
                 P.DEPOSIT as deposit_in_interbank,
                 P.FDSBORR as borrowing_capital,
                 P.TRADFINLIAB as trading_liability,
                 P.NOTESPAYA as notes_payable,
                 P.ACCOPAYA as accounts_payable,
                 P.ADVAPAYM as advance_peceipts,
                 P.SELLREPASSE as sold_buyback_secu_proceeds,
                 P.COPEPOUN as commission_payable,
                 P.COPEWORKERSAL as salaries_payable,
                 P.TAXESPAYA as taxs_payable,
                 P.INTEPAYA as interest_payable,
                 P.DIVIPAYA as dividend_payable,
                 P.OTHERFEEPAYA as other_payable,
                 P.COPEWITHREINRECE as reinsurance_payables,
                 P.INSUCONTRESE as insurance_contract_reserves,
                 P.ACTITRADSECU as proxy_secu_proceeds,
                 P.ACTIUNDESECU as receivings_from_vicariously_sold_securities,
                 P.DUENONCLIAB as non_current_liability_in_one_year,
                 P.OTHERCURRELIABI as other_current_liability,
                 P.TOTALCURRLIAB as total_current_liability,
                 P.LONGBORR as longterm_loan,
                 P.BDSPAYA as bonds_payable,
                 P.LONGPAYA as longterm_account_payable,
                 P.SPECPAYA as specific_account_payable,
                 P.EXPENONCLIAB as estimate_liability,
                 P.DEFEINCOTAXLIAB as deferred_tax_liability,
                 P.OTHERNONCLIABI as other_non_current_liability,
                 P.TOTALNONCLIAB as total_non_current_liability,
                 P.TOTLIAB as total_liability,
                 P.PAIDINCAPI as paidin_capital,
                 P.CAPISURP as capital_reserve_fund,
                 P.TREASTK as treasury_stock,
                 P.SPECRESE as specific_reserves,
                 P.RESE as surplus_reserve_fund,
                 P.GENERISKRESE as ordinary_risk_reserve_fund,
                 P.UNDIPROF as retained_profit,
                 P.CURTRANDIFF as foreign_currency_report_conv_diff,
                 P.PARESHARRIGH as equities_parent_company_owners,
                 P.MINYSHARRIGH as minority_interests,
                 P.RIGHAGGR as total_owner_equities,
                 P.TOTLIABSHAREQUI as total_sheet_owner_equities
                 from QADB.dbo.TQ_FIN_PROBALSHEETNEW AS P JOIN FCDB.dbo.SecurityCode as S ON
                 S.CompanyCode = P.COMPCODE
                 where P.REPORTTYPE={0} and S.SType='{1}' and S.Enabled=0 and S.Status=0 and """.format(1,
                                                                                                        'EQA')
        return sql

    def secondary_sets(self, trades_date_fundamentals):
        """Compute derived columns on the extracted frame in place and return it."""
        # Original comment: 净资产 ("net assets").
        # NOTE(review): the formula sums short-term loans, long-term loans and
        # bonds payable, which looks like total interest-bearing debt rather
        # than net assets — confirm the intended semantics of net_liability.
        trades_date_fundamentals['net_liability'] = trades_date_fundamentals['shortterm_loan'] + \
                                                    trades_date_fundamentals['longterm_loan'] + \
                                                    trades_date_fundamentals['bonds_payable']
        # Original comment: 有息负债 ("interest-bearing liabilities").
        trades_date_fundamentals['interest_bearing_liability'] = trades_date_fundamentals['net_liability'] - \
                                                                 trades_date_fundamentals[
                                                                     'non_current_liability_in_one_year']
        # Total assets (left disabled in the original source):
        # trades_date_fundamentals['total_assets'] = trades_date_fundamentals['total_current_assets'] + \
        #                                            trades_date_fundamentals['total_current_assets']
        return trades_date_fundamentals

    def change_symbol(self, trades_date_df):
        """Map the raw exchange/code pair to a suffixed symbol
        ('CNSESH' → .XSHG, otherwise .XSHE)."""
        return np.where(trades_date_df['EXCHANGE'] == 'CNSESH',
                        trades_date_df['code'] + '.XSHG',
                        trades_date_df['code'] + '.XSHE')

    def update_report(self, start_date, end_date, count):
        """Run the report-date-aligned sync for the given window."""
        self._sync_fun.update_report(start_date, end_date, count)

    def do_update(self, start_date, end_date, count, order='DESC'):
        """Run the trade-date-aligned sync for the given window."""
        self._sync_fun.do_update(start_date, end_date, count, order)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--start_date', type=int, default=20070101)
parser.add_argument('--end_date', type=int, default=0)
parser.add_argument('--count', type=int, default=2)
parser.add_argument('--rebuild', type=bool, default=False)
parser.add_argument('--update', type=bool, default=False)
parser.add_argument('--report', type=bool, default=False)
parser.add_argument('--rebuild_report', type=bool, default=False)
parser.add_argument('--schedule', type=bool, default=False)
args = parser.parse_args()
if args.end_date == 0:
end_date = int(datetime.now().date().strftime('%Y%m%d'))
else:
end_date = args.end_date
if args.rebuild:
processor = SyncBalance()
processor.create_columns()
processor.create_dest_tables()
processor.do_update(args.start_date, end_date, args.count)
elif args.update:
processor = SyncBalance()
processor.create_columns()
processor.do_update(args.start_date, end_date, args.count)
elif args.rebuild_report:
processor = SyncBalance()
processor.create_columns()
processor.create_dest_report_tables()
processor.update_report(args.start_date, end_date, args.count)
elif args.report:
processor = SyncBalance()
processor.create_columns()
processor.update_report(args.start_date, end_date, args.count)
if args.schedule:
processor = SyncBalance()
processor.create_columns()
start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name, 'trade_date')
print('running schedule task, start date:', start_date, ';end date:', end_date)
processor.do_update(start_date, end_date, -1, '')
processor.create_columns()
start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name + '_report', 'report_date')
print('running schedule report task, start date:', start_date, ';end date:', end_date)
processor.update_report(start_date, end_date, -1)
#!/usr/bin/env python
# coding=utf-8
import pdb
import collections
import numpy as np
from datetime import datetime
import sqlalchemy as sa
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import argparse
from sync_fundamentals import SyncFundamentals
class SyncCashFlow(object):
def __init__(self):
self._sync_fun = SyncFundamentals(sa.create_engine("mssql+pymssql://read:read@192.168.100.64:1433/QADB"),
None, 'cash_flow')
def create_dest_tables(self):
self._sync_fun.create_dest_tables('cash_flow')
def create_dest_report_tables(self):
self._sync_fun.create_dest_report_tables('cash_flow_report')
def create_columns(self):
columns_list = collections.OrderedDict()
columns_list['goods_sale_and_service_render_cash'] = 'decimal(19,4)'
columns_list['net_deposit_increase'] = 'decimal(19,4)'
columns_list['net_borrowing_from_central_bank'] = 'decimal(19,4)'
columns_list['net_borrowing_from_finance_co'] = 'decimal(19,4)'
columns_list['net_original_insurance_cash'] = 'decimal(19,4)'
columns_list['net_cash_received_from_reinsurance_business'] = 'decimal(19,4)'
columns_list['net_insurer_deposit_investment'] = 'decimal(19,4)'
columns_list['net_deal_trading_assets'] = 'decimal(19,4)'
columns_list['interest_and_commission_cashin'] = 'decimal(19,4)'
columns_list['net_increase_in_placements'] = 'decimal(19,4)'
columns_list['net_buyback'] = 'decimal(19,4)'
columns_list['tax_levy_refund'] = 'decimal(19,4)'
columns_list['other_cashin_related_operate'] = 'decimal(19,4)'
columns_list['subtotal_operate_cash_inflow'] = 'decimal(19,4)'
columns_list['goods_and_services_cash_paid'] = 'decimal(19,4)'
columns_list['net_loan_and_advance_increase'] = 'decimal(19,4)'
columns_list['net_deposit_in_cb_and_ib'] = 'decimal(19,4)'
columns_list['original_compensation_paid'] = 'decimal(19,4)'
columns_list['handling_charges_and_commission'] = 'decimal(19,4)'
columns_list['policy_dividend_cash_paid'] = 'decimal(19,4)'
columns_list['staff_behalf_paid'] = 'decimal(19,4)'
columns_list['tax_payments'] = 'decimal(19,4)'
columns_list['other_operate_cash_paid'] = 'decimal(19,4)'
columns_list['subtotal_operate_cash_outflow'] = 'decimal(19,4)'
columns_list['net_operate_cash_flow'] = 'decimal(19,4)'
columns_list['invest_withdrawal_cash'] = 'decimal(19,4)'
columns_list['invest_proceeds'] = 'decimal(19,4)'
columns_list['fix_intan_other_asset_dispo_cash'] = 'decimal(19,4)'
columns_list['net_cash_deal_subcompany'] = 'decimal(19,4)'
columns_list['other_cash_from_invest_act'] = 'decimal(19,4)'
columns_list['subtotal_invest_cash_inflow'] = 'decimal(19,4)'
columns_list['fix_intan_other_asset_acqui_cash'] = 'decimal(19,4)'
columns_list['invest_cash_paid'] = 'decimal(19,4)'
columns_list['impawned_loan_net_increase'] = 'decimal(19,4)'
columns_list['net_cash_from_sub_company'] = 'decimal(19,4)'
columns_list['other_cash_to_invest_act'] = 'decimal(19,4)'
columns_list['subtotal_invest_cash_outflow'] = 'decimal(19,4)'
columns_list['net_invest_cash_flow'] = 'decimal(19,4)'
columns_list['cash_from_invest'] = 'decimal(19,4)'
columns_list['cash_from_mino_s_invest_sub'] = 'decimal(19,4)'
columns_list['cash_from_borrowing'] = 'decimal(19,4)'
columns_list['cash_from_bonds_issue'] = 'decimal(19,4)'
columns_list['other_finance_act_cash'] = 'decimal(19,4)'
columns_list['subtotal_finance_cash_inflow'] = 'decimal(19,4)'
columns_list['borrowing_repayment'] = 'decimal(19,4)'
columns_list['dividend_interest_payment'] = 'decimal(19,4)'
columns_list['proceeds_from_sub_to_mino_s'] = 'decimal(19,4)'
columns_list['other_finance_act_payment'] = 'decimal(19,4)'
columns_list['subtotal_finance_cash_outflow'] = 'decimal(19,4)'
columns_list['net_finance_cash_flow'] = 'decimal(19,4)'
columns_list['exchange_rate_change_effect'] = 'decimal(19,4)'
columns_list['cash_equivalent_increase'] = 'decimal(19,4)'
columns_list['cash_equivalents_at_beginning'] = 'decimal(19,4)'
columns_list['cash_and_equivalents_at_end'] = 'decimal(19,4)'
columns_list = collections.OrderedDict(sorted(columns_list.items(), key=lambda t: t[0]))
time_columns = 'P.ENDDATE'
del_columns = ['code', 'EXCHANGE', 'SType', 'ReportStyle', 'year']
sub_columns = ['goods_sale_and_service_render_cash', 'net_deposit_increase', 'net_borrowing_from_central_bank',
'net_borrowing_from_finance_co', 'net_original_insurance_cash',
'net_cash_received_from_reinsurance_business', 'net_insurer_deposit_investment',
'net_deal_trading_assets', 'interest_and_commission_cashin', 'net_increase_in_placements',
'net_buyback', 'tax_levy_refund', 'other_cashin_related_operate', 'subtotal_operate_cash_inflow',
'goods_and_services_cash_paid', 'net_loan_and_advance_increase', 'net_deposit_in_cb_and_ib',
'original_compensation_paid', 'handling_charges_and_commission', 'policy_dividend_cash_paid',
'staff_behalf_paid', 'tax_payments', 'other_operate_cash_paid', 'subtotal_operate_cash_outflow',
'net_operate_cash_flow', 'invest_withdrawal_cash', 'invest_proceeds',
'fix_intan_other_asset_dispo_cash', 'net_cash_deal_subcompany', 'other_cash_from_invest_act',
'subtotal_invest_cash_inflow', 'fix_intan_other_asset_acqui_cash', 'invest_cash_paid',
'impawned_loan_net_increase', 'net_cash_from_sub_company', 'other_cash_to_invest_act',
'subtotal_invest_cash_outflow', 'net_invest_cash_flow', 'cash_from_invest',
'cash_from_mino_s_invest_sub', 'cash_from_borrowing', 'cash_from_bonds_issue',
'other_finance_act_cash', 'subtotal_finance_cash_inflow', 'borrowing_repayment',
'dividend_interest_payment', 'proceeds_from_sub_to_mino_s', 'other_finance_act_payment',
'subtotal_finance_cash_outflow', 'net_finance_cash_flow', 'exchange_rate_change_effect',
'cash_equivalent_increase', 'cash_equivalents_at_beginning',
'cash_and_equivalents_at_end'] # 换算单季
self._sync_fun.set_columns(columns_list, self.create_sql(), time_columns, del_columns,
sub_columns)
self._sync_fun.set_change_symbol(self.change_symbol)
def create_sql(self):
sql = """select S.Symbol AS code,S.Exchange AS EXCHANGE, S.SType, P.PublishDate AS pub_date,
P.ENDDATE AS report_date,P.REPORTTYPE AS ReportStyle, REPORTYEAR as year,
P.LABORGETCASH as goods_sale_and_service_render_cash,
P.DEPONETR as net_deposit_increase,
P.BANKLOANNETINCR as net_borrowing_from_central_bank,
P.FININSTNETR as net_borrowing_from_finance_co,
P.INSPREMCASH as net_original_insurance_cash,
P.INSNETC as net_cash_received_from_reinsurance_business,
P.SAVINETR as net_insurer_deposit_investment,
P.DISPTRADNETINCR as net_deal_trading_assets,
P.CHARINTECASH as interest_and_commission_cashin,
P.FDSBORRNETR as net_increase_in_placements,
P.REPNETINCR as net_buyback,
P.TAXREFD as tax_levy_refund,
P.RECEOTHERBIZCASH as other_cashin_related_operate,
P.BIZCASHINFL as subtotal_operate_cash_inflow,
P.LABOPAYC as goods_and_services_cash_paid,
P.LOANSNETR as net_loan_and_advance_increase,
P.TRADEPAYMNETR as net_deposit_in_cb_and_ib,
P.PAYCOMPGOLD as original_compensation_paid,
P.PAYINTECASH as handling_charges_and_commission,
P.PAYDIVICASH as policy_dividend_cash_paid,
P.PAYWORKCASH as staff_behalf_paid,
P.PAYTAX as tax_payments,
P.PAYACTICASH as other_operate_cash_paid,
P.BIZCASHOUTF as subtotal_operate_cash_outflow,
P.MANANETR as net_operate_cash_flow,
P.WITHINVGETCASH as invest_withdrawal_cash,
P.INVERETUGETCASH as invest_proceeds,
P.FIXEDASSETNETC as fix_intan_other_asset_dispo_cash,
P.SUBSNETC as net_cash_deal_subcompany,
P.RECEINVCASH as other_cash_from_invest_act,
P.INVCASHINFL as subtotal_invest_cash_inflow,
P.ACQUASSETCASH as fix_intan_other_asset_acqui_cash,
P.PAYINVECASH as invest_cash_paid,
P.LOANNETR as impawned_loan_net_increase,
P.SUBSPAYNETCASH as net_cash_from_sub_company,
P.PAYINVECASH as other_cash_to_invest_act,
P.INVCASHOUTF as subtotal_invest_cash_outflow,
P.INVNETCASHFLOW as net_invest_cash_flow,
P.INVRECECASH as cash_from_invest,
P.SUBSRECECASH as cash_from_mino_s_invest_sub,
P.RECEFROMLOAN as cash_from_borrowing,
P.ISSBDRECECASH as cash_from_bonds_issue,
P.RECEFINCASH as other_finance_act_cash,
P.FINCASHINFL as subtotal_finance_cash_inflow,
P.DEBTPAYCASH as borrowing_repayment,
P.DIVIPROFPAYCASH as dividend_interest_payment,
P.SUBSPAYDIVID as proceeds_from_sub_to_mino_s,
P.FINRELACASH as other_finance_act_payment,
P.FINCASHOUTF as subtotal_finance_cash_outflow,
P.FINNETCFLOW as net_finance_cash_flow,
P.CHGEXCHGCHGS as exchange_rate_change_effect,
P.CASHNETR as cash_equivalent_increase,
P.INICASHBALA as cash_equivalents_at_beginning,
FINALCASHBALA as cash_and_equivalents_at_end
from QADB.dbo.TQ_FIN_PROCFSTATEMENTNEW AS P JOIN FCDB.dbo.SecurityCode as S ON
S.CompanyCode = P.COMPCODE
where P.REPORTTYPE={0} AND S.SType='{1}' and S.Enabled=0 and S.Status=0 AND """.format(1,
'EQA')
return sql
def change_symbol(self, trades_date_df):
return np.where(trades_date_df['EXCHANGE'] == 'CNSESH',
trades_date_df['code'] + '.XSHG',
trades_date_df['code'] + '.XSHE')
    def update_report(self, start_date, end_date, count):
        """Delegate to SyncFundamentals.update_report: refresh the
        per-report-date table for the given date window / count."""
        self._sync_fun.update_report(start_date, end_date, count)
    def do_update(self, start_date, end_date, count, order='DESC'):
        """Delegate to SyncFundamentals.do_update: refresh the
        per-trade-date table for the given date window / count."""
        self._sync_fun.do_update(start_date, end_date, count, order)
if __name__ == '__main__':
    def _cli_bool(value):
        """Parse a boolean command-line flag.

        argparse's ``type=bool`` treats any non-empty string — including
        the literal 'False' — as True; this converter interprets the text
        instead, while remaining backward compatible with '--flag True'.
        """
        return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y')

    parser = argparse.ArgumentParser()
    parser.add_argument('--start_date', type=int, default=20070101)
    parser.add_argument('--end_date', type=int, default=0)
    parser.add_argument('--count', type=int, default=2)
    parser.add_argument('--rebuild', type=_cli_bool, default=False)
    parser.add_argument('--update', type=_cli_bool, default=False)
    parser.add_argument('--report', type=_cli_bool, default=False)
    parser.add_argument('--rebuild_report', type=_cli_bool, default=False)
    parser.add_argument('--schedule', type=_cli_bool, default=False)
    args = parser.parse_args()
    # An end_date of 0 means "up to today".
    if args.end_date == 0:
        end_date = int(datetime.now().date().strftime('%Y%m%d'))
    else:
        end_date = args.end_date
    if args.rebuild:
        # Drop/recreate the per-trade-date table, then backfill.
        processor = SyncCashFlow()
        processor.create_columns()
        processor.create_dest_tables()
        processor.do_update(args.start_date, end_date, args.count)
    elif args.update:
        processor = SyncCashFlow()
        processor.create_columns()
        processor.do_update(args.start_date, end_date, args.count)
    elif args.rebuild_report:
        # Drop/recreate the per-report-date table, then backfill.
        processor = SyncCashFlow()
        processor.create_columns()
        processor.create_dest_report_tables()
        processor.update_report(args.start_date, end_date, args.count)
    elif args.report:
        processor = SyncCashFlow()
        processor.create_columns()
        processor.update_report(args.start_date, end_date, args.count)
    if args.schedule:
        # Incremental mode: resume from the last date stored in each table.
        processor = SyncCashFlow()
        processor.create_columns()
        start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name, 'trade_date')
        print('running schedule task, start date:', start_date, ';end date:', end_date)
        processor.do_update(start_date, end_date, -1, '')
        processor.create_columns()
        start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name + '_report', 'report_date')
        print('running schedule report task, start date:', start_date, ';end date:', end_date)
        processor.update_report(start_date, end_date, -1)
\ No newline at end of file
#!/usr/bin/env python
# coding=utf-8
import collections
import numpy as np
import pdb
from datetime import datetime
import sqlalchemy as sa
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import argparse
from sync_fundamentals import SyncFundamentals
class SyncIncome(object):
    """Sync income-statement data into the destination tables 'income'
    (one row per stock per trade date) and 'income_report' (one row per
    stock per report period).

    All storage/transformation logic lives in SyncFundamentals; this class
    only supplies the column schema and the extraction SQL against
    TQ_FIN_PROINCSTATEMENTNEW.
    """
    def __init__(self):
        # Engines are built inside SyncFundamentals from the config module.
        self._sync_fun = SyncFundamentals(None, None, 'income')
    def create_dest_tables(self):
        """Drop and recreate the per-trade-date 'income' table."""
        self._sync_fun.create_dest_tables('income')
    def create_dest_report_tables(self):
        """Drop and recreate the per-report-date 'income_report' table."""
        self._sync_fun.create_dest_report_tables('income_report')
    def create_columns(self):
        """Register the destination column schema, the extraction SQL and
        the list of cumulative columns converted to single-quarter values."""
        columns_list = collections.OrderedDict()
        columns_list["total_operating_revenue"] = "decimal(19,4)"
        columns_list["operating_revenue"] = "decimal(19,4)"
        columns_list["interest_income"] = "decimal(19,4)"
        columns_list["premiums_earned"] = "decimal(19,4)"
        columns_list["commission_income"] = "decimal(19,4)"
        columns_list["total_operating_cost"] = "decimal(19,4)"
        columns_list["operating_cost"] = "decimal(19,4)"
        columns_list["interest_expense"] = "decimal(19,4)"
        columns_list["commission_expense"] = "decimal(19,4)"
        columns_list["refunded_premiums"] = "decimal(19,4)"
        columns_list["net_pay_insurance_claims"] = "decimal(19,4)"
        columns_list["withdraw_insurance_contract_reserve"] = "decimal(19,4)"
        columns_list["policy_dividend_payout"] = "decimal(19,4)"
        columns_list["reinsurance_cost"] = "decimal(19,4)"
        columns_list["operating_tax_surcharges"] = "decimal(19,4)"
        columns_list["sale_expense"] = "decimal(19,4)"
        columns_list["administration_expense"] = "decimal(19,4)"
        columns_list["financial_expense"] = "decimal(19,4)"
        columns_list["asset_impairment_loss"] = "decimal(19,4)"
        columns_list["fair_value_variable_income"] = "decimal(19,4)"
        columns_list["investment_income"] = "decimal(19,4)"
        columns_list["invest_income_associates"] = "decimal(19,4)"
        columns_list["exchange_income"] = "decimal(19,4)"
        columns_list["operating_profit"] = "decimal(19,4)"
        columns_list["non_operating_revenue"] = "decimal(19,4)"
        columns_list["non_operating_expense"] = "decimal(19,4)"
        columns_list["disposal_loss_non_current_liability"] = "decimal(19,4)"
        columns_list["total_profit"] = "decimal(19,4)"
        columns_list["income_tax_expense"] = "decimal(19,4)"
        columns_list["net_profit"] = "decimal(19,4)"
        columns_list["np_parent_company_owners"] = "decimal(19,4)"
        columns_list["minority_profit"] = "decimal(19,4)"
        columns_list["basic_eps"] = "decimal(19,4)"
        columns_list["diluted_eps"] = "decimal(19,4)"
        columns_list["other_composite_income"] = "decimal(19,4)"
        columns_list["total_composite_income"] = "decimal(19,4)"
        columns_list["ci_parent_company_owners"] = "decimal(19,4)"
        columns_list["ci_minority_owners"] = "decimal(19,4)"
        # Sort alphabetically so the destination table layout is deterministic.
        columns_list = collections.OrderedDict(sorted(columns_list.items(), key=lambda t: t[0]))
        time_columns = 'P.ENDDATE'
        # Helper columns dropped before the rows are stored.
        del_columns = ['code', 'EXCHANGE', 'SType', 'ReportStyle', 'year']
        sub_columns = ['total_operating_revenue', 'operating_revenue', 'interest_income', 'premiums_earned',
                       'commission_income', 'total_operating_cost', 'operating_cost', 'interest_expense',
                       'commission_expense', 'refunded_premiums', 'net_pay_insurance_claims',
                       'withdraw_insurance_contract_reserve', 'policy_dividend_payout', 'reinsurance_cost',
                       'operating_tax_surcharges', 'sale_expense', 'administration_expense', 'financial_expense',
                       'asset_impairment_loss', 'fair_value_variable_income', 'investment_income',
                       'invest_income_associates', 'exchange_income', 'operating_profit', 'non_operating_revenue',
                       'non_operating_expense', 'disposal_loss_non_current_liability', 'total_profit',
                       'income_tax_expense', 'net_profit', 'np_parent_company_owners', 'minority_profit', 'basic_eps',
                       'diluted_eps', 'other_composite_income', 'total_composite_income', 'ci_parent_company_owners',
                       'ci_minority_owners']  # cumulative columns extracted to derive single-quarter values
        self._sync_fun.set_columns(columns_list, self.create_sql(), time_columns, del_columns, sub_columns)
        self._sync_fun.set_change_symbol(self.change_symbol)
    def create_sql(self):
        """Build the extraction SELECT; it deliberately ends with 'AND ' so
        SyncFundamentals can append the time filter at fetch time.
        REPORTTYPE=1 / SType='EQA' — presumably consolidated statements of
        A-share issuers; confirm against the source schema docs."""
        sql = """select S.Symbol AS code,S.Exchange AS EXCHANGE, S.SType, P.PublishDate AS pub_date,
                P.ENDDATE AS report_date,P.REPORTTYPE AS ReportStyle, REPORTYEAR as year,
                P.BIZTOTINCO as total_operating_revenue,
                P.BIZINCO as operating_revenue,
                P.INTEINCO as interest_income,
                P.EARNPREM as premiums_earned,
                P.POUNINCO as commission_income,
                P.BIZTOTCOST as total_operating_cost,
                P.BIZCOST as operating_cost,
                P.INTEEXPE as interest_expense,
                P.POUNEXPE as commission_expense,
                P.SURRGOLD as refunded_premiums,
                P.COMPNETEXPE as net_pay_insurance_claims,
                P.CONTRESS as withdraw_insurance_contract_reserve,
                P.POLIDIVIEXPE as policy_dividend_payout,
                P.REINEXPE as reinsurance_cost,
                P.BIZTAX as operating_tax_surcharges,
                P.SALESEXPE as sale_expense,
                P.MANAEXPE as administration_expense,
                P.FINEXPE as financial_expense,
                P.ASSEIMPALOSS as asset_impairment_loss,
                P.VALUECHGLOSS as fair_value_variable_income,
                P.INVEINCO as investment_income,
                P.ASSOINVEPROF as invest_income_associates,
                P.EXCHGGAIN as exchange_income,
                P.PERPROFIT as operating_profit,
                P.NONOREVE as non_operating_revenue,
                P.NONOEXPE as non_operating_expense,
                P.NONCASSETSDISL as disposal_loss_non_current_liability,
                P.TOTPROFIT as total_profit,
                P.INCOTAXEXPE as income_tax_expense,
                P.NETPROFIT as net_profit,
                P.PARENETP as np_parent_company_owners,
                P.MINYSHARRIGH as minority_profit,
                P.BASICEPS as basic_eps,
                P.DILUTEDEPS as diluted_eps,
                P.OTHERCOMPINCO as other_composite_income,
                P.COMPINCOAMT as total_composite_income,
                P.PARECOMPINCO as ci_parent_company_owners,
                P.MINYSHARINCO as ci_minority_owners
                from QADB.dbo.TQ_FIN_PROINCSTATEMENTNEW AS P JOIN FCDB.dbo.SecurityCode as S on
                S.CompanyCode = P.COMPCODE
                where P.REPORTTYPE={0} AND S.SType='{1}' and S.Enabled=0 and S.Status=0 AND """.format(1,
                                                                                                      'EQA')
        return sql
    def change_symbol(self, trades_date_df):
        """Map exchange codes to symbols: 'CNSESH' -> '.XSHG', else '.XSHE'."""
        return np.where(trades_date_df['EXCHANGE'] == 'CNSESH',
                        trades_date_df['code'] + '.XSHG',
                        trades_date_df['code'] + '.XSHE')
    def update_report(self, start_date, end_date, count):
        """Delegate: refresh the per-report-date table."""
        self._sync_fun.update_report(start_date, end_date, count)
    def do_update(self, start_date, end_date, count, order='DESC'):
        """Delegate: refresh the per-trade-date table."""
        self._sync_fun.do_update(start_date, end_date, count, order)
if __name__ == '__main__':
    def _cli_bool(value):
        """Parse a boolean command-line flag.

        argparse's ``type=bool`` treats any non-empty string — including
        the literal 'False' — as True; this converter interprets the text
        instead, while remaining backward compatible with '--flag True'.
        """
        return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y')

    parser = argparse.ArgumentParser()
    parser.add_argument('--start_date', type=int, default=20070101)
    parser.add_argument('--end_date', type=int, default=0)
    parser.add_argument('--count', type=int, default=2)
    parser.add_argument('--rebuild', type=_cli_bool, default=False)
    parser.add_argument('--update', type=_cli_bool, default=False)
    parser.add_argument('--report', type=_cli_bool, default=False)
    parser.add_argument('--rebuild_report', type=_cli_bool, default=False)
    parser.add_argument('--schedule', type=_cli_bool, default=False)
    args = parser.parse_args()
    # An end_date of 0 means "up to today".
    if args.end_date == 0:
        end_date = int(datetime.now().date().strftime('%Y%m%d'))
    else:
        end_date = args.end_date
    if args.rebuild:
        # Drop/recreate the per-trade-date table, then backfill.
        processor = SyncIncome()
        processor.create_columns()
        processor.create_dest_tables()
        processor.do_update(args.start_date, end_date, args.count)
    elif args.update:
        processor = SyncIncome()
        processor.create_columns()
        processor.do_update(args.start_date, end_date, args.count)
    elif args.rebuild_report:
        # Drop/recreate the per-report-date table, then backfill.
        processor = SyncIncome()
        processor.create_columns()
        processor.create_dest_report_tables()
        processor.update_report(args.start_date, end_date, args.count)
    elif args.report:
        processor = SyncIncome()
        processor.create_columns()
        processor.update_report(args.start_date, end_date, args.count)
    if args.schedule:
        # Incremental mode: resume from the last date stored in each table.
        processor = SyncIncome()
        processor.create_columns()
        start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name, 'trade_date')
        print('running schedule task, start date:', start_date, ';end date:', end_date)
        processor.do_update(start_date, end_date, -1, '')
        processor.create_columns()
        start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name + '_report', 'report_date')
        print('running schedule report task, start date:', start_date, ';end date:', end_date)
        processor.update_report(start_date, end_date, -1)
\ No newline at end of file
#!/usr/bin/env python
# coding=utf-8
import collections
import numpy as np
import pdb
from datetime import datetime
import sqlalchemy as sa
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import argparse
from sync_fundamentals import SyncFundamentals
class SyncIndicator(object):
    """Sync derived financial indicators into the 'indicator' (per trade
    date) and 'indicator_report' (per report period) destination tables.

    Column schema and extraction SQL (joining TQ_FIN_PROFINMAININDEX with
    TQ_FIN_PROINDICDATA / TQ_FIN_PROINDICDATASUB) are supplied here; the
    storage machinery lives in SyncFundamentals.
    """
    def __init__(self):
        # NOTE(review): the source engine (with credentials) is hard-coded
        # here, unlike the sibling sync classes which pass None and let
        # SyncFundamentals build the engine from config.
        self._sync_fun = SyncFundamentals(sa.create_engine("mssql+pymssql://read:read@192.168.100.64:1433/QADB"),
                                          None, 'indicator')
    def create_dest_tables(self):
        """Drop and recreate the per-trade-date 'indicator' table."""
        self._sync_fun.create_dest_tables('indicator')
    def create_dest_report_tables(self):
        """Drop and recreate the per-report-date 'indicator_report' table."""
        self._sync_fun.create_dest_report_tables('indicator_report')
    def create_columns(self):
        """Register the destination schema, the extraction SQL and the list
        of cumulative columns converted to single-quarter values.

        NOTE(review): several declared columns (e.g. ocf_to_revenue,
        goods_sale_and_service_to_revenue's peers, the *_annual growth
        columns) have no matching field in create_sql() and will therefore
        remain NULL in the destination tables.
        """
        columns_list = collections.OrderedDict()
        columns_list["eps"] = "decimal(19,4)"
        columns_list["adjusted_profit"] = "decimal(19,4)"
        columns_list["operating_profit"] = "decimal(19,4)"
        columns_list["value_change_profit"] = "decimal(19,4)"
        columns_list["roe"] = "decimal(19,4)"
        columns_list["inc_return"] = "decimal(19,4)"
        columns_list["roa"] = "decimal(19,4)"
        columns_list["net_profit_margin"] = "decimal(19,4)"
        columns_list["gross_profit_margin"] = "decimal(19,4)"
        columns_list["expense_to_total_revenue"] = "decimal(19,4)"
        columns_list["operation_profit_to_total_revenue"] = "decimal(19,4)"
        columns_list["net_profit_to_total_revenue"] = "decimal(19,4)"
        columns_list["operating_expense_to_total_revenue"] = "decimal(19,4)"
        columns_list["ga_expense_to_total_revenue"] = "decimal(19,4)"
        columns_list["financing_expense_to_total_revenue"] = "decimal(19,4)"
        columns_list["operating_profit_to_profit"] = "decimal(19,4)"
        columns_list["invesment_profit_to_profit"] = "decimal(19,4)"
        columns_list["adjusted_profit_to_profit"] = "decimal(19,4)"
        columns_list["goods_sale_and_service_to_revenue"] = "decimal(19,4)"
        columns_list["ocf_to_revenue"] = "decimal(19,4)"
        columns_list["ocf_to_operating_profit"] = "decimal(19,4)"
        columns_list["inc_total_revenue_year_on_year"] = "decimal(19,4)"
        columns_list["inc_total_revenue_annual"] = "decimal(19,4)"
        columns_list["inc_revenue_year_on_year"] = "decimal(19,4)"
        columns_list["inc_revenue_annual"] = "decimal(19,4)"
        columns_list["inc_operation_profit_year_on_year"] = "decimal(19,4)"
        columns_list["inc_operation_profit_annual"] = "decimal(19,4)"
        columns_list["inc_net_profit_year_on_year"] = "decimal(19,4)"
        columns_list["inc_net_profit_annual"] = "decimal(19,4)"
        columns_list["inc_net_profit_to_shareholders_year_on_year"] = "decimal(19,4)"
        columns_list["inc_net_profit_to_shareholders_annual"] = "decimal(19,4)"
        # Sort alphabetically so the destination table layout is deterministic.
        columns_list = collections.OrderedDict(sorted(columns_list.items(), key=lambda t: t[0]))
        time_columns = 'a.ENDDATE'
        # Helper columns dropped before the rows are stored.
        del_columns = ['code', 'EXCHANGE', 'SType', 'year']
        sub_columns = ['eps',
                       'adjusted_profit',
                       'operating_profit',
                       'value_change_profit'
                       ]  # cumulative columns extracted to derive single-quarter values
        self._sync_fun.set_columns(columns_list, self.create_sql(), time_columns, del_columns, sub_columns)
        self._sync_fun.set_change_symbol(self.change_symbol)
    def create_sql(self):
        """Build the extraction SELECT; it deliberately ends with 'AND ' so
        SyncFundamentals can append the time filter at fetch time.

        NOTE(review): b.ROEDILUTEDCUT is aliased twice — as inc_return and
        as adjusted_profit_to_profit — which looks like a copy-paste slip;
        confirm the intended source field for adjusted_profit_to_profit.
        """
        sql = """select S.Symbol AS code,S.Exchange AS EXCHANGE, S.SType,a.REPORTYEAR as year,
                a.FIRSTPUBLISHDATE as pub_date,
                a.ENDDATE AS report_date,
                a.EPSBASIC as eps,
                a.NPCUT as adjusted_profit,
                b.NOPI as operating_profit,
                b.NVALCHGIT as value_change_profit,
                a.ROEDILUTED as roe,
                b.ROEDILUTEDCUT as inc_return,
                b.ROAAANNUAL as roa,
                b.SNPMARGINCONMS as net_profit_margin,
                b.SGPMARGIN as gross_profit_margin,
                c.OCOI as expense_to_total_revenue,
                b.OPPRORT as operation_profit_to_total_revenue,
                c.PROFITRATIO as net_profit_to_total_revenue,
                b.OPEXPRT as operating_expense_to_total_revenue,
                b.MGTEXPRT as ga_expense_to_total_revenue,
                b.FINLEXPRT as financing_expense_to_total_revenue,
                b.OPANITOTP as operating_profit_to_profit,
                b.NVALCHGITOTP as invesment_profit_to_profit,
                b.ROEDILUTEDCUT as adjusted_profit_to_profit,
                b.SCASHREVTOOPIRT as goods_sale_and_service_to_revenue,
                b.OPANCFTOOPNI as ocf_to_operating_profit,
                b.TAGRT as inc_total_revenue_year_on_year,
                c.OPERINYOYB as inc_revenue_year_on_year,
                c.OPERPROYOYB as inc_operation_profit_year_on_year,
                c.NETPROFITYOYB as inc_net_profit_year_on_year,
                c.NETINPNRPLYOYB as inc_net_profit_to_shareholders_year_on_year
                from TQ_FIN_PROFINMAININDEX a
                left join TQ_FIN_PROINDICDATA b
                on a.COMPCODE=b.COMPCODE and a.REPORTYEAR=b.REPORTYEAR and b.REPORTTYPE=3 and a.REPORTDATETYPE=b.REPORTDATETYPE
                left join TQ_FIN_PROINDICDATASUB c
                on a.COMPCODE=c.COMPCODE and a.REPORTYEAR=c.REPORTYEAR and c.REPORTTYPE=3 and a.REPORTDATETYPE=c.REPORTDATETYPE
                JOIN FCDB.dbo.SecurityCode as S
                on S.CompanyCode = a.COMPCODE
                where a.REPORTTYPE={0} AND S.SType='{1}' and S.Enabled=0 and S.Status=0 AND """.format(1,
                                                                                                      'EQA')
        return sql
    def change_symbol(self, trades_date_df):
        """Map exchange codes to symbols: 'CNSESH' -> '.XSHG', else '.XSHE'."""
        return np.where(trades_date_df['EXCHANGE'] == 'CNSESH',
                        trades_date_df['code'] + '.XSHG',
                        trades_date_df['code'] + '.XSHE')
    def update_report(self, start_date, end_date, count):
        """Delegate: refresh the per-report-date table."""
        self._sync_fun.update_report(start_date, end_date, count)
    def do_update(self, start_date, end_date, count, order='DESC'):
        """Delegate: refresh the per-trade-date table."""
        self._sync_fun.do_update(start_date, end_date, count, order)
if __name__ == '__main__':
    def _cli_bool(value):
        """Parse a boolean command-line flag.

        argparse's ``type=bool`` treats any non-empty string — including
        the literal 'False' — as True; this converter interprets the text
        instead, while remaining backward compatible with '--flag True'.
        """
        return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y')

    parser = argparse.ArgumentParser()
    parser.add_argument('--start_date', type=int, default=20070101)
    parser.add_argument('--end_date', type=int, default=0)
    parser.add_argument('--count', type=int, default=2)
    parser.add_argument('--rebuild', type=_cli_bool, default=False)
    parser.add_argument('--update', type=_cli_bool, default=False)
    parser.add_argument('--report', type=_cli_bool, default=False)
    parser.add_argument('--rebuild_report', type=_cli_bool, default=False)
    parser.add_argument('--schedule', type=_cli_bool, default=False)
    args = parser.parse_args()
    # An end_date of 0 means "up to today".
    if args.end_date == 0:
        end_date = int(datetime.now().date().strftime('%Y%m%d'))
    else:
        end_date = args.end_date
    if args.rebuild:
        # Drop/recreate the per-trade-date table, then backfill.
        processor = SyncIndicator()
        processor.create_columns()
        processor.create_dest_tables()
        processor.do_update(args.start_date, end_date, args.count)
    elif args.update:
        processor = SyncIndicator()
        processor.create_columns()
        processor.do_update(args.start_date, end_date, args.count)
    elif args.rebuild_report:
        # Drop/recreate the per-report-date table, then backfill.
        processor = SyncIndicator()
        processor.create_columns()
        processor.create_dest_report_tables()
        processor.update_report(args.start_date, end_date, args.count)
    elif args.report:
        processor = SyncIndicator()
        processor.create_columns()
        processor.update_report(args.start_date, end_date, args.count)
    if args.schedule:
        # Incremental mode: resume from the last date stored in each table.
        processor = SyncIndicator()
        processor.create_columns()
        start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name, 'trade_date')
        print('running schedule task, start date:', start_date, ';end date:', end_date)
        processor.do_update(start_date, end_date, -1, '')
        processor.create_columns()
        start_date = processor._sync_fun.get_start_date(processor._sync_fun._table_name + '_report', 'report_date')
        print('running schedule report task, start date:', start_date, ';end date:', end_date)
        processor.update_report(start_date, end_date, -1)
#!/usr/bin/env python
# coding=utf-8
import os
import sys
from datetime import datetime
import sqlalchemy as sa
import pandas as pd
import numpy as np
import collections
from sqlalchemy.orm import sessionmaker
import multiprocessing
sys.path.append('../..')
from utillities.sync_util import SyncUtil
from utillities.calc_tools import CalcTools
import config
from utillities.time_common import TimeCommon
class SyncFundamentals(object):
    """Generic synchronizer for financial-statement fundamentals.

    Pulls raw report rows from a source MSSQL database, converts cumulative
    (year-to-date) columns into single-quarter values, and writes the result
    into two destination MySQL tables: ``<table>`` keyed by trade date and
    ``<table>_report`` keyed by report date. Concrete data sets (income,
    cash flow, indicator, ...) configure the schema and extraction SQL via
    :meth:`set_columns`.
    """

    def __init__(self, source=None, destination=None, table_name=''):
        """Build the source/destination engines.

        :param source: optional pre-built source engine; when None one is
            created from the ``config`` module settings.
        :param destination: legacy parameter, currently ignored — the
            destination engine is always rebuilt from ``config``.
        :param table_name: base name of the destination table.
        """
        source_db = '''mssql+pymssql://{0}:{1}@{2}:{3}/{4}'''.format(config.source_db_user, config.source_db_pwd,
                                                                     config.source_db_host, config.source_db_port,
                                                                     config.source_db_database)
        destination_db = '''mysql+mysqlconnector://{0}:{1}@{2}:{3}/{4}'''.format(config.destination_db_user,
                                                                                 config.destination_db_pwd,
                                                                                 config.destination_db_host,
                                                                                 config.destination_db_port,
                                                                                 config.destination_db_database)
        self._sync_util = SyncUtil()
        if source is None:
            self._source = sa.create_engine(source_db)
        else:
            self._source = source
        # NOTE(review): the 'destination' argument is intentionally ignored;
        # the destination engine is always rebuilt from config.
        self._destination = sa.create_engine(destination_db)
        self._dest_session = sessionmaker(bind=self._destination, autocommit=False, autoflush=True)
        self._dir = config.RECORD_BASE_DIR
        self._secondary_func = None
        self._table_name = table_name

    def get_start_date(self, table, type='trade_date'):
        """Return the latest ``type`` date stored in ``table`` as a
        'YYYYMMDD' string, or the int 20070101 when the table is empty."""
        sql = """select max({0}) as trade_date from `{1}`;""".format(type, table)
        trades_sets = pd.read_sql(sql, self._destination)
        td = 20070101
        if not trades_sets.empty:
            td = trades_sets['trade_date'][0]
            td = str(td).replace('-', '')
        return td

    def set_columns(self, columns, get_sql, year_columns, del_columns,
                    sub_columns):
        """Configure the destination schema and source extraction SQL.

        :param columns: OrderedDict mapping column name -> SQL type.
        :param get_sql: SELECT statement ending with ``AND `` so the time
            filter can be appended at fetch time.
        :param year_columns: qualified source column used in the time filter.
        :param del_columns: helper columns dropped before storage.
        :param sub_columns: cumulative columns converted to single-quarter
            values.
        """
        self._columns = columns
        self._get_sql = get_sql
        self._time_columns = year_columns
        self._del_columns = del_columns
        self._sub_columns = sub_columns

    def secondary_sets(self, secondary_func):
        """Register an optional hook applied to every DataFrame just before
        it is stored (secondary processing)."""
        self._secondary_func = secondary_func

    def set_change_symbol(self, change_symbol):
        """Register the function that maps (EXCHANGE, code) to symbols."""
        self._change_symbol = change_symbol

    def create_index(self, table_name=None):
        """Create the (trade_date, symbol) index on ``table_name``
        (defaults to the base table)."""
        table = self._table_name if table_name is None else table_name
        session = self._dest_session()
        indexs = [
            '''CREATE INDEX {0}_trade_date_symbol_index ON `{0}` (trade_date, symbol);'''.format(table)
        ]
        for sql in indexs:
            session.execute(sql)
        session.commit()
        session.close()

    def create_dest_tables(self, table_name):
        """Drop and recreate the per-trade-date table, its audit timestamp
        columns and its (trade_date, symbol) index."""
        drop_sql = """drop table if exists `{0}`;""".format(table_name)
        create_sql = """create table `{0}`(
                        `id` varchar(32) NOT NULL,
                        `symbol` varchar(24) NOT NULL,
                        `pub_date` date NOT NULL,
                        `trade_date` date NOT NULL,
                        `report_date` date NOT NULL,""".format(table_name)
        for key, value in self._columns.items():
            create_sql += """`{0}` {1} DEFAULT NULL,""".format(key, value)
        create_sql += "PRIMARY KEY(`id`,`symbol`,`trade_date`,`report_date`)"
        create_sql += """) ENGINE=InnoDB DEFAULT CHARSET=utf8;"""
        session = self._dest_session()
        session.execute(drop_sql)
        session.execute(create_sql)
        # Use table_name (not self._table_name) so the ALTERs hit the table
        # just created even if the two ever differ.
        session.execute(
            '''alter table `{0}` add `creat_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP;'''.format(table_name))
        session.execute(
            '''alter table `{0}` add `update_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP;'''.format(
                table_name))
        session.commit()
        session.close()
        self.create_index(table_name)

    def create_dest_report_tables(self, table_name):
        """Drop and recreate the per-report-date table."""
        drop_sql = """drop table if exists `{0}`;""".format(table_name)
        create_sql = """create table `{0}`(
                        `symbol` varchar(24) NOT NULL,
                        `pub_date` date NOT NULL,
                        `report_date` date NOT NULL,""".format(table_name)
        for key, value in self._columns.items():
            create_sql += """`{0}` {1} DEFAULT NULL,""".format(key, value)
        create_sql += "PRIMARY KEY(`symbol`,`report_date`,`pub_date`)"
        create_sql += """) ENGINE=InnoDB DEFAULT CHARSET=utf8;"""
        session = self._dest_session()
        session.execute(drop_sql)
        session.execute(create_sql)
        session.commit()
        session.close()

    def year_update(self, df):
        """Convert cumulative sub-columns to single-quarter values by
        subtracting the next (older) row; rows are expected sorted by
        report date descending within one stock/year."""
        df.loc[df.index, self._sub_columns] = df[self._sub_columns] - df[self._sub_columns].shift(-1).fillna(0)
        return df

    def change_single(self, year):
        """Legacy single-process variant of the quarter conversion.

        NOTE(review): reads ``self.fundamentals_sets``, which is never
        assigned in this class — calling this would raise AttributeError.
        Superseded by CalcTools.change_single; kept for reference.
        """
        fundamentals_sets_year = self.fundamentals_sets[self.fundamentals_sets['year'] == year]
        stock_list = list(set(fundamentals_sets_year['code']))
        new_fundamentals_sets = pd.DataFrame()
        for stock in stock_list:
            print(stock)
            new_fundamentals_sets = new_fundamentals_sets.append(self.year_update(
                fundamentals_sets_year[
                    fundamentals_sets_year['code'] == stock]))
        return new_fundamentals_sets

    def foo(self, x):
        """Debug helper left over from multiprocessing experiments:
        returns x - 1 for positive x, else 0."""
        last = 0  # renamed from 'sum' to avoid shadowing the builtin
        for i in range(x):
            last = i
        return last

    def fetch_batch_fundamentals(self, report_start_date):
        """Fetch all reports since ``report_start_date`` and convert the
        cumulative sub-columns to single-quarter values in parallel."""
        # Build the final query locally: the previous implementation appended
        # to self._get_sql in place, which corrupted the query on any second
        # call (and then re-.format()-ed the already-formatted string).
        sql = self._get_sql + """ {0} >= '{1}' ORDER BY {0} DESC;""".format(self._time_columns,
                                                                            report_start_date)
        fundamentals_sets = pd.read_sql(sql, self._source)
        fundamentals_sets = fundamentals_sets.fillna(0)
        year_list = list(set(fundamentals_sets['year']))
        year_list.sort(reverse=True)
        stock_list = list(set(fundamentals_sets['code']))
        params = []
        cpus = multiprocessing.cpu_count()
        if len(year_list) < 4:
            # Few years: shard by symbol so each worker sees a full history.
            for i in range(cpus):
                stock_list_cpu = stock_list[i::cpus]
                params.append({
                    'fundamentals_sets_symbol': fundamentals_sets[fundamentals_sets['code'].isin(stock_list_cpu)],
                    'sub_columns': self._sub_columns,
                    'cpu': i
                })
            with multiprocessing.Pool(processes=cpus) as p:
                res = p.map(CalcTools.change_single_by_symbol, params)
        else:
            # Many years: shard by fiscal year instead.
            for year in year_list:
                print(year)
                params.append({
                    'fundamentals_sets_year': fundamentals_sets[fundamentals_sets['year'] == year],
                    'sub_columns': self._sub_columns,
                    'year': year
                })
            with multiprocessing.Pool(processes=cpus) as p:
                res = p.map(CalcTools.change_single, params)
        new_fundamentals_sets = pd.concat(res, sort=False, axis=0)
        return new_fundamentals_sets

    def update_fundamentals(self, trades_sets, fundamentals_sets, report_date_list):
        """For each trade date, select the latest report visible on that
        date per stock, snapshot it to CSV and store it in the destination
        table (falling back to row-wise upsert on bulk-insert failure)."""
        session = self._dest_session()
        trades_list = list(trades_sets['TRADEDATE'])
        trades_list.sort(reverse=True)
        for trade_date in trades_list:
            print(trade_date)
            # (latest pub date, earliest report date) window for this trade date.
            date_range = self._sync_util.every_report_range(trade_date, report_date_list)
            trades_date_fundamentals = fundamentals_sets[(fundamentals_sets['report_date'] >= str(date_range[1])) & (
                    fundamentals_sets['pub_date'] <= str(date_range[0]))]
            # Keep only the most recent report per stock.
            trades_date_fundamentals.sort_values(by='report_date', ascending=False, inplace=True)
            trades_date_fundamentals.drop_duplicates(subset=['code'], keep='first', inplace=True)
            trades_date_fundamentals['trade_date'] = trade_date
            trades_date_fundamentals = trades_date_fundamentals.dropna(how='all')
            trades_date_fundamentals['symbol'] = self._change_symbol(trades_date_fundamentals)
            trades_date_fundamentals['id'] = trades_date_fundamentals['symbol'] + trades_date_fundamentals['trade_date']
            trades_date_fundamentals.drop(self._del_columns, axis=1, inplace=True)
            # Optional secondary processing hook.
            if self._secondary_func is not None:
                trades_date_fundamentals = self._secondary_func(trades_date_fundamentals)
            # Snapshot to CSV before writing to the database.
            file_path = self._dir + '/' + self._table_name
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            file_name = os.path.join(file_path, str(trade_date)) + '.csv'
            if os.path.exists(str(file_name)):
                os.remove(str(file_name))
            trades_date_fundamentals.reset_index(drop=True, inplace=True)
            trades_date_fundamentals.to_csv(file_name, encoding='UTF-8')
            try:
                # Replace the whole trade date, then bulk insert.
                session.execute('''delete from `{0}` where trade_date={1}'''.format(self._table_name, trade_date))
                session.commit()
                trades_date_fundamentals.to_sql(name=self._table_name, con=self._destination, if_exists='append',
                                                index=False)
            except Exception as e:
                # NOTE(review): assumes a SQLAlchemy DBAPI error (.orig.msg).
                print(e.orig.msg)
                self.insert_or_update(trades_date_fundamentals)
        session.close()

    def update_report(self, start_date, end_date, count):
        """Store raw (not trade-date-aligned) reports into the
        ``<table>_report`` table for every report period in the window."""
        session = self._dest_session()
        report_table = self._table_name + '_report'
        trade_sets = self._sync_util.get_trades_ago('001002', start_date, end_date, count)
        min_year = int(int(trade_sets['TRADEDATE'].min()) / 10000)
        max_year = int(int(trade_sets['TRADEDATE'].max()) / 10000)
        report_date_list = self._sync_util.create_report_date(min_year, max_year)
        min_report_year = report_date_list[0]
        new_fundamentals = self.fetch_batch_fundamentals(min_report_year)
        for report_date in report_date_list:
            report_fundamentals = new_fundamentals[
                new_fundamentals['report_date'] == str(report_date)]
            if report_fundamentals.empty:
                continue
            report_fundamentals['symbol'] = self._change_symbol(report_fundamentals)
            report_fundamentals.drop(self._del_columns, axis=1, inplace=True)
            # Optional secondary processing hook.
            if self._secondary_func is not None:
                report_fundamentals = self._secondary_func(report_fundamentals)
            # Snapshot to CSV before writing to the database.
            file_path = self._dir + '/report/' + self._table_name
            if not os.path.exists(file_path):
                os.makedirs(file_path)
            file_name = os.path.join(file_path, str(report_date)) + '.csv'
            if os.path.exists(str(file_name)):
                os.remove(str(file_name))
            report_fundamentals.to_csv(file_name, encoding='UTF-8')
            try:
                # BUG FIX: previously deleted from the base table instead of
                # the report table, so stale report rows were never removed.
                session.execute('''delete from `{0}` where report_date={1}'''.format(report_table, report_date))
                session.commit()
                report_fundamentals.to_sql(name=report_table, con=self._destination, if_exists='append',
                                           index=False)
            except Exception as e:
                print(e.orig.msg)
                # BUG FIX: the upsert fallback previously wrote report rows
                # into the base table; route it to the report table instead.
                self.insert_or_update(report_fundamentals, report_table)
        session.close()

    def do_update(self, start_date, end_date, count, order):
        """Full per-trade-date sync for the given window / count."""
        trade_sets = self._sync_util.get_trades_ago('001002', start_date, end_date, count, order)
        min_year = int(int(trade_sets['TRADEDATE'].min()) / 10000)
        max_year = int(int(trade_sets['TRADEDATE'].max()) / 10000)
        report_date_list = self._sync_util.create_report_date(min_year, max_year)
        # Fetch from the earliest report date so every year is complete.
        min_report_year = report_date_list[0]
        new_fundamentals = self.fetch_batch_fundamentals(min_report_year)
        self.update_fundamentals(trade_sets, new_fundamentals, report_date_list)

    def insert_or_update(self, datas, table_name=None):
        """Upsert each row of ``datas`` into ``table_name`` (defaults to the
        base table) via INSERT ... ON DUPLICATE KEY UPDATE.

        The extra ``table_name`` parameter is backward compatible: existing
        callers that omit it keep the old behavior.
        """
        table = self._table_name if table_name is None else table_name
        session = self._dest_session()
        for i in range(datas.shape[0]):
            data = datas.iloc[i]
            values = ''
            update = ''
            title = ''
            for j in range(len(data)):
                index = data.index[j]
                value = str(data[j]).replace("'", "\\'")
                title += """`{0}`,""".format(index)
                values += """'{0}',""".format(value)
                update += """`{0}`='{1}',""".format(index, value)
            sql = '''insert into {0} ({1}) values({2}) ON DUPLICATE KEY UPDATE {3}'''.format(table,
                                                                                             title[0:-1],
                                                                                             values[0:-1],
                                                                                             update[0:-1]
                                                                                             )
            # Normalize pandas NaN/None string renderings to SQL NULL.
            sql = sql.replace("'nan'", 'Null').replace("'None'", 'Null')
            session.execute(sql)
            session.commit()
        session.close()
def change_symbol(trades_date_df):
    """Convert raw exchange codes into suffixed symbols.

    Rows whose EXCHANGE is 'CNSESH' (Shanghai) get code + '.XSHG';
    all other rows get code + '.XSHE'.
    """
    shanghai_mask = trades_date_df['EXCHANGE'] == 'CNSESH'
    base_codes = trades_date_df['code']
    return np.where(shanghai_mask, base_codes + '.XSHG', base_codes + '.XSHE')
if __name__ == '__main__':
    # Ad-hoc smoke test wiring an income sync against hard-coded engines.
    # NOTE(review): this block appears bit-rotted and cannot run as-is:
    #   * `columns_list.keys()[9]` raises TypeError on Python 3 (dict views
    #     are not subscriptable) — would need list(columns_list.keys())[9].
    #   * set_columns() is called without the required `sub_columns` arg.
    #   * do_update(20, '20190107') does not match
    #     do_update(start_date, end_date, count, order).
    #   * del_columns lists 'year'/'report_type', which the SQL never selects.
    sync_fun = SyncFundamentals(sa.create_engine("mssql+pymssql://read:read@10.17.205.155:1433/FCDB"),
                                sa.create_engine("mysql+mysqlconnector://root:t2R7P7@10.15.5.86:3306/factors"))
    columns_list = collections.OrderedDict()
    columns_list["total_operating_revenue"] = "decimal(19,4)"  # 9
    columns_list["total_operating_cost"] = "decimal(19,4)"  # 8
    columns_list["operating_revenue"] = "decimal(19,4)"  # 5
    columns_list["operating_cost"] = "decimal(19,4)"  # 4
    columns_list["interest_income"] = "decimal(19,4)"  # 1
    columns_list["permiums_earned"] = "decimal(19,4)"  # 6
    columns_list["commission_income"] = "decimal(19,4)"  # 0
    columns_list["refunded_premiums"] = "decimal(19,4)"  # 7
    columns_list["net_pay_insurance_claims"] = "decimal(19,4)"  # 2
    columns_list["withdraw_insurance_contract_reserve"] = "decimal(19,4)"  # 10
    columns_list["net_profit"] = "decimal(19,4)"  # 3
    # Sorted alphabetically; the trailing numbers above are each key's
    # resulting index, used by the keys()[...] lookups below.
    columns_list = collections.OrderedDict(sorted(columns_list.items(), key=lambda t: t[0]))
    sql = """select S.Symbol AS code,S.Exchange AS EXCHANGE, S.SType, C.PublishDate AS pub_date,
            C.ReportDate AS report_date,C.ReportStyle,
            C.CINST61 AS {0},
            C.CINST65 AS {1},
            C.CINST1 AS {2},
            C.CINST3 AS {3},
            C.CINST62 AS {4},
            C.CINST64 AS {5},
            C.CINST68 AS {6},
            C.CINST63 AS {7},
            C.CINST69 AS {8},
            C.CINST70 AS {9},
            C.CINST24 AS {10}
            from FCDB.dbo.CINST_New AS C JOIN FCDB.dbo.SecurityCode as S ON S.CompanyCode = C.CompanyCode
            where C.ReportStyle=11 AND S.SType='{11}' AND S.Symbol in ('600519') AND """.format(
        columns_list.keys()[9],
        columns_list.keys()[8],
        columns_list.keys()[5],
        columns_list.keys()[4],
        columns_list.keys()[1],
        columns_list.keys()[6],
        columns_list.keys()[0],
        columns_list.keys()[7],
        columns_list.keys()[2],
        columns_list.keys()[10],
        columns_list.keys()[3],
        'EQA')
    time_columns = 'C.ReportDate'
    del_columns = ['code', 'EXCHANGE', 'SType', 'ReportStyle', 'year', 'report_type']
    sync_fun.set_columns(columns_list, sql, time_columns, del_columns)
    sync_fun.set_change_symbol(change_symbol)
    sync_fun.create_dest_tables('income')
    sync_fun.do_update(20, '20190107')
#!/usr/bin/env python
# coding=utf-8
import pdb
import sys
import os
import sqlalchemy as sa
import pandas as pd
import numpy as np
import collections
import argparse
from datetime import datetime, date, time
from sqlalchemy.orm import sessionmaker
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from base_sync import BaseSync
sys.path.append('..')
from utillities.sync_util import SyncUtil
import config
class SyncSkDailyPrice(BaseSync):
def __init__(self):
super(SyncSkDailyPrice, self).__init__('sk_daily_price')
self.sync_util = SyncUtil()
self.dir = config.RECORD_BASE_DIR + self.dest_table + '/'
    def create_dest_tables(self):
        """Create the destination daily-price table via BaseSync.create_table.

        Reserved words (`change`, `open`, `close`, ...) are backtick-quoted;
        the composite primary key mirrors the other sync tables.
        """
        create_sql = """create table `{0}`(
                    `id` varchar(32) NOT NULL,
                    `symbol` varchar(32) NOT NULL,
                    `trade_date` date NOT NULL,
                    `name` varchar(50) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
                    `pre_close` decimal(15,6) DEFAULT NULL,
                    `open` decimal(15,6) DEFAULT NULL,
                    `close` decimal(15,6) DEFAULT NULL,
                    `high` decimal(15,6) DEFAULT NULL,
                    `low` decimal(15,6) DEFAULT NULL,
                    `volume` decimal(20,2) DEFAULT NULL,
                    `money` decimal(18,3) DEFAULT NULL,
                    `deals` decimal(10,0) DEFAULT NULL,
                    `change` decimal(9,4) DEFAULT NULL,
                    `change_pct` decimal(8,4) DEFAULT NULL,
                    `tot_mkt_cap` decimal(18,4) DEFAULT NULL,
                    `turn_rate` decimal(9,4) DEFAULT NULL,
                    `factor` decimal(9,4) DEFAULT NULL,
                    `ltd_factor` decimal(9,4) DEFAULT NULL,
                    PRIMARY KEY(`id`,`trade_date`,`symbol`)
                    )ENGINE=InnoDB DEFAULT CHARSET=utf8;
                    """.format(self.dest_table)
        self.create_table(create_sql)
def get_index_sets(self, trade_date):
# sql = """SELECT TRADEDATE as trade_date,
# i.exchange as Exchange,
# s.symbol as code,
# i.SENAME as name,
# LCLOSE as pre_close,
# TOPEN as 'open',
# TCLOSE as 'close',
# THIGH as high,
# TLOW as low,
# VOL as volume,
# AMOUNT as money,
# DEALS as deals,
# CHANGE as change,
# PCHG as change_pct,
# TOTMKTCAP as tot_mkt_cap,
# TURNRATE as turn_rate,
# n.R as factor,
# n.LTDR as ltd_factor
# from QADB.dbo.TQ_QT_SKDAILYPRICE i
# left join TQ_OA_STCODE s on i.SECODE = s.secode
# left join FCDB.dbo.DISPARA_NEW n on n.symbol = s.symbol and n.TDATE = '{0}' and n.etl_isvalid=1
# where (i.exchange = '001002' or i.exchange = '001003') and i.ISVALID = 1 and s.ISVALID = 1 and TRADEDATE = '{0}';""".format(
# trade_date)
sql = """select a.*,
b.R as factor,
b.LTDR as ltd_factor
from (
SELECT TRADEDATE as trade_date,
i.exchange as Exchange,
s.symbol as code,
i.SENAME as name,
LCLOSE as pre_close,
TOPEN as 'open',
TCLOSE as 'close',
THIGH as high,
TLOW as low,
VOL as volume,
AMOUNT as money,
DEALS as deals,
CHANGE as change,
PCHG as change_pct,
TOTMKTCAP as tot_mkt_cap,
TURNRATE as turn_rate
from QADB.dbo.TQ_QT_SKDAILYPRICE i
left join TQ_OA_STCODE s on i.SECODE = s.secode
where (i.exchange = '001002' or i.exchange = '001003') and i.ISVALID = 1 and s.ISVALID = 1 and TRADEDATE ='{0}'
) a
left join (select * from FCDB.dbo.DISPARA_NEW n where n.TDATE= '{0}' and n.etl_isvalid=1) b
on b.symbol = a.code;""".format(trade_date)
return pd.read_sql(sql, self.source)
def do_update(self, start_date, end_date, count, order='DESC'):
# 读取交易日
trade_sets = self.sync_util.get_trades_ago('001002', start_date, end_date, count, order)
trade_list = list(trade_sets['TRADEDATE'])
for trade_date in trade_list:
print(trade_date)
index_sets = self.get_index_sets(trade_date)
if index_sets.empty:
continue
try:
index_sets['symbol'] = np.where(index_sets['Exchange'] == '001002',
index_sets['code'] + '.XSHG',
index_sets['code'] + '.XSHE')
index_sets['id'] = index_sets['symbol'] + str(trade_date)
index_sets.drop(['Exchange', 'code'], axis=1, inplace=True)
# 本地保存
if not os.path.exists(self.dir):
os.makedirs(self.dir)
file_name = self.dir + str(trade_date) + '.csv'
if os.path.exists(str(file_name)):
os.remove(str(file_name))
index_sets.to_csv(file_name, encoding='UTF-8')
# 数据库保存
try:
self.delete_trade_data(trade_date)
index_sets.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
except Exception as sql_err:
print(sql_err.orig.msg)
self.insert_or_update(index_sets)
except Exception as e:
print(e)
if __name__ == '__main__':
    # CLI entry point: rebuild the table, run an incremental update, or
    # execute as a scheduled task continuing from the last synced date.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--start_date', type=int, default=20070101)
    arg_parser.add_argument('--end_date', type=int, default=0)
    arg_parser.add_argument('--count', type=int, default=2)
    # NOTE(review): argparse type=bool treats any non-empty string as True.
    arg_parser.add_argument('--rebuild', type=bool, default=False)
    arg_parser.add_argument('--update', type=bool, default=False)
    arg_parser.add_argument('--schedule', type=bool, default=False)
    args = arg_parser.parse_args()
    # Default the end date to today (YYYYMMDD) when not supplied.
    end_date = args.end_date if args.end_date != 0 else int(datetime.now().date().strftime('%Y%m%d'))
    if args.rebuild:
        processor = SyncSkDailyPrice()
        processor.create_dest_tables()
        processor.do_update(args.start_date, end_date, args.count)
    if args.update:
        processor = SyncSkDailyPrice()
        processor.do_update(args.start_date, end_date, args.count)
    if args.schedule:
        processor = SyncSkDailyPrice()
        start_date = processor.get_start_date()
        print('running schedule task, start date:', start_date, ';end date:', end_date)
        processor.do_update(start_date, end_date, -1, '')
#!/usr/bin/env python
# coding=utf-8
import pdb
import os
import sys
import sqlalchemy as sa
import pandas as pd
import numpy as np
import collections
import argparse
from datetime import datetime, date
from sqlalchemy.orm import sessionmaker
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from base_sync import BaseSync
sys.path.append('..')
from utillities.sync_util import SyncUtil
import config
class SyncIndustry(BaseSync):
    """Sync Shenwan (SW) industry index constituents and weights into the
    local `sw_industry` table, with per-trade-date CSV snapshots."""

    def __init__(self):
        self.sync_util = SyncUtil()
        super(SyncIndustry, self).__init__('sw_industry')
        # Industry weights live in FCDB, so override the default source engine.
        self.source = sa.create_engine("mssql+pymssql://HF_read:read@192.168.100.165:1433/FCDB")
        self.dir = config.RECORD_BASE_DIR + self.dest_table + '/'

    def create_dest_tables(self):
        """(Re)create the destination MySQL table for industry weights."""
        create_sql = """create table `{0}`(
`id` varchar(128) NOT NULL,
`isymbol` varchar(24) NOT NULL,
`trade_date` date NOT NULL,
`iname` varchar(128) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
`symbol` varchar(32) NOT NULL,
`sname` varchar(128) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci' NOT NULL,
`weighing` decimal(8,2) DEFAULT NULL,
PRIMARY KEY(`id`,`trade_date`,`isymbol`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
""".format(self.dest_table)
        self.create_table(create_sql)

    def get_sw_industry(self):
        """Return the index symbols of all SW level-1/2/3 industry indexes."""
        sql_pe = """(SELECT Symbol from FCDB.dbo.iprofile where
Iprofile7 ='申万一级行业指数' or Iprofile7 ='申万二级行业指数'
or Iprofile7 ='申万三级行业指数')"""
        return pd.read_sql(sql_pe, self.source)

    def get_index_sets(self, trade_date, industry_sets):
        """Fetch constituents and weights of the given indexes on one date.

        `industry_sets` is a Python list; its repr's brackets are rewritten
        to parentheses to form the SQL IN (...) clause.
        """
        sql = """select Isymbol as isymbol,Tdate as trade_date,
Iname as iname, Symbol as code,Exchange,
Sname as sname, Weighing as weighing from FCDB.dbo.issweight where
Isymbol in {1} and Tdate = '{0}';""".format(trade_date, industry_sets)
        sql = sql.replace('[', '(')
        sql = sql.replace(']', ')')
        return pd.read_sql(sql, self.source)

    def do_update(self, start_date, end_date, count, order='DESC'):
        """Sync industry weights for up to `count` trade dates in the range."""
        # Read the trading calendar.
        trade_sets = self.sync_util.get_trades_ago('001002', start_date, end_date, count, order)
        sw_industry = self.get_sw_industry()
        trade_list = list(trade_sets['TRADEDATE'])
        for trade_date in trade_list:
            print(trade_date)
            index_sets = self.get_index_sets(trade_date, list(sw_industry['Symbol'].astype('str')))
            if index_sets.empty:
                continue
            try:
                # CNSESH = Shanghai (.XSHG), otherwise Shenzhen (.XSHE).
                index_sets['symbol'] = np.where(index_sets['Exchange'] == 'CNSESH',
                                                index_sets['code'] + '.XSHG',
                                                index_sets['code'] + '.XSHE')
                # A symbol can belong to several index levels, so the id also
                # carries the industry index symbol.
                index_sets['id'] = index_sets['symbol'] + str(trade_date) + index_sets['isymbol']
                index_sets.drop(['Exchange', 'code'], axis=1, inplace=True)
                # Local save (replace any stale snapshot).
                if not os.path.exists(self.dir):
                    os.makedirs(self.dir)
                file_name = self.dir + str(trade_date) + '.csv'
                if os.path.exists(str(file_name)):
                    os.remove(str(file_name))
                index_sets.to_csv(file_name, encoding='UTF-8')
                # Database save: delete-then-append with row-wise fallback.
                try:
                    self.delete_trade_data(trade_date)
                    index_sets.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as sql_err:
                    print(sql_err.orig.msg)
                    self.insert_or_update(index_sets)
            except Exception as e:
                print(e)
if __name__ == '__main__':
    # CLI entry point: rebuild, incremental update, or scheduled run.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--start_date', type=int, default=20070101)
    arg_parser.add_argument('--end_date', type=int, default=0)
    arg_parser.add_argument('--count', type=int, default=2)
    # NOTE(review): argparse type=bool treats any non-empty string as True.
    arg_parser.add_argument('--rebuild', type=bool, default=False)
    arg_parser.add_argument('--update', type=bool, default=False)
    arg_parser.add_argument('--schedule', type=bool, default=False)
    args = arg_parser.parse_args()
    # Default the end date to today (YYYYMMDD) when not supplied.
    end_date = args.end_date if args.end_date != 0 else int(datetime.now().date().strftime('%Y%m%d'))
    if args.rebuild:
        processor = SyncIndustry()
        processor.create_dest_tables()
        processor.do_update(args.start_date, end_date, args.count)
    if args.update:
        processor = SyncIndustry()
        processor.do_update(args.start_date, end_date, args.count)
    if args.schedule:
        processor = SyncIndustry()
        start_date = processor.get_start_date()
        print('running schedule task, start date:', start_date, ';end date:', end_date)
        processor.do_update(start_date, end_date, -1, '')
#!/usr/bin/env python
# coding=utf-8
import datetime
import os
import sys
import pdb
import sqlalchemy as sa
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
sys.path.append('..')
import config
class TmImportUtils(object):
    """Timestamp-watermark helper for incremental imports.

    Tracks the highest source `tmstamp` already synced per destination table
    (persisted in the destination's `update_log` table) and can dump
    report-period data to per-date CSV files.
    """

    def __init__(self, source, destination, source_table, dest_table):
        # Source database engine.
        self.source = source
        # Destination database engine.
        self.destination = destination
        # Session factory bound to the destination database.
        self.dest_session = sessionmaker(bind=self.destination, autocommit=False, autoflush=True)
        self.source_table = source_table
        self.dest_table = dest_table
        self._dir = config.RECORD_BASE_DIR
        # Optional hook applied to each report DataFrame before saving.
        self._secondary_func = None

    def create_dest_tables(self, create_sql):
        """Drop and recreate the destination table from the given DDL."""
        drop_sql = """drop table if exists `{0}`;""".format(self.dest_table)
        session = self.dest_session()
        session.execute(drop_sql)
        session.execute(create_sql)
        session.commit()
        session.close()

    def get_max_tm_source(self):
        """Max `tmstamp` in the source table (0 when the table is empty)."""
        sql = 'select max(cast(tmstamp as bigint))as tm from ' + self.source_table
        trades_sets = pd.read_sql(sql, self.source)
        tm = 0
        if not trades_sets.empty:
            tm = trades_sets['tm'][0]
        return tm

    def get_min_tm_source(self):
        """Min `tmstamp` in the source table (0 when the table is empty)."""
        sql = 'select min(cast(tmstamp as bigint))as tm from ' + self.source_table
        trades_sets = pd.read_sql(sql, self.source)
        tm = 0
        if not trades_sets.empty:
            tm = trades_sets['tm'][0]
        return tm

    def get_max_tm_log(self):
        """Last synced watermark recorded for this table in `update_log`."""
        sql = """select max_tag from update_log where task_name='{0}'""".format(self.dest_table)
        trades_sets = pd.read_sql(sql, self.destination)
        tm = 0
        if not trades_sets.empty:
            tm = trades_sets['max_tag'][0]
        return tm

    def update_update_log(self, tm):
        """Upsert the watermark `tm` for this table into `update_log`."""
        session = self.dest_session()
        sql = """insert into update_log (task_name,max_tag) values ('{0}',{1})
ON DUPLICATE KEY UPDATE task_name='{0}',max_tag={1}""".format(self.dest_table, tm)
        sql = sql.replace('\n', '')
        session.execute(sql)
        session.commit()
        session.close()
        print('更新', self.dest_table, '的max_tag为', tm)

    def create_report_date(self, min_year, max_year):
        """Quarterly report dates (0331/0630/0930/1231) for years
        min_year-1 through max_year-1 inclusive, ascending.

        NOTE(review): the sibling SyncUtil.create_report_date loops while
        start_date <= max_year — confirm the exclusive bound is intended.
        """
        report_date_list = []
        start_date = min_year - 1
        while start_date < max_year:
            report_date_list.append(start_date * 10000 + 331)
            report_date_list.append(start_date * 10000 + 630)
            report_date_list.append(start_date * 10000 + 930)
            report_date_list.append(start_date * 10000 + 1231)
            start_date += 1
        report_date_list.sort()
        return report_date_list

    # Legacy helper: generates report-period data files keyed by report date.
    def update_report(self, count, end_date, sql):
        """Dump the last `count` years of report rows to per-date CSV files.

        `sql` must select `code` and `Exchange` columns and tolerate an
        `a.EndDate = {0}` filter appended here.
        """
        sql += ' and a.EndDate = {0}'
        max_year = int(end_date / 10000)
        min_year = max_year - count
        report_date_list = self.create_report_date(min_year, max_year)
        for report_date in report_date_list:
            report_fundamentals = pd.read_sql(sql.format(report_date), self.source)
            if report_fundamentals.empty:
                continue
            # CNSESH = Shanghai (.XSHG), otherwise Shenzhen (.XSHE).
            report_fundamentals['symbol'] = np.where(report_fundamentals['Exchange'] == 'CNSESH',
                                                     report_fundamentals['code'] + '.XSHG',
                                                     report_fundamentals['code'] + '.XSHE')
            report_fundamentals.drop(['Exchange', 'code'], axis=1, inplace=True)
            # Secondary processing hook.
            if self._secondary_func is not None:
                report_fundamentals = self._secondary_func(report_fundamentals)
            # Local save (replace any stale file).
            if not os.path.exists(self._dir):
                os.makedirs(self._dir)
            file_name = self._dir + self.dest_table + '/' + str(report_date) + '.csv'
            if os.path.exists(str(file_name)):
                os.remove(str(file_name))
            report_fundamentals.to_csv(self._dir + self.dest_table + '/' + str(report_date) + '.csv',
                                       encoding='UTF-8')
            print(self._dir + self.dest_table + '/' + str(report_date) + '.csv')
\ No newline at end of file
#!/usr/bin/env python
# coding=utf-8
import os
import sys
import pdb
import argparse
from datetime import datetime
sys.path.append('..')
import config
from utillities.sync_util import SyncUtil
class SyncTradeDate(object):
    """Dump the full trading calendar to a local CSV snapshot."""

    def __init__(self):
        self._unit = SyncUtil()
        self._dir = config.RECORD_BASE_DIR + 'trade_date/'

    def do_update(self, start_date, end_date, count):
        """Fetch trade dates in [start_date, end_date] and rewrite trade_date.csv."""
        calendar = self._unit.get_trades_ago('001002', start_date, end_date, count)
        calendar.rename(columns={'TRADEDATE': 'trade_date'}, inplace=True)
        # Replace any stale local snapshot before writing.
        target = self._dir + 'trade_date.csv'
        if os.path.exists(str(target)):
            os.remove(str(target))
        calendar.to_csv(target, encoding='utf-8')
if __name__ == '__main__':
    # CLI entry point: rebuild the local trade-date snapshot.
    cli = argparse.ArgumentParser()
    cli.add_argument('--start_date', type=int, default=20070101)
    cli.add_argument('--end_date', type=int, default=0)
    cli.add_argument('--count', type=int, default=-1)
    # NOTE(review): argparse type=bool treats any non-empty string as True.
    cli.add_argument('--rebuild', type=bool, default=False)
    cli.add_argument('--schedule', type=bool, default=False)
    args = cli.parse_args()
    if args.rebuild:
        # Default the end date to today (YYYYMMDD) when not supplied.
        if args.end_date == 0:
            end_date = int(str(datetime.now().date()).replace('-', ''))
        else:
            end_date = args.end_date
        processor = SyncTradeDate()
        processor.do_update(args.start_date, end_date, args.count)
#!/usr/bin/env python
# coding=utf-8
import pdb
import os
import sys
import sqlalchemy as sa
import pandas as pd
import numpy as np
import collections
import argparse
from base_sync import BaseSync
sys.path.append('..')
from utillities.sync_util import SyncUtil
from datetime import datetime, date
from sqlalchemy.orm import sessionmaker
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import config
class SyncValuation(BaseSync):
    """Sync daily valuation metrics (PE/PB/PS/PCF, caps, shares) into the
    local `valuation` table, with per-trade-date CSV snapshots."""

    def __init__(self):
        self.sync_util = SyncUtil()
        super(SyncValuation, self).__init__('valuation')
        # Directory for per-day CSV snapshots of the synced data.
        self.dir = config.RECORD_BASE_DIR + self.dest_table + '/'

    def create_dest_tables(self):
        """(Re)create the destination MySQL table for valuation metrics."""
        create_sql = """create table `{0}`(
`id` varchar(32) NOT NULL,
`symbol` varchar(24) NOT NULL,
`trade_date` date NOT NULL,
`market_cap` decimal(19,4) DEFAULT NULL,
`circulating_market_cap` decimal(19,4) DEFAULT NULL,
`turnover_ratio` decimal(9,4) DEFAULT NULL,
`pb` decimal(19,4) DEFAULT NULL,
`pe_lfy` decimal(19,4) DEFAULT NULL,
`pe` decimal(19,4) DEFAULT NULL,
`ps_lfy` decimal(19,4) DEFAULT NULL,
`ps` decimal(19,4) DEFAULT NULL,
`pcf` decimal(19,4) DEFAULT NULL,
`capitalization` decimal(19,4) DEFAULT NULL,
`circulating_cap` decimal(19,4) DEFAULT NULL,
PRIMARY KEY(`id`,`symbol`,`trade_date`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
""".format(self.dest_table)
        self.create_table(create_sql)

    def get_valutaion(self, trade_date):
        """Fetch one trade date of valuation metrics joined with share counts.

        (Method name keeps the historical 'valutaion' spelling — callers
        depend on it.)
        """
        sql = """select a.SYMBOL as code,
a.TRADEDATE as trade_date,
a.TOTMKTCAP as market_cap,
a.NEGOTIABLEMV as circulating_market_cap,
a.TURNRATE as turnover_ratio,
a.PETTM as pe,
a.PELFY as pe_lfy,
a.PB as pb,
a.PSTTM as ps,
a.PSLFY as ps_lfy,
a.PCTTM as pcf,
b.TOTALSHARE as capitalization,
b.MKTSHARE as circulating_cap,
c.Exchange
from TQ_SK_FININDIC a left join TQ_SK_DQUOTEINDIC b
on a.SYMBOL=b.SYMBOL and a.TRADEDATE=b.TRADEDATE
left join TQ_OA_STCODE c
on a.SECODE = c.SECODE and a.SYMBOL = c.SYMBOL
where a.TRADEDATE='{0}';""".format(trade_date)
        return pd.read_sql(sql, self.source)

    def do_update(self, start_date, end_date, count, order='DESC'):
        """Sync valuations for up to `count` trade dates in the range."""
        # Read the trading calendar.
        trade_sets = self.sync_util.get_trades_ago('001002', start_date, end_date, count, order)
        trade_list = list(trade_sets['TRADEDATE'])
        for trade_date in trade_list:
            print(trade_date)
            index_sets = self.get_valutaion(trade_date)
            if index_sets.empty:
                continue
            try:
                # 001002 = Shanghai (.XSHG), otherwise Shenzhen (.XSHE).
                index_sets['symbol'] = np.where(index_sets['Exchange'] == '001002',
                                                index_sets['code'] + '.XSHG',
                                                index_sets['code'] + '.XSHE')
                index_sets['id'] = index_sets['symbol'] + str(trade_date)
                index_sets.drop(['Exchange', 'code'], axis=1, inplace=True)
                # Local save (replace any stale snapshot).
                if not os.path.exists(self.dir):
                    os.makedirs(self.dir)
                file_name = self.dir + str(trade_date) + '.csv'
                if os.path.exists(str(file_name)):
                    os.remove(str(file_name))
                index_sets.to_csv(file_name, encoding='UTF-8')
                # Database save: delete-then-append with row-wise fallback.
                try:
                    self.delete_trade_data(trade_date)
                    index_sets.to_sql(name=self.dest_table, con=self.destination, if_exists='append', index=False)
                except Exception as sql_err:
                    print(sql_err.orig.msg)
                    self.insert_or_update(index_sets)
            except Exception as e:
                print(e)
if __name__ == '__main__':
    # CLI entry point: rebuild, incremental update, or scheduled run.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--start_date', type=int, default=20070101)
    arg_parser.add_argument('--end_date', type=int, default=0)
    arg_parser.add_argument('--count', type=int, default=2)
    # NOTE(review): argparse type=bool treats any non-empty string as True.
    arg_parser.add_argument('--rebuild', type=bool, default=False)
    arg_parser.add_argument('--update', type=bool, default=False)
    arg_parser.add_argument('--schedule', type=bool, default=False)
    args = arg_parser.parse_args()
    # Default the end date to today (YYYYMMDD) when not supplied.
    end_date = args.end_date if args.end_date != 0 else int(datetime.now().date().strftime('%Y%m%d'))
    if args.rebuild:
        processor = SyncValuation()
        processor.create_dest_tables()
        processor.do_update(args.start_date, end_date, args.count)
    if args.update:
        processor = SyncValuation()
        processor.do_update(args.start_date, end_date, args.count)
    if args.schedule:
        processor = SyncValuation()
        start_date = processor.get_start_date()
        print('running schedule task, start date:', start_date, ';end date:', end_date)
        processor.do_update(start_date, end_date, -1, '')
#!/usr/bin/env python
# coding=utf-8
import pdb
import pandas as pd
from factor.utillities.sync_util import SyncUtil
from vision.fm.signletion_engine import *
from vision.file_unit.balance import Balance
from vision.file_unit.income import Income
from vision.file_unit.cash_flow import CashFlow
# Identifier columns carried through unchanged (never summed into the TTM).
key_columns = ['symbol', 'trade_date', 'pub_date', 'report_date']


def get_ttm_fundamental(stock_sets, ttm_factors, date, year=1):
    """Build trailing-twelve-month (TTM) fundamentals as of `date`.

    ttm_factors maps a statement class name (e.g. Income.__name__) to the
    list of columns to fetch; columns listed in `key_columns` are kept as
    identifiers while all others are summed over the `year`*4 quarterly
    report periods returned by SyncUtil.ttm_report_date_by_year. Only
    symbols present in every period survive the accumulation; the per-
    statement frames are then merged on the symbol index.

    Returns a DataFrame indexed by symbol with a `trade_date` column set to
    `date`; restricted to `stock_sets` unless it is empty (then all symbols
    are returned).
    """
    sync_util = SyncUtil()
    report_date_list = sync_util.ttm_report_date_by_year(date, year)
    # Read and accumulate statement data period by period.
    new_fundamental = None
    for key, value in ttm_factors.items():
        ttm_fundamental = None
        # Columns to accumulate: everything that is not an identifier.
        value_columns = [v for v in value if v not in key_columns]
        for report_date in report_date_list:
            fundamental_data = get_report(add_filter_trade(query(key, value), [report_date]))
            # Drop exact duplicate rows. `~` is the supported boolean
            # inversion for pandas masks; the original unary `-` on a bool
            # Series was deprecated and later removed.
            fundamental_data = fundamental_data[~fundamental_data.duplicated()]
            fundamental_data.set_index('symbol', inplace=True)
            if ttm_fundamental is None:
                ttm_fundamental = fundamental_data
            else:
                # Intersect symbols and sum the value columns into the TTM.
                ttm_stock_sets = list(set(fundamental_data.index) & set(ttm_fundamental.index))
                ttm_fundamental.loc[ttm_stock_sets, value_columns] += fundamental_data.loc[ttm_stock_sets, value_columns]
                ttm_fundamental = ttm_fundamental.loc[ttm_stock_sets]
        if new_fundamental is None:
            new_fundamental = ttm_fundamental
        else:
            # Join the statements on the shared symbol index.
            new_fundamental = pd.merge(new_fundamental, ttm_fundamental, left_index=True, right_index=True)
    new_fundamental['trade_date'] = date
    if len(stock_sets) == 0:
        return new_fundamental
    else:
        return new_fundamental.loc[stock_sets]
if __name__ == '__main__':
    # Ad-hoc smoke test over three A-share symbols.
    # Fix: removed a leftover pdb.set_trace() breakpoint that halted the run.
    stock_sets = ['600016.XSHG', '601229.XSHG', '000651.XSHE']
    ttm_factors = {Balance.__name__: [Balance.symbol, Balance.shortterm_loan, Balance.longterm_loan, Balance.total_liability],
                   CashFlow.__name__: [CashFlow.symbol, CashFlow.net_operate_cash_flow, CashFlow.net_finance_cash_flow],
                   Income.__name__: [Income.symbol, Income.total_operating_revenue, Income.total_operating_cost]}
    get_ttm_fundamental(stock_sets, ttm_factors, '2018-10-21')
#!/usr/bin/env python
# coding=utf-8
import numpy as np
import pandas as pd
class CalcTools(object):
    """Numeric helpers shared by the factor calculations."""

    @classmethod
    def is_zero(cls, data_frame):
        """Element-wise flag for values strictly inside (-1e-6, 1e-6),
        i.e. effectively zero."""
        below_upper = np.where(data_frame < 0.000001, True, False)
        return np.where(data_frame > -0.000001, below_upper, False)
def change_single(params):
    """Convert cumulative (year-to-date) values into single-period values.

    params: dict with 'fundamentals_sets_year' (DataFrame with a 'code'
    column, rows ordered newest-first per stock), 'sub_columns' (columns to
    difference) and 'year' (used only for progress output).

    For each stock, each row's sub_columns get the next (earlier) row
    subtracted; the last row keeps its value (shift fills 0).
    """
    fundamentals_sets_year = params['fundamentals_sets_year']
    sub_columns = params['sub_columns']

    def year_update(df):
        df.loc[df.index, sub_columns] = df[sub_columns] - df[sub_columns].shift(-1).fillna(0)
        return df

    stock_list = list(set(fundamentals_sets_year['code']))
    updated_frames = []
    for i, stock in enumerate(stock_list, start=1):
        if i % 100 == 0:
            print(params['year'], ':', i, '/', len(stock_list))
        updated_frames.append(year_update(
            fundamentals_sets_year[fundamentals_sets_year['code'] == stock]))
    # Fix: DataFrame.append was removed in pandas 2.0; concat is the
    # supported replacement (empty input still yields an empty DataFrame).
    return pd.concat(updated_frames) if updated_frames else pd.DataFrame()
def change_single_by_symbol(params):
    """Convert cumulative values into single-period values, per stock and year.

    params: dict with 'fundamentals_sets_symbol' (DataFrame with 'code' and
    'year' columns), 'sub_columns' (columns to difference) and 'cpu'
    (worker id, used only for progress output).

    Within each (stock, year) group, each row's sub_columns get the next
    row subtracted; the last row keeps its value. Years are processed in
    descending order.
    """
    fundamentals_sets = params['fundamentals_sets_symbol']
    sub_columns = params['sub_columns']

    def year_update(df):
        df.loc[df.index, sub_columns] = df[sub_columns] - df[sub_columns].shift(-1).fillna(0)
        return df

    year_list = list(set(fundamentals_sets['year']))
    year_list.sort(reverse=True)
    stock_list = list(set(fundamentals_sets['code']))
    updated_frames = []
    for i, stock in enumerate(stock_list, start=1):
        if i % 100 == 0:
            print('cpu', params['cpu'], ':', i, '/', len(stock_list))
        # Hoisted out of the year loop: the per-stock subset is year-invariant.
        fundamentals_sets_stock = fundamentals_sets[fundamentals_sets['code'] == stock]
        for year in year_list:
            updated_frames.append(year_update(
                fundamentals_sets_stock[fundamentals_sets_stock['year'] == year]))
    # Fix: DataFrame.append was removed in pandas 2.0; concat is the
    # supported replacement (empty input still yields an empty DataFrame).
    return pd.concat(updated_frames) if updated_frames else pd.DataFrame()
#!/usr/bin/env python
# coding=utf-8
import numpy as np
import numba as nb
import pdb
@nb.njit(nogil=True, cache=True)
def ls_fit(x, y, w):
    """Weighted least-squares coefficients: solve (XᵀW X) b = (XᵀW) y."""
    weighted_xt = x.T * w
    return np.linalg.solve(np.dot(weighted_xt, x), np.dot(weighted_xt, y))
@nb.njit(nogil=True, cache=True)
def ls_fit_pinv(x, y, w):
    """Weighted least-squares via pseudo-inverse; fallback for singular XᵀW X."""
    weighted_xt = x.T * w
    gram_pinv = np.linalg.pinv(np.dot(weighted_xt, x))
    return np.dot(np.dot(gram_pinv, weighted_xt), y)
@nb.njit(nogil=True, cache=True)
def ls_res(x, y, b):
    """Regression residuals: y minus the fitted values X·b."""
    return y - np.dot(x, b)
@nb.njit(nogil=True, cache=True)
def ls_explain(x, b):
    """Per-observation explained components: broadcast b over the rows of x."""
    n_factors, n_targets = b.shape
    return b.reshape((1, n_factors, n_targets)) * x.reshape((-1, n_factors, 1))
def _sub_step(x, y, w, curr_idx, res):
    """Fit one group's weighted regression and write its residuals into
    `res` in place; returns the group's design matrix and coefficients."""
    grp_x, grp_y, grp_w = x[curr_idx], y[curr_idx], w[curr_idx]
    try:
        b = ls_fit(grp_x, grp_y, grp_w)
    except np.linalg.linalg.LinAlgError:
        # Singular design matrix: fall back to the pseudo-inverse solution.
        b = ls_fit_pinv(grp_x, grp_y, grp_w)
    res[curr_idx] = ls_res(grp_x, grp_y, b)
    return grp_x, b
def neutralize(x, y, groups=None, detail=False,
               weights=None):
    """Neutralize `y` against factor exposures `x` via weighted least squares.

    x: (n, m) factor matrix; y: (n,) or (n, k) targets (1-D input is
    reshaped to a column); groups: optional labels to fit one regression
    per group; weights: per-row weights, default all ones.

    Returns the residuals, or (residuals, {'exposure', 'explained'}) when
    detail=True.

    Fix: removed a leftover pdb.set_trace() breakpoint that halted every call.
    """
    if y.ndim == 1:
        y = y.reshape((-1, 1))
    if weights is None:
        weights = np.ones(len(y), dtype=float)
    output_dict = {}
    if detail:
        exposure = np.zeros(x.shape + (y.shape[1],))
        explained = np.zeros(x.shape + (y.shape[1],))
        output_dict['exposure'] = exposure
        output_dict['explained'] = explained
    if groups is not None:
        res = np.zeros(y.shape)
        # NOTE(review): `utils` is not imported in this module, so this
        # grouped path raises NameError — confirm the intended groupby
        # helper and import it.
        index_diff, order = utils.groupby(groups)
        start = 0
        if detail:
            for diff_loc in index_diff:
                curr_idx = order[start:diff_loc + 1]
                curr_x, b = _sub_step(x, y, weights, curr_idx, res)
                exposure[curr_idx, :, :] = b
                explained[curr_idx] = ls_explain(curr_x, b)
                start = diff_loc + 1
        else:
            for diff_loc in index_diff:
                curr_idx = order[start:diff_loc + 1]
                _sub_step(x, y, weights, curr_idx, res)
                start = diff_loc + 1
    else:
        try:
            b = ls_fit(x, y, weights)
        except np.linalg.linalg.LinAlgError:
            # Singular design matrix: fall back to the pseudo-inverse fit.
            b = ls_fit_pinv(x, y, weights)
        res = ls_res(x, y, b)
        if detail:
            explained[:, :, :] = ls_explain(x, b)
            exposure[:] = b
    if output_dict:
        return res, output_dict
    else:
        return res
#!/usr/bin/env python
# coding=utf-8
import pdb
import sys
sys.path.append("..")
from db.data_engine import DataFactory
from trade.daily_price import DailyPrice as TradeDailyPrice
from mlog import MLog
class DailyPrice(object):
    """Read-side access to daily price quotes in the DNDS SQL Server database.

    Fix: the two bare Python-2 `print x` statements were converted to
    `print(x)` calls, which are valid in both Python 2 and 3.
    """

    def __init__(self, db_engine=None):
        self.__db_engine = DataFactory.CreateEngine(1, 'DNDS', db_engine)

    def get_daily_price(self, symbol_sets, trade_date):
        """Close/average price rows for the given symbols on one trade date,
        indexed by SYMBOL. `symbol_sets` is a pre-formatted SQL IN list."""
        sql_pe = 'select B.SYMBOL, S.TRADEDATE, S.TCLOSE, S.AVGPRICE, B.LISTDATE from dnds.dbo.TQ_QT_SKDAILYPRICE AS S JOIN dnds.dbo.TQ_SK_BASICINFO as B on S.SECODE = B.SECODE and B.symbol IN(' + symbol_sets + ') and S.TRADEDATE = ' + str(trade_date) + ' ORDER BY S.TRADEDATE DESC'
        daily_price_df = self.__db_engine.get_datasets(sql_pe)
        return daily_price_df.set_index(['SYMBOL'])

    @classmethod
    def cn_stock_daily_price(cls, db_engine, symbol_sets, trade_date):
        """OHLCV rows for the symbols on one date, parsed into a dict of
        symbol -> trade.daily_price.DailyPrice objects."""
        daily_price_sets = {}
        sql_pe = 'select S.ID, B.SYMBOL, S.TRADEDATE,S.LCLOSE,S.TOPEN, S.TCLOSE,S.THIGH,S.TLOW,S.AVGPRICE,S.VOL,S.AMOUNT,B.LISTDATE from dnds.dbo.TQ_QT_SKDAILYPRICE AS S JOIN dnds.dbo.TQ_SK_BASICINFO as B on S.SECODE = B.SECODE and B.symbol IN(' + symbol_sets + ') and S.TRADEDATE = ' + str(trade_date)
        daily_price_df = db_engine.get_datasets(sql_pe)
        for index in daily_price_df.index:
            data = daily_price_df.loc[index].values
            daily_price = TradeDailyPrice()
            daily_price.df_parser(data)
            daily_price_sets[daily_price.symbol()] = daily_price
        return daily_price_sets

    @classmethod
    def cn_future_daily_price(cls, db_engine, symbol, trade_date):
        """Futures quote for one contract on one date.

        NOTE(review): if the query returns no rows, `daily_price` is never
        bound and the return raises NameError — confirm callers guarantee
        a non-empty result.
        """
        sql_pe = 'select Q.ID, B.CONTRACTCODE,Q.TRADEDATE,Q.LCLOSE,Q.TOPEN,Q.TCLOSE,Q.THIGH,Q.TLOW,Q.SETTLEPRICE,Q.VOL,Q.AMOUNT from dnds.dbo.TQ_QT_FUTURE AS Q JOIN dnds.dbo.TQ_FT_BASICINFO AS B ON B.SECODE = Q.SECODE WHERE B.CONTRACTCODE = \'' + symbol + '\' and Q.TRADEDATE = ' + str(trade_date)
        print(sql_pe)
        daily_price_df = db_engine.get_datasets(sql_pe)
        for index in daily_price_df.index:
            data = daily_price_df.loc[index].values
            daily_price = TradeDailyPrice(dtype=2)
            daily_price.df_parser(data)
        return daily_price

    @classmethod
    def cn_future_daily_price_sets(cls, db_engine, symbol, start_date, end_date):
        """Raw futures quote rows for one contract over [start_date, end_date]."""
        sql_pe = 'select Q.ID, B.CONTRACTCODE,Q.TRADEDATE,Q.LCLOSE,Q.TOPEN,Q.TCLOSE,Q.THIGH,Q.TLOW,Q.SETTLEPRICE,Q.VOL,Q.AMOUNT from dnds.dbo.TQ_QT_FUTURE AS Q JOIN dnds.dbo.TQ_FT_BASICINFO AS B ON B.SECODE = Q.SECODE WHERE B.CONTRACTCODE = \'' + symbol + '\' and Q.TRADEDATE >= ' + str(start_date) + ' and Q.TRADEDATE <= ' + str(end_date)
        print(sql_pe)
        daily_price_df = db_engine.get_datasets(sql_pe)
        return daily_price_df
# -*- coding: utf-8 -*-
import threading
import logging
import datetime
import os
import sys
sys.path.append("..")
class Singleton(object):
    """Thread-safe singleton base class.

    The first instantiation creates the object and wraps `__init__` so it
    only ever runs once; later instantiations return the cached instance.

    Fix: the original `__new__` never returned the newly created object —
    it fell through the try/finally and returned None on first use.
    """
    # class -> {'obj': instance, 'init': has __init__ already run}
    objs = {}
    objs_locker = threading.Lock()

    def __new__(cls, *args, **kv):
        # Fast path without the lock.
        if cls in cls.objs:
            return cls.objs[cls]['obj']
        cls.objs_locker.acquire()
        try:
            # Double-check inside the lock in case another thread won the race.
            if cls in cls.objs:
                return cls.objs[cls]['obj']
            obj = object.__new__(cls)
            cls.objs[cls] = {'obj': obj, 'init': False}
            setattr(cls, '__init__', cls.decorate_init(cls.__init__))
            # BUG FIX: the new instance must be returned from __new__.
            return obj
        finally:
            cls.objs_locker.release()

    @classmethod
    def decorate_init(cls, fn):
        """Wrap `fn` so the real __init__ body executes at most once."""
        def init_wrap(*args):
            if not cls.objs[cls]['init']:
                fn(*args)
                cls.objs[cls]['init'] = True
            return
        return init_wrap
class MLog():
    """Thin wrapper around the stdlib logging module: `config` sets up a
    per-run log file plus console echo; `write` hands back the logging
    module for call sites to use."""

    @classmethod
    def config(cls, name="logging", level=logging.DEBUG):
        """Configure root logging: a timestamped file under $HOME/MLOG/<name>/
        at `level`, plus a console handler at INFO."""
        # Fix: local renamed from `dir` (shadowed the builtin).
        log_dir = os.path.expandvars('$HOME') + '/MLOG/' + name + '/'
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        filename = log_dir + name + '_' + datetime.datetime.now().strftime('%b_%d_%H_%M') + '.log'
        format_str = "[%(process)d %(thread)d][%(asctime)s][%(filename)s line:%(lineno)d][%(levelname)s] %(message)s"
        # Root logger writes everything at `level` or above to the file.
        logging.basicConfig(level=level,
                            format=format_str,
                            datefmt='%m-%d %H:%M',
                            filename=filename,
                            filemode='w')
        # Echo INFO and above to the console as well.
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        formatter = logging.Formatter(format_str)
        console.setFormatter(formatter)
        # Attach the console handler to the root logger.
        logging.getLogger('').addHandler(console)

    @classmethod
    def write(cls):
        """Return the logging module; callers do MLog.write().info(...).

        Fix: classmethod parameter renamed from the misleading `self` to `cls`.
        """
        return logging
#!/usr/bin/env python
# coding=utf-8
import sqlalchemy as sa
import pandas as pd
from datetime import datetime
import pdb
class SyncUtil(object):
    """Trading-calendar and report-period utilities backed by the DNDS
    SQL Server database.

    The pure date helpers (ttm_report_date*, create_report_date,
    every_report_range, plus_year) need no database; pass is_db=False to
    skip engine creation.
    """

    def __init__(self, source=None, is_db=True):
        # Source database engine, only created when database access is wanted.
        if is_db:
            # Fix: identity comparison with None (was `== None`).
            if source is None:
                self.source = sa.create_engine("mssql+pymssql://reader:reader@10.15.97.127:1433/dnds")
            else:
                self.source = source

    # Trading-calendar queries -------------------------------------------

    def get_all_trades(self, exchange, start_date, end_date):
        """All trade dates for `exchange` in [start_date, end_date], newest first."""
        sql = """select TRADEDATE FROM TQ_OA_TRDSCHEDULE WHERE EXCHANGE = '{0}'
AND ISVALID = 1 AND TRADEDATE >= {1} and TRADEDATE <= {2} ORDER BY TRADEDATE DESC;""".format(exchange,
                                                                                             start_date,
                                                                                             end_date)
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    def get_trades_ago(self, exchange, start_date, end_date, count, order='DESC'):
        """Up to `count` trade dates for `exchange` in [start_date, end_date].

        count=-1 removes the TOP limit; `order` is the SQL sort direction.
        """
        if count == -1:
            top = ''
        else:
            top = """top ({0})""".format(count)
        sql = """select {0} TRADEDATE FROM TQ_OA_TRDSCHEDULE WHERE EXCHANGE = '{1}'
AND ISVALID = 1 AND TRADEDATE >= {2} AND TRADEDATE <= {3} ORDER BY TRADEDATE {4}; """.format(top,
                                                                                             exchange,
                                                                                             start_date,
                                                                                             end_date,
                                                                                             order)
        trades_sets = pd.read_sql(sql, self.source)
        return trades_sets

    # TTM period computation ---------------------------------------------

    def ttm_report_date_by_year(self, end_date, year):
        """TTM report periods for each of the last `year` years ending at
        `end_date` ('YYYY-MM-DD' or YYYYMMDD); merged list, newest first."""
        end_date = str(end_date).replace('-', '')
        end_datetime = datetime.strptime(end_date, '%Y%m%d')
        ttm_report_list = []
        start_year = end_datetime.year - year + 1
        pos_year = end_datetime.year
        while pos_year >= start_year:
            ttm_report_list += self.ttm_report_date(
                str(pos_year) + '-' + str(end_datetime.month) + '-' + str(end_datetime.day))
            pos_year -= 1
        ttm_report_list.sort(reverse=True)
        return ttm_report_list

    def ttm_report_date(self, end_date):
        """The four quarterly report dates forming the TTM window at end_date.

        The window shifts with A-share disclosure deadlines: before May 1
        the freshest statement is last year's Q3 (plus year-2's annual);
        from May 1 the current Q1 is in; from Sep 1 the current Q2; from
        Nov 1 the current Q3 plus last year's annual.
        """
        end_datetime = datetime.strptime(end_date, '%Y-%m-%d')
        ttm_report_list = []
        if end_datetime.month * 100 + end_datetime.day < 501:
            ttm_report_list = [(end_datetime.year - 2) * 10000 + 1231,
                               (end_datetime.year - 1) * 10000 + 331,
                               (end_datetime.year - 1) * 10000 + 630,
                               (end_datetime.year - 1) * 10000 + 930]
        elif 501 <= (end_datetime.month * 100 + end_datetime.day) < 901:
            ttm_report_list = [(end_datetime.year - 1) * 10000 + 331,
                               (end_datetime.year - 1) * 10000 + 630,
                               (end_datetime.year - 1) * 10000 + 930,
                               (end_datetime.year) * 10000 + 331]
        elif 901 <= (end_datetime.month * 100 + end_datetime.day) < 1101:
            ttm_report_list = [(end_datetime.year - 1) * 10000 + 630,
                               (end_datetime.year - 1) * 10000 + 930,
                               (end_datetime.year) * 10000 + 331,
                               (end_datetime.year) * 10000 + 630]
        elif 1101 <= (end_datetime.month * 100 + end_datetime.day):
            ttm_report_list = [(end_datetime.year - 1) * 10000 + 1231,
                               (end_datetime.year) * 10000 + 331,
                               (end_datetime.year) * 10000 + 630,
                               (end_datetime.year) * 10000 + 930]
        return ttm_report_list

    # Report-date helpers ------------------------------------------------

    def create_report_date(self, min_year, max_year):
        """Quarterly report dates (0331/0630/0930/1231) for years
        min_year-1 through max_year inclusive, ascending."""
        report_date_list = []
        start_date = min_year - 1
        while start_date <= max_year:
            report_date_list.append(start_date * 10000 + 331)
            report_date_list.append(start_date * 10000 + 630)
            report_date_list.append(start_date * 10000 + 930)
            report_date_list.append(start_date * 10000 + 1231)
            start_date += 1
        report_date_list.sort()
        return report_date_list

    def every_report_range(self, trade_date, report_date_list):
        """Return (trade_date, second most recent report date <= trade_date),
        or (0, 0) when fewer than two report dates precede trade_date.

        Note: sorts `report_date_list` in place, descending.
        """
        report_date_list.sort(reverse=True)
        start_flag = 0
        start_count = 0
        for report_date in report_date_list:
            if int(trade_date) >= report_date:
                start_flag = 1
            if start_flag == 1:
                start_count += 1
            if start_count == 2:
                return (trade_date, report_date)
        return (0, 0)

    def plus_year(self, row):
        """Annotate a statement row with `year` and quarter index
        `report_type` (1..4 for 0331/0630/0930/1231); for DataFrame.apply."""
        row['year'] = row['report_date'].year
        if row['report_date'].month * 100 + row['report_date'].day == 331:
            row['report_type'] = 1
        elif row['report_date'].month * 100 + row['report_date'].day == 630:
            row['report_type'] = 2
        elif row['report_date'].month * 100 + row['report_date'].day == 930:
            row['report_type'] = 3
        elif row['report_date'].month * 100 + row['report_date'].day == 1231:
            row['report_type'] = 4
        return row
if __name__ == "__main__":
pdb.set_trace()
sync = SyncUtil()
sync.ttm_report_date_by_year('2018-06-10', 5)
#!/usr/bin/env python
# coding=utf-8
import time
class TimeCommon(object):
    """Small time-conversion helpers."""

    @classmethod
    def get_end_time(cls, end_date, end_time):
        """Combine a YYYYMMDD date and an HH:MM:SS time into a local-time
        Unix timestamp (whole seconds)."""
        stamp_text = str(end_date) + ' ' + str(end_time)
        parsed = time.strptime(stamp_text, '%Y%m%d %H:%M:%S')
        return int(time.mktime(parsed))
#!/usr/bin/env python
# coding=utf-8
import pdb
import sys
import os
import pandas as pd
from collections import OrderedDict
import collections
sys.path.append("../../")
from factor import config
class TradeDate(object):
    """In-memory trading calendar loaded from the locally synced
    trade_date.csv snapshot (produced by the SyncTradeDate script)."""

    def __init__(self):
        self._all_trade_file = config.RECORD_BASE_DIR + 'trade_date/' + 'trade_date.csv'
        # Maps trade date (int YYYYMMDD) -> same value; kept sorted ascending.
        self._trade_date_sets = OrderedDict()
        self._load_trade_date()

    def _load_trade_date(self):
        """Populate the calendar from the CSV; stays empty if the file is missing."""
        if os.path.exists(self._all_trade_file):
            trade_date = pd.read_csv(self._all_trade_file, index_col=0)
            for index in trade_date.index:
                self._trade_date_sets[int(trade_date.loc[index].values[0])] = int(trade_date.loc[index].values[0])
            self._trade_date_sets = collections.OrderedDict(sorted(self._trade_date_sets.items(),
                                                                   key=lambda t: t[0], reverse=False))

    def trade_date_sets_ago(self, start_date, end_date, count):
        """Up to `count` trade dates within [start_date, end_date], newest
        first. count=-1 returns every date in the range."""
        sub_trade_date = []
        trade_date_sets = collections.OrderedDict(
            sorted(self._trade_date_sets.items(), key=lambda t: t[0], reverse=True))
        start_flag = 0
        start_count = 0
        for trade_date, values in trade_date_sets.items():
            # print(trade_date, start_date, end_date)
            if trade_date <= end_date:
                start_flag = 1
            if start_flag == 1:
                if start_date <= trade_date and start_count != count:
                    sub_trade_date.append(trade_date)
                    start_count += 1
                else:
                    # Walked past start_date (or hit count): nothing older qualifies.
                    break
        return sub_trade_date

    def trade_date_sets(self, start_date, end_date):
        """Ascending trade dates from exactly `start_date` (which must itself
        be a trade date) through the first date >= end_date, inclusive."""
        sub_trade_date = []
        trade_date_sets = collections.OrderedDict(
            sorted(self._trade_date_sets.items(), key=lambda t: t[0], reverse=False))
        start_flag = 0
        for trade_date, values in trade_date_sets.items():
            print(trade_date, start_date, end_date)
            if trade_date == start_date:
                start_flag = 1
            if start_flag == 1:
                sub_trade_date.append(trade_date)
            if end_date <= trade_date:
                break
        return sub_trade_date

    def trade_date_sets_range(self, start_date, range_day, flag=1):
        """`range_day` consecutive trade dates starting at exactly
        `start_date`; flag=0 walks forward in time, any other value walks
        backward."""
        start_count = 0
        sub_trade_date = []
        if flag == 0:
            trade_date_sets = collections.OrderedDict(
                sorted(self._trade_date_sets.items(), key=lambda t: t[0], reverse=False))
        else:
            trade_date_sets = collections.OrderedDict(
                sorted(self._trade_date_sets.items(), key=lambda t: t[0], reverse=True))
        start_flag = 0
        for trade_date, values in trade_date_sets.items():
            if trade_date == start_date:
                start_flag = 1
            if start_flag == 1:
                sub_trade_date.append(trade_date)
                start_count += 1
                if start_count >= range_day:
                    break
        return sub_trade_date
if __name__ == '__main__':
    # Ad-hoc smoke test: list every trade date up to 2019-01-01.
    calendar = TradeDate()
    dates = calendar.trade_date_sets_ago(20070101, 20190101, -1)
    print(dates)
from ultron.cluster.invoke.submit_tasks import submit_task

# Submit the 'factor' task packet under session id 'ly100002' to the
# ultron distributed cluster.
submit_task.submit_packet('ly100002', 'factor')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment