Commit 2f997476 authored by Dr.李's avatar Dr.李

added sample notebooks

parent e86d6f8f
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pylab inline\n",
"import matplotlib.pyplot as plt\n",
"plt.style.use('ggplot')\n",
"print(plt.style.available)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import sqlalchemy\n",
"import pandas as pd\n",
"import alphamind.data.neutralize as ne\n",
"import alphamind.data.winsorize as ws\n",
"import alphamind.data.standardize as st\n",
"import alphamind.portfolio.rankbuilder as rb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"risk_factor_table = 'risk_factor_500'\n",
"benchmark = 'zz500'\n",
"factor = 'ROEAfterNonRecurring' # 'DROEAfterNonRecurring'\n",
"\n",
"conn = sqlalchemy.create_engine('mysql+mysqldb://root:we083826@localhost:3306/multifactor?charset=utf8')\n",
"df = pd.read_sql('select factor_data.{0}, trade_data.Return as dailyReturn, {1}.* '\n",
" 'from factor_data, trade_data, {1} '\n",
" 'where factor_data.Date = {1}.Date and factor_data.Code = {1}.Code '\n",
" 'and factor_data.Date = trade_data.Date and factor_data.Code = trade_data.Code;'.format(factor, \n",
" risk_factor_table), \n",
" conn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del df['Bank']\n",
"del df['NonBankFinancial']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"risk_facto_cols = df.columns[4:]\n",
"risk_facto_cols"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df['d1ret'] = df.dailyReturn.groupby(df.Code).shift(-1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"benchmark_data = pd.read_sql('select {0}, Date from index_data'.format(benchmark), conn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"benchmark_data['ret'] = benchmark_data[benchmark] / benchmark_data[benchmark].shift(1) - 1.\n",
"benchmark_data['d1ret_b'] = benchmark_data['ret'] .shift(-1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.merge(df, benchmark_data[['Date', 'd1ret_b']], on='Date', how='inner')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.dropna(inplace=True)\n",
"df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Factor Date Preprocessing (Winsorize -> Standardize -> neutralize)\n",
"-----------------------------------------------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data = df.copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y = total_data[factor].values\n",
"y.shape = -1, 1\n",
"groups = total_data.Date.values.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"total_data['res'] = ne.neutralize(total_data[risk_facto_cols].values,\n",
" st.standardize(ws.winsorize_normal(y, groups=groups),\n",
" groups=groups),\n",
" groups)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data[risk_facto_cols].tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data[[factor, 'res', 'Date', 'Code']].tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Factor Performance (long_short)\n",
"------------------------------------------------------------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"total_data['pos'] = total_data.res.groupby(groups).apply(lambda x: x / np.abs(x).sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data[['pos', 'res', 'dailyReturn', 'd1ret', 'd1ret_b', 'Code', 'Date']].tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_series = (total_data.pos * (total_data.d1ret - total_data.d1ret_b)).groupby(total_data.Date).sum()\n",
"ret_series.index = pd.to_datetime(ret_series.index, format='%Y%m%d')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_series.cumsum().plot(figsize=(14,7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_series.cumsum()[-20:].plot(figsize=(14,7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data.pos.groupby(groups).apply(lambda x: np.abs(x).sum()).head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Factor Performance (Long Only - Top 100 equal weighted)\n",
"------------------------------------------------------------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"use_rank = 100\n",
"total_data['pos'] = rb.rank_build(total_data.res.values, use_rank, groups) / use_rank"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data[['pos', 'res', 'dailyReturn', 'd1ret', 'd1ret_b', 'Code', 'Date']].tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_series = (total_data.pos * (total_data.d1ret - total_data.d1ret_b)).groupby(groups).sum()\n",
"ret_series.index = pd.to_datetime(ret_series.index, format='%Y%m%d')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_series.cumsum().plot(figsize=(14,7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ret_series.cumsum()[-20:].plot(figsize=(14,7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_data.pos.groupby(groups).sum().head()"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Risk Exposure\n",
"-------------------------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"excess_return = (total_data.d1ret - total_data.d1ret_b).values\n",
"excess_return.shape = -1, 1\n",
"pos_series = total_data.pos.values\n",
"pos_series.shape = -1, 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"to_explain = total_data.pos.values * excess_return\n",
"depends_pos = total_data[risk_facto_cols].values\n",
"depends = depends_pos * excess_return"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"idiosyncratic, other_stats = ne.neutralize(depends, to_explain, groups, output_exposure=True, output_explained=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"systemetic = other_stats['explained']\n",
"exposure = other_stats['exposure']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"analyis_table = hstack((idiosyncratic, systemetic[:, :, 0]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cols = ['idiosyncratic']\n",
"cols.extend(risk_facto_cols)\n",
"analyis_table = pd.DataFrame(analyis_table, columns=cols, index=groups)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"aggregated_bars = analyis_table.groupby(level=0).sum()\n",
"aggregated_bars.index = pd.to_datetime(aggregated_bars.index, format='%Y%m%d')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"top_sources = aggregated_bars.sum().abs().sort_values(ascending=False).index[:10]\n",
"aggregated_bars.sum().abs().sort_values(ascending=False).plot(kind='bar', figsize=(16, 8))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aggregated_bars[top_sources].cumsum().plot(figsize=(14, 7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"exposure_table = pd.DataFrame(exposure[:, :, 0], columns=risk_facto_cols, index=groups)\n",
"exposure_table = exposure_table.groupby(level=0).first()\n",
"exposure_table.index = pd.to_datetime(exposure_table.index, format='%Y%m%d')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"exposure_table[top_sources[1:]].plot(figsize=(14,7))\n",
"plt.legend(loc='upper center', ncol=len(top_sources[1:]) // 3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clean up\n",
"-----------------------"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del df\n",
"del total_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_factors = [factor] + risk_facto_cols.tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_factors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aggregated_bars.sum().abs().sort_values(ascending=False)[:10].plot?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"aggregated_bars.sum().abs().sort_values(ascending=False)[:10].plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment