Commit a01cabdd authored by Dr.李

added example directory

parent f391d805
# -*- coding: utf-8 -*-
"""
Created on 2017-5-15
@author: cheng.li
"""
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on 2017-5-15
@author: cheng.li
"""
# Names of the risk factors used for the "500" universe: 26 industry labels
# followed by 'Size'. The strings are consumed verbatim elsewhere (presumably
# as database column names -- verify against the schema before "fixing" any
# spelling such as 'Machinary').
risk_factors_500 = [
    'CommunicationsAndTransportation',
    'LeisureServices',
    'MultiMedia',
    'PublicUtility',
    'Agriculture',
    'ChemicalIndustry',
    'MedicationAndBio',
    'CommercialTrade',
    'DefenseIndustry',
    'HouseholdAppliances',
    'ConstructionAndMaterial',
    'BuildingDecoration',
    'RealEstate',
    'DiversifiedMetal',
    'Machinary',
    'MotorVehicle',
    'ElectronicIndustry',
    'ElectricalEquip',
    'TextileAndGarment',
    'Synthetics',
    'Computer',
    'LightManufacturing',
    'Telecoms',
    'ExtractiveIndustry',
    'Metal',
    'FoodAndBeverage',
    'Size',
]
# -*- coding: utf-8 -*-
"""
Created on 2017-5-15
@author: cheng.li
"""
import numpy as np
import sqlalchemy
import pandas as pd
from alphamind.examples.config import risk_factors_500
from alphamind.data.standardize import standardize
from alphamind.data.neutralize import neutralize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.linearbuilder import linear_build
# Example: build a single-date, risk-constrained portfolio from a weighted
# blend of alpha factors. The script reads four tables from a MySQL database,
# joins them on (Date, Code), pre-processes the alpha factors, combines them
# into one expected-return score and hands the result to `linear_build`.

# --- settings ---------------------------------------------------------------
# Cross-section date used in the WHERE clause of every query below.
ref_date = '2017-05-11'
common_factors = ['EPSAfterNonRecurring', 'DivP']
prod_factors = ['CFinc1', 'BDTO', 'RVOL']

# One weight per entry of `common_factors + prod_factors` (five in total):
# the reciprocals of the constants below, rescaled so that they sum to one.
factor_weights = 1. / np.array([15.44, 32.72, 49.90, 115.27, 97.76])
factor_weights = factor_weights / factor_weights.sum()

# Column of the `index_components` table holding the benchmark weights.
index_components = '500Weight'

# --- data loading -----------------------------------------------------------
# NOTE(review): the connection string looks like a placeholder
# (user/pwd/host) -- replace it with real settings before running.
engine = sqlalchemy.create_engine('mysql+mysqldb://user:pwd@host/multifactor?charset=utf8')

# The SQL text is assembled with str.format from the module-level constants
# above only; nothing user-supplied is interpolated.
common_factors_df = pd.read_sql(
    "select Date, Code, 申万一级行业, {0} from factor_data where Date = '{1}'"
    .format(','.join(common_factors), ref_date), engine)

prod_factors_df = pd.read_sql(
    "select Date, Code, {0} from prod_500 where Date = '{1}'"
    .format(','.join(prod_factors), ref_date), engine)

risk_factor_df = pd.read_sql(
    "select Date, Code, {0} from risk_factor_500 where Date = '{1}'"
    .format(','.join(risk_factors_500), ref_date), engine)

index_components_df = pd.read_sql(
    "select Date, Code, {0} from index_components where Date = '{1}'"
    .format(index_components, ref_date), engine)

# Successive merges on (Date, Code); with pd.merge's default `how` only rows
# present in all four tables survive.
total_data = pd.merge(common_factors_df, prod_factors_df, on=['Date', 'Code'])
total_data = pd.merge(total_data, risk_factor_df, on=['Date', 'Code'])
total_data = pd.merge(total_data, index_components_df, on=['Date', 'Code'])

# Keep only rows with a non-zero benchmark weight. The explicit copy makes the
# filtered frame independent, so the column assignments below do not raise
# pandas' SettingWithCopyWarning.
total_data = total_data[total_data[index_components] != 0].copy()
# Rescale the benchmark weight column by 1/100
# (presumably stored in percent -- TODO confirm against the table).
total_data[index_components] = total_data[index_components] / 100.0

# --- factor pre-processing --------------------------------------------------
total_factors = common_factors + prod_factors
risk_factors_names = risk_factors_500 + ['Market']
# Constant column appended to the risk factors under the name 'Market'.
total_data['Market'] = 1.

all_factors = total_data[total_factors]
risk_factors = total_data[risk_factors_names]

# Pipeline, innermost call first: winsorize_normal -> standardize ->
# neutralize against the risk-factor matrix.
factor_processed = neutralize(risk_factors.values,
                              standardize(winsorize_normal(all_factors.values)))

normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)

# Expected-return score: weighted sum of the processed factor columns.
er = normed_factor @ factor_weights

# --- portfolio construction -------------------------------------------------
bm = total_data[index_components].values
lbound = 0.
# Per-name upper bound: benchmark weight plus 0.01.
ubound = 0.01 + bm
lbound_exposure = -0.01
ubound_exposure = 0.01
risk_exposure = total_data[risk_factors_names].values

status, value, ret = linear_build(er,
                                  lbound=lbound,
                                  ubound=ubound,
                                  risk_exposure=risk_exposure,
                                  bm=bm,
                                  risk_target=(lbound_exposure, ubound_exposure),
                                  solver='GLPK')

# Anything other than an 'optimal' status is treated as an infeasible target.
if status != 'optimal':
    raise ValueError('target is not feasible')

portfolio = pd.DataFrame({'weight': ret,
                          'industry': total_data['申万一级行业'].values,
                          'zz500': total_data[index_components].values},
                         index=total_data.Code)
print(portfolio)
This diff is collapsed.
......@@ -3,7 +3,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%pylab inline\n",
......@@ -66,7 +68,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%time\n",
......@@ -132,7 +136,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_cols = raw_df.columns[8:35]\n",
......@@ -176,7 +182,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"benchmark_data = pd.read_sql('select {0}, Date from index_data'.format(benchmark), engine)"
......@@ -208,7 +216,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.dropna(inplace=True)\n",
......@@ -226,7 +236,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"engine = sqlalchemy.create_engine('mysql+pymysql://sa:we083826@10.63.6.176/multifactor?charset=utf8')\n",
......@@ -264,7 +276,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"beta_table.to_hdf('factor_data.hdf', 'beta')"
......@@ -284,7 +298,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.shape"
......@@ -293,7 +309,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"analysis_factors = prod_factors + ['Size']\n",
......@@ -309,7 +327,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ob_risk_factor_cols"
......@@ -394,7 +414,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%time\n",
......@@ -409,7 +431,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_data.tail()"
......@@ -426,7 +450,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%time\n",
......@@ -436,7 +462,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_data[['pos_long_short', 'res', 'dailyReturn', 'd1ret', 'd1ret_b', 'Code', 'Date']].tail()"
......@@ -445,7 +473,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"to_look_into = total_data[ob_risk_factor_cols].multiply(total_data.pos_long_short, axis=0)"
......
......@@ -3,7 +3,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%pylab inline\n",
......@@ -15,7 +17,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%time\n",
......@@ -80,7 +84,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%time\n",
......@@ -108,7 +114,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"len(total_data)"
......@@ -125,7 +133,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.data.standardize import standardize\n",
......@@ -136,7 +146,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_factors = common_factor_cols.append(prod_factor_cols)\n",
......@@ -152,7 +164,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%time\n",
......@@ -164,7 +178,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=total_data.Date)"
......@@ -192,7 +208,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%%time\n",
......@@ -227,7 +245,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum()[-180:].cumsum().plot(figsize=(16, 8))"
......@@ -236,7 +256,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
......@@ -245,7 +267,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum()[-60:].cumsum().plot(figsize=(16, 8))"
......@@ -254,7 +278,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[top_factors].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
......@@ -263,7 +289,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[prod_factor_cols].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
......@@ -272,7 +300,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[prod_factor_cols].groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
......@@ -292,7 +322,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_corr = pos_corr.loc[total_factors, total_factors]"
......@@ -301,7 +333,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"turn_over_table = {}\n",
......@@ -357,7 +391,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from alphamind.portfolio.linearbuilder import linear_build"
......@@ -366,7 +402,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_data[total_factors] = normed_factor.values\n",
......@@ -376,7 +414,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"marke_netural_pos = {}\n",
......@@ -414,7 +454,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_df = pd.DataFrame(marke_netural_pos, index=total_data.Date)"
......@@ -423,17 +465,21 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_mat = (pos_df.values - total_data[[index_components_name]].values / 100.) * total_data[['dret']].values\n",
"ret_df = pd.DataFrame(ret_mat, columns=normed_factor.columns, index=normed_factor.index)"
"ret_df = pd.DataFrame(ret_mat, columns=pos_df.columns, index=normed_factor.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[prod_factor_cols].groupby(level=0).sum().cumsum().plot(figsize=(16, 8))"
......@@ -442,7 +488,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ret_df[prod_factor_cols].groupby(level=0).sum()[-90:].cumsum().plot(figsize=(16, 8))"
......@@ -451,7 +499,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_corr = pos_df.corr()\n",
......@@ -461,7 +511,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"turn_over_table = {}\n",
......@@ -482,7 +534,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"pos_corr = pos_corr.loc[total_factors, total_factors]\n",
......@@ -498,7 +552,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"turn_over_table"
......@@ -515,7 +571,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"risk_factor_values = total_data[risk_factor_cols].values\n",
......@@ -525,7 +583,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"for i, name in enumerate(total_factors):\n",
......@@ -539,7 +599,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"aggregated_risk.max()"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment