Commit e010d7fc authored by Dr.李's avatar Dr.李

added quantile analysis and its corresponding tests

parent 7d05b4a9
......@@ -6,41 +6,18 @@ Created on 2017-5-25
"""
from typing import Optional
from typing import List
from typing import Tuple
import numpy as np
import pandas as pd
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.neutralize import neutralize
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.longshortbulder import long_short_build
from alphamind.portfolio.rankbuilder import rank_build
from alphamind.portfolio.percentbuilder import percent_build
from alphamind.portfolio.linearbuilder import linear_build
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
def factor_processing(raw_factors: np.ndarray,
                      pre_process: Optional[List]=None,
                      risk_factors: Optional[np.ndarray]=None,
                      post_process: Optional[List]=None,
                      do_neutralize: Optional[bool]=True) -> np.ndarray:
    """
    Run a factor array through the optional processing pipeline.

    The steps are applied in this order:

    1. every callable in ``pre_process``;
    2. ``neutralize(risk_factors, factors)``, only when ``risk_factors`` is
       not None and ``do_neutralize`` is truthy;
    3. every callable in ``post_process``.

    :param raw_factors: factor values to process
    :param pre_process: callables applied in sequence before neutralization;
        each receives the current factors and returns the transformed ones
    :param risk_factors: risk exposures handed to ``neutralize``; None skips
        the neutralization step
    :param post_process: callables applied in sequence after neutralization
    :param do_neutralize: set to a falsy value to skip neutralization even
        when ``risk_factors`` is supplied
    :return: the processed factors (``raw_factors`` itself when no step applies)
    """
    new_factors = raw_factors

    if pre_process:
        for p in pre_process:
            new_factors = p(new_factors)

    if risk_factors is not None and do_neutralize:
        new_factors = neutralize(risk_factors, new_factors)

    if post_process:
        # The post-processing stage must walk its own step list; iterating
        # pre_process here would re-run the pre steps and never run the post ones.
        for p in post_process:
            new_factors = p(new_factors)

    return new_factors
from alphamind.analysis.utilities import FDataPack
def build_portfolio(er: np.ndarray,
......@@ -69,86 +46,6 @@ def build_portfolio(er: np.ndarray,
return weight
class FDataPack(object):
    """
    Bundle of factor data plus the optional side information (groups,
    benchmark, constraints, risk exposures) used when analysing it.
    """

    def __init__(self,
                 raw_factors: np.ndarray,
                 factor_names: List[str]=None,
                 codes: List=None,
                 groups: Optional[np.ndarray]=None,
                 benchmark: Optional[np.ndarray]=None,
                 constraints: Optional[np.ndarray]=None,
                 risk_exp: Optional[np.ndarray]=None,
                 risk_names: List[str]=None):
        """
        :param raw_factors: factor values; ``shape[1]`` is used as the factor
            count when ``factor_names`` is not given
        :param factor_names: factor labels; defaults to ``factor0``, ``factor1``, ...
        :param codes: stored as-is
        :param groups: group labels, flattened to 1-D when given
        :param benchmark: benchmark weights, flattened to 1-D when given
        :param constraints: stored as-is; used by ``benchmark_constraints``
        :param risk_exp: risk exposures used for neutralization
        :param risk_names: stored as-is
        """
        self.raw_factors = raw_factors

        if factor_names:
            self.factor_names = factor_names
        else:
            self.factor_names = ['factor' + str(i) for i in range(raw_factors.shape[1])]

        self.codes = codes

        # groups defaults to None, so it must be guarded before flattening.
        if groups is not None:
            self.groups = groups.flatten()
        else:
            self.groups = None

        if benchmark is not None:
            self.benchmark = benchmark.flatten()
        else:
            self.benchmark = None

        self.risk_exp = risk_exp
        self.constraints = constraints
        self.risk_names = risk_names

    def benchmark_constraints(self) -> np.ndarray:
        """
        Return ``benchmark @ constraints``.

        Both ``benchmark`` and ``constraints`` must have been supplied.
        """
        return self.benchmark @ self.constraints

    def settle(self, weights: np.ndarray, dx_return: np.ndarray) -> pd.DataFrame:
        """
        Summarise the return and weight/return correlation of a portfolio.

        :param weights: portfolio weights (flattened internally)
        :param dx_return: realised returns (flattened internally)
        :return: DataFrame with columns ``er`` (sum of net position * return)
            and ``ic`` (correlation between net position and return), one row
            per group when groups are set, plus a ``total`` row
        """
        weights = weights.flatten()
        dx_return = dx_return.flatten()

        # Positions are measured relative to the benchmark when there is one.
        if self.benchmark is not None:
            net_pos = weights - self.benchmark
        else:
            net_pos = weights

        ret_arr = net_pos * dx_return

        if self.groups is not None:
            ret_agg = pd.Series(ret_arr).groupby(self.groups).sum()
            ret_agg.loc['total'] = ret_agg.sum()
        else:
            ret_agg = pd.Series(ret_arr.sum(), index=['total'])

        ret_agg.index.name = 'industry'
        ret_agg.name = 'er'

        pos_table = pd.DataFrame(net_pos, columns=['weight'])
        pos_table['ret'] = dx_return

        if self.groups is not None:
            # Per-group correlation matrix; keep the weight-vs-ret entry of each group.
            ic_table = pos_table.groupby(self.groups).corr()['ret'].loc[(slice(None), 'weight')]
            ic_table.loc['total'] = pos_table.corr().iloc[0, 1]
        else:
            ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total'])

        return pd.DataFrame({'er': ret_agg.values,
                             'ic': ic_table.values},
                            index=ret_agg.index)

    def factor_processing(self, pre_process, pos_process, do_neutralize) -> np.ndarray:
        """
        Run ``raw_factors`` through the module-level ``factor_processing``.

        :param pre_process: callables applied before neutralization
        :param pos_process: callables applied after neutralization
        :param do_neutralize: whether to neutralize against ``risk_exp``
        :return: processed factors
        """
        # Arguments are passed by keyword so that pos_process can never land in
        # the risk_factors slot (the third positional parameter) when risk_exp
        # is None; the module-level function skips neutralization in that case.
        return factor_processing(self.raw_factors,
                                 pre_process=pre_process,
                                 risk_factors=self.risk_exp,
                                 post_process=pos_process,
                                 do_neutralize=do_neutralize)
def factor_analysis(factors: pd.DataFrame,
factor_weights: np.ndarray,
industry: np.ndarray,
......
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
from typing import Optional
import numpy as np
import pandas as pd
from alphamind.utilities import agg_mean
from alphamind.data.quantile import quantile
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.analysis.utilities import FDataPack
def quantile_analysis(factors: pd.DataFrame,
                      factor_weights: np.ndarray,
                      dx_return: np.ndarray,
                      n_bins: int=5,
                      benchmark: Optional[np.ndarray]=None,
                      risk_exp: Optional[np.ndarray]=None,
                      do_neutralize=True,
                      **kwargs):
    """
    Combine processed factors into a score and report returns per quantile bin.

    :param factors: raw factor values, one column per factor
    :param factor_weights: weights used to combine the processed factor columns
    :param dx_return: realised returns passed on to ``q_anl_impl``
    :param n_bins: number of quantile bins
    :param benchmark: accepted but not used by this function
    :param risk_exp: risk exposures used for neutralization
    :param do_neutralize: whether to neutralize against ``risk_exp``
    :param kwargs: may carry ``pre_process`` (default
        ``[winsorize_normal, standardize]``) and ``post_process`` (default
        ``[standardize]``) step lists
    :return: the result of ``q_anl_impl`` for the combined score
    """
    # Pull the optional pipeline overrides out of kwargs, falling back to the defaults.
    pre_process = kwargs.pop('pre_process', [winsorize_normal, standardize])
    post_process = kwargs.pop('post_process', [standardize])

    data_pack = FDataPack(raw_factors=factors.values, risk_exp=risk_exp)
    processed = data_pack.factor_processing(pre_process, post_process, do_neutralize)

    er = processed @ factor_weights
    return q_anl_impl(er, n_bins, dx_return)
def q_anl_impl(er: np.ndarray,
               n_bins: int,
               dx_return: np.ndarray) -> np.ndarray:
    """
    Bucket scores into quantile bins and aggregate returns per bin.

    :param er: scores to rank; flattened before binning
    :param n_bins: number of quantile bins
    :param dx_return: realised returns; a 1-D input is treated as one column
    :return: flattened output of ``agg_mean(q_groups, dx_return)``
        (presumably the mean return of each bin -- confirm against ``agg_mean``)
    """
    er = er.flatten()
    q_groups = quantile(er, n_bins)

    if dx_return.ndim < 2:
        # Rebind to a reshaped array instead of assigning to ``.shape``, which
        # would change the shape of the caller's array in place.
        dx_return = dx_return.reshape(-1, 1)

    group_return = agg_mean(q_groups, dx_return).flatten()
    return group_return
# Ad-hoc demo: run the quantile analysis on random data and print the result
# next to the binning of the raw (unprocessed) weighted factors.
if __name__ == '__main__':
    n = 5000
    n_f = 5
    n_bins = 5

    # Factor count and bin count come from n_f / n_bins rather than repeated literals.
    x = np.random.randn(n, n_f)
    risk_exp = np.random.randn(n, 3)
    x_w = np.random.randn(n_f)
    r = np.random.randn(n)

    f_df = pd.DataFrame(x)

    calculated = quantile_analysis(f_df,
                                   x_w,
                                   r,
                                   risk_exp=risk_exp,
                                   n_bins=n_bins,
                                   do_neutralize=True,
                                   pre_process=[winsorize_normal, standardize],
                                   post_process=[standardize])

    # Multiply against the underlying ndarray so that ``er`` is an ndarray:
    # q_anl_impl calls ``er.flatten()``, which pandas objects do not provide.
    er = x_w @ f_df.values.T
    expected = q_anl_impl(er, n_bins, r)

    print(calculated)
    print(expected)
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
from typing import List
from typing import Optional
import numpy as np
import pandas as pd
from alphamind.data.neutralize import neutralize
def factor_processing(raw_factors: np.ndarray,
                      pre_process: Optional[List]=None,
                      risk_factors: Optional[np.ndarray]=None,
                      post_process: Optional[List]=None,
                      do_neutralize: Optional[bool]=True) -> np.ndarray:
    """
    Run a factor array through the optional processing pipeline.

    The steps are applied in this order:

    1. every callable in ``pre_process``;
    2. ``neutralize(risk_factors, factors)``, only when ``risk_factors`` is
       not None and ``do_neutralize`` is truthy;
    3. every callable in ``post_process``.

    :param raw_factors: factor values to process
    :param pre_process: callables applied in sequence before neutralization;
        each receives the current factors and returns the transformed ones
    :param risk_factors: risk exposures handed to ``neutralize``; None skips
        the neutralization step
    :param post_process: callables applied in sequence after neutralization
    :param do_neutralize: set to a falsy value to skip neutralization even
        when ``risk_factors`` is supplied
    :return: the processed factors (``raw_factors`` itself when no step applies)
    """
    new_factors = raw_factors

    if pre_process:
        for p in pre_process:
            new_factors = p(new_factors)

    if risk_factors is not None and do_neutralize:
        new_factors = neutralize(risk_factors, new_factors)

    if post_process:
        # The post-processing stage must walk its own step list; iterating
        # pre_process here would re-run the pre steps and never run the post ones.
        for p in post_process:
            new_factors = p(new_factors)

    return new_factors
class FDataPack(object):
    """
    Bundle of factor data plus the optional side information (groups,
    benchmark, constraints, risk exposures) used when analysing it.
    """

    def __init__(self,
                 raw_factors: np.ndarray,
                 factor_names: List[str]=None,
                 codes: List=None,
                 groups: Optional[np.ndarray]=None,
                 benchmark: Optional[np.ndarray]=None,
                 constraints: Optional[np.ndarray]=None,
                 risk_exp: Optional[np.ndarray]=None,
                 risk_names: List[str]=None):
        """
        :param raw_factors: factor values; ``shape[1]`` is used as the factor
            count when ``factor_names`` is not given
        :param factor_names: factor labels; defaults to ``factor0``, ``factor1``, ...
        :param codes: stored as-is
        :param groups: group labels, flattened to 1-D when given
        :param benchmark: benchmark weights, flattened to 1-D when given
        :param constraints: stored as-is; used by ``benchmark_constraints``
        :param risk_exp: risk exposures used for neutralization
        :param risk_names: stored as-is
        """
        self.raw_factors = raw_factors

        if factor_names:
            self.factor_names = factor_names
        else:
            self.factor_names = ['factor' + str(i) for i in range(raw_factors.shape[1])]

        self.codes = codes

        if groups is not None:
            self.groups = groups.flatten()
        else:
            self.groups = None

        if benchmark is not None:
            self.benchmark = benchmark.flatten()
        else:
            self.benchmark = None

        self.risk_exp = risk_exp
        self.constraints = constraints
        self.risk_names = risk_names

    def benchmark_constraints(self) -> np.ndarray:
        """
        Return ``benchmark @ constraints``.

        Both ``benchmark`` and ``constraints`` must have been supplied.
        """
        return self.benchmark @ self.constraints

    def settle(self, weights: np.ndarray, dx_return: np.ndarray) -> pd.DataFrame:
        """
        Summarise the return and weight/return correlation of a portfolio.

        :param weights: portfolio weights (flattened internally)
        :param dx_return: realised returns (flattened internally)
        :return: DataFrame with columns ``er`` (sum of net position * return)
            and ``ic`` (correlation between net position and return), one row
            per group when groups are set, plus a ``total`` row
        """
        weights = weights.flatten()
        dx_return = dx_return.flatten()

        # Positions are measured relative to the benchmark when there is one.
        if self.benchmark is not None:
            net_pos = weights - self.benchmark
        else:
            net_pos = weights

        ret_arr = net_pos * dx_return

        if self.groups is not None:
            ret_agg = pd.Series(ret_arr).groupby(self.groups).sum()
            ret_agg.loc['total'] = ret_agg.sum()
        else:
            ret_agg = pd.Series(ret_arr.sum(), index=['total'])

        ret_agg.index.name = 'industry'
        ret_agg.name = 'er'

        pos_table = pd.DataFrame(net_pos, columns=['weight'])
        pos_table['ret'] = dx_return

        if self.groups is not None:
            # Per-group correlation matrix; keep the weight-vs-ret entry of each group.
            ic_table = pos_table.groupby(self.groups).corr()['ret'].loc[(slice(None), 'weight')]
            ic_table.loc['total'] = pos_table.corr().iloc[0, 1]
        else:
            ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total'])

        return pd.DataFrame({'er': ret_agg.values,
                             'ic': ic_table.values},
                            index=ret_agg.index)

    def factor_processing(self, pre_process, pos_process, do_neutralize) -> np.ndarray:
        """
        Run ``raw_factors`` through the module-level ``factor_processing``.

        :param pre_process: callables applied before neutralization
        :param pos_process: callables applied after neutralization
        :param do_neutralize: whether to neutralize against ``risk_exp``
        :return: processed factors
        """
        # Arguments are passed by keyword so that pos_process can never land in
        # the risk_factors slot (the third positional parameter) when risk_exp
        # is None; the module-level function skips neutralization in that case.
        return factor_processing(self.raw_factors,
                                 pre_process=pre_process,
                                 risk_factors=self.risk_exp,
                                 post_process=pos_process,
                                 do_neutralize=do_neutralize)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import numpy as np
def quantile(x: np.ndarray, n_bins: int) -> np.ndarray:
    """
    Assign each element of ``x`` to one of ``n_bins`` rank-based bins.

    Elements are ranked in ascending order (via ``argsort``) and the ranks
    ``0 .. n-1`` are cut at ``floor(i * n / n_bins)`` for ``i = 1 .. n_bins``;
    the smallest values get bin ``0`` and the largest get bin ``n_bins - 1``.

    :param x: values to bin (assumed 1-D -- callers in this package flatten first)
    :param n_bins: number of bins, must be at least 1
    :return: integer array with the bin index of every element of ``x``
    :raises ValueError: if ``n_bins`` is smaller than 1
    """
    if n_bins < 1:
        raise ValueError("n_bins must be a positive integer, got {0}".format(n_bins))

    n = x.size
    sorter = x.argsort()

    # Invert the sort permutation: inv[k] is the ascending rank of x[k].
    inv = np.empty(n, dtype=int)
    inv[sorter] = np.arange(n, dtype=int)

    # Integer arithmetic keeps the last pillar exactly equal to n. The float
    # form int(i * (n / n_bins)) can round just below n (e.g. n=1, n_bins=49),
    # which would leave the top-ranked element without a bin.
    pillars = np.array([i * n // n_bins for i in range(1, n_bins + 1)], dtype=int)

    # The bin of rank r is the index of the first pillar strictly above r.
    return np.searchsorted(pillars, inv, side='right').astype(int)
......@@ -12,7 +12,7 @@ from alphamind.data.winsorize import winsorize_normal
from alphamind.data.standardize import standardize
from alphamind.data.neutralize import neutralize
from alphamind.portfolio.constraints import Constraints
from alphamind.analysis.factoranalysis import factor_processing
from alphamind.analysis.utilities import factor_processing
from alphamind.analysis.factoranalysis import factor_analysis
......
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.analysis.quantileanalysis import q_anl_impl
from alphamind.analysis.quantileanalysis import quantile_analysis
from alphamind.analysis.utilities import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.quantile import quantile
class TestQuantileAnalysis(unittest.TestCase):
    """Checks ``q_anl_impl`` and ``quantile_analysis`` against hand-built expectations."""

    def setUp(self):
        # Random fixtures: n observations of n_f factors, plus weights,
        # returns and three columns of risk exposures.
        n = 5000
        n_f = 5

        self.x = np.random.randn(n, n_f)
        self.x_w = np.random.randn(n_f)
        self.r = np.random.randn(n)
        self.risk_exp = np.random.randn(n, 3)
        self.n_bins = 10

    def test_q_anl_impl(self):
        # Expected value: mean return per quantile group of a single factor column.
        n_bins = 5
        factor = self.x[:, 0]

        groups = quantile(factor, n_bins)
        returns_by_group = pd.Series(self.r, index=groups)
        expected_res = returns_by_group.groupby(level=0).mean()

        calculated_res = q_anl_impl(factor, n_bins, self.r)

        np.testing.assert_array_almost_equal(expected_res.values, calculated_res)

    def test_quantile_analysis_simple(self):
        # With all processing disabled the score is just the weighted raw factors.
        factor_frame = pd.DataFrame(self.x)
        calculated = quantile_analysis(factor_frame,
                                       self.x_w,
                                       self.r,
                                       n_bins=self.n_bins,
                                       do_neutralize=False,
                                       pre_process=[],
                                       post_process=[])

        raw_score = self.x_w @ self.x.T
        expected = q_anl_impl(raw_score, self.n_bins, self.r)

        np.testing.assert_array_almost_equal(calculated, expected)

    def test_quantile_analysis_with_factor_processing(self):
        # The same pipeline is applied by hand through factor_processing.
        factor_frame = pd.DataFrame(self.x)
        calculated = quantile_analysis(factor_frame,
                                       self.x_w,
                                       self.r,
                                       n_bins=self.n_bins,
                                       do_neutralize=True,
                                       risk_exp=self.risk_exp,
                                       pre_process=[winsorize_normal, standardize],
                                       post_process=[standardize])

        processed = factor_processing(self.x,
                                      [winsorize_normal, standardize],
                                      self.risk_exp,
                                      [standardize],
                                      True)
        score = self.x_w @ processed.T
        expected = q_anl_impl(score, self.n_bins, self.r)

        np.testing.assert_array_almost_equal(calculated, expected)
# Run this module's test cases when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import unittest
import numpy as np
from alphamind.data.quantile import quantile
class TestQuantile(unittest.TestCase):
    """Checks ``quantile`` against an independent rank-based bin assignment."""

    def test_quantile(self):
        n = 5000
        bins = 10
        s = np.random.randn(n)
        calculated = quantile(s, bins)

        # A double argsort gives the ascending rank of every element.
        rank = s.argsort().argsort()

        bin_size = float(n) / bins
        upper_bounds = [int(i * bin_size) for i in range(1, bins + 1)]
        # Each bin starts where the previous one ended; the first starts at rank 0.
        lower_bounds = [0] + upper_bounds[:-1]

        for i, (lower, upper) in enumerate(zip(lower_bounds, upper_bounds)):
            in_bin = (rank >= lower) & (rank < upper)
            self.assertTrue(np.all(calculated[in_bin] == i))
# Run this module's test cases when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
......@@ -14,6 +14,7 @@ from alphamind.utilities import alpha_logger
from alphamind.tests.data.test_neutralize import TestNeutralize
from alphamind.tests.data.test_standardize import TestStandardize
from alphamind.tests.data.test_winsorize import TestWinsorize
from alphamind.tests.data.test_quantile import TestQuantile
from alphamind.tests.portfolio.test_constraints import TestConstraints
from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild
from alphamind.tests.portfolio.test_rankbuild import TestRankBuild
......@@ -24,12 +25,14 @@ from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle
from alphamind.tests.analysis.test_riskanalysis import TestRiskAnalysis
from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis
from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis
if __name__ == '__main__':
runner = TestRunner([TestNeutralize,
TestStandardize,
TestWinsorize,
TestQuantile,
TestConstraints,
TestLongShortBuild,
TestRankBuild,
......@@ -39,6 +42,7 @@ if __name__ == '__main__':
TestSimpleSettle,
TestRiskAnalysis,
TestPerformanceAnalysis,
TestFactorAnalysis],
TestFactorAnalysis,
TestQuantileAnalysis],
alpha_logger)
runner.run()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment