Commit e010d7fc authored by Dr.李

added quantile analysis and its corresponding tests

parent 7d05b4a9
...@@ -6,41 +6,18 @@ Created on 2017-5-25 ...@@ -6,41 +6,18 @@ Created on 2017-5-25
""" """
from typing import Optional from typing import Optional
from typing import List
from typing import Tuple from typing import Tuple
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from alphamind.data.standardize import standardize from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal from alphamind.data.winsorize import winsorize_normal
from alphamind.data.neutralize import neutralize
from alphamind.portfolio.constraints import Constraints from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.longshortbulder import long_short_build from alphamind.portfolio.longshortbulder import long_short_build
from alphamind.portfolio.rankbuilder import rank_build from alphamind.portfolio.rankbuilder import rank_build
from alphamind.portfolio.percentbuilder import percent_build from alphamind.portfolio.percentbuilder import percent_build
from alphamind.portfolio.linearbuilder import linear_build from alphamind.portfolio.linearbuilder import linear_build
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.analysis.utilities import FDataPack
def factor_processing(raw_factors: np.ndarray,
                      pre_process: Optional[List]=None,
                      risk_factors: Optional[np.ndarray]=None,
                      post_process: Optional[List]=None,
                      do_neutralize: Optional[bool]=True) -> np.ndarray:
    """Run the factor processing pipeline on a raw factor array.

    The steps are applied in this order:

    1. every callable in ``pre_process``, one after another;
    2. ``neutralize(risk_factors, factors)`` when ``risk_factors`` is given
       and ``do_neutralize`` is truthy;
    3. every callable in ``post_process``, one after another.

    :param raw_factors: factor values to transform
    :param pre_process: callables taking and returning a factor array,
        applied before neutralization; ``None`` or empty skips the step
    :param risk_factors: risk exposures handed to ``neutralize``; ``None``
        skips neutralization
    :param post_process: callables taking and returning a factor array,
        applied after neutralization; ``None`` or empty skips the step
    :param do_neutralize: switch to disable neutralization even when
        ``risk_factors`` is supplied
    :return: the transformed factors (``raw_factors`` itself when no step ran)
    """
    new_factors = raw_factors

    if pre_process:
        for p in pre_process:
            new_factors = p(new_factors)

    if risk_factors is not None and do_neutralize:
        new_factors = neutralize(risk_factors, new_factors)

    if post_process:
        # iterate the post-processing steps; looping over ``pre_process``
        # here would re-run the wrong steps (or fail when it is ``None``)
        for p in post_process:
            new_factors = p(new_factors)

    return new_factors
def build_portfolio(er: np.ndarray, def build_portfolio(er: np.ndarray,
...@@ -69,86 +46,6 @@ def build_portfolio(er: np.ndarray, ...@@ -69,86 +46,6 @@ def build_portfolio(er: np.ndarray,
return weight return weight
class FDataPack(object):
    """Bundle of raw factor values plus optional per-row metadata.

    Stores the raw factor matrix together with optional group labels,
    benchmark weights, constraint and risk exposure arrays, and offers helpers
    to settle a set of weights and to run the factor processing pipeline.
    """

    def __init__(self,
                 raw_factors: np.ndarray,
                 factor_names: List[str]=None,
                 codes: List=None,
                 groups: Optional[np.ndarray]=None,
                 benchmark: Optional[np.ndarray]=None,
                 constraints: Optional[np.ndarray]=None,
                 risk_exp: Optional[np.ndarray]=None,
                 risk_names: List[str]=None):
        """Store the inputs, flattening ``groups`` and ``benchmark`` if given.

        :param raw_factors: 2-d factor array (``shape[1]`` is used to build
            default factor names)
        :param factor_names: names of the factors; defaults to
            ``factor0, factor1, ...``
        :param codes: stored as is
        :param groups: optional group labels, flattened on storage
        :param benchmark: optional benchmark weights, flattened on storage
        :param constraints: stored as is
        :param risk_exp: optional risk exposures used for neutralization
        :param risk_names: stored as is
        """
        self.raw_factors = raw_factors

        if factor_names:
            self.factor_names = factor_names
        else:
            self.factor_names = ['factor' + str(i) for i in range(raw_factors.shape[1])]
        self.codes = codes

        # ``groups`` defaults to None, so it must be guarded like ``benchmark``
        if groups is not None:
            self.groups = groups.flatten()
        else:
            self.groups = None

        if benchmark is not None:
            self.benchmark = benchmark.flatten()
        else:
            self.benchmark = None

        self.risk_exp = risk_exp
        self.constraints = constraints
        self.risk_names = risk_names

    def benchmark_constraints(self) -> np.ndarray:
        """Return ``benchmark @ constraints``.

        Both ``benchmark`` and ``constraints`` must have been supplied.
        """
        return self.benchmark @ self.constraints

    def settle(self, weights: np.ndarray, dx_return: np.ndarray) -> pd.DataFrame:
        """Summarize return contribution and weight/return correlation.

        The net position is ``weights`` minus the benchmark (when one is set).

        :param weights: portfolio weights, flattened before use
        :param dx_return: returns, flattened before use
        :return: frame with columns ``er`` (summed ``net position * return``)
            and ``ic`` (correlation between net position and return), one row
            per group when groups are set, plus a final ``total`` row
        """
        weights = weights.flatten()
        dx_return = dx_return.flatten()

        if self.benchmark is not None:
            net_pos = weights - self.benchmark
        else:
            net_pos = weights

        ret_arr = net_pos * dx_return

        if self.groups is not None:
            ret_agg = pd.Series(ret_arr).groupby(self.groups).sum()
            ret_agg.loc['total'] = ret_agg.sum()
        else:
            ret_agg = pd.Series(ret_arr.sum(), index=['total'])

        ret_agg.index.name = 'industry'
        ret_agg.name = 'er'

        pos_table = pd.DataFrame(net_pos, columns=['weight'])
        pos_table['ret'] = dx_return

        if self.groups is not None:
            # per-group correlation matrix; keep the weight-vs-ret entry only
            ic_table = pos_table.groupby(self.groups).corr()['ret'].loc[(slice(None), 'weight')]
            ic_table.loc['total'] = pos_table.corr().iloc[0, 1]
        else:
            ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total'])

        return pd.DataFrame({'er': ret_agg.values,
                             'ic': ic_table.values},
                            index=ret_agg.index)

    def factor_processing(self, pre_process, pos_process, do_neutralize) -> np.ndarray:
        """Run the module level ``factor_processing`` on the stored factors.

        :param pre_process: callables applied before neutralization
        :param pos_process: callables applied after neutralization
        :param do_neutralize: whether to neutralize against ``risk_exp``
        :return: the processed factors
        """
        # Keyword arguments keep ``pos_process`` out of the ``risk_factors``
        # slot; a ``None`` risk exposure simply skips neutralization.
        return factor_processing(self.raw_factors,
                                 pre_process=pre_process,
                                 risk_factors=self.risk_exp,
                                 post_process=pos_process,
                                 do_neutralize=do_neutralize)
def factor_analysis(factors: pd.DataFrame, def factor_analysis(factors: pd.DataFrame,
factor_weights: np.ndarray, factor_weights: np.ndarray,
industry: np.ndarray, industry: np.ndarray,
......
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
from typing import Optional
import numpy as np
import pandas as pd
from alphamind.utilities import agg_mean
from alphamind.data.quantile import quantile
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.analysis.utilities import FDataPack
def quantile_analysis(factors: pd.DataFrame,
                      factor_weights: np.ndarray,
                      dx_return: np.ndarray,
                      n_bins: int=5,
                      benchmark: Optional[np.ndarray]=None,
                      risk_exp: Optional[np.ndarray]=None,
                      do_neutralize=True,
                      **kwargs):
    """Combine processed factors into a score and average returns per bin.

    :param factors: raw factor values; ``factors.values`` is processed
    :param factor_weights: weights combining the processed factors into a
        single score via matrix multiplication
    :param dx_return: returns to be averaged within each quantile bin
    :param n_bins: number of quantile bins
    :param benchmark: accepted but not used by this function
    :param risk_exp: optional risk exposures used for neutralization
    :param do_neutralize: whether neutralization is requested
    :param kwargs: ``pre_process`` (default ``[winsorize_normal, standardize]``)
        and ``post_process`` (default ``[standardize]``) are read from here;
        any other key is ignored
    :return: the result of ``q_anl_impl`` for the combined score
    """
    pre_process = kwargs.pop('pre_process', [winsorize_normal, standardize])
    post_process = kwargs.pop('post_process', [standardize])

    pack = FDataPack(raw_factors=factors.values, risk_exp=risk_exp)
    processed = pack.factor_processing(pre_process, post_process, do_neutralize)
    score = processed @ factor_weights
    return q_anl_impl(score, n_bins, dx_return)
def q_anl_impl(er: np.ndarray,
               n_bins: int,
               dx_return: np.ndarray) -> np.ndarray:
    """Average ``dx_return`` within the quantile bins of ``er``.

    :param er: scores used for ranking; flattened before use
    :param n_bins: number of quantile bins passed to ``quantile``
    :param dx_return: returns to aggregate; an array with fewer than two
        dimensions is treated as a single column
    :return: flattened output of ``agg_mean`` over the quantile groups
    """
    er = er.flatten()
    q_groups = quantile(er, n_bins)

    if dx_return.ndim < 2:
        # Rebind to a reshaped array instead of assigning to ``.shape``:
        # the in-place form silently changes the caller's array.
        dx_return = dx_return.reshape((-1, 1))

    group_return = agg_mean(q_groups, dx_return).flatten()
    return group_return
if __name__ == '__main__':
    # Ad-hoc demo: print the quantile analysis of processed factors next to
    # the quantile returns of the raw weighted factors. The two results are
    # built from different inputs, so they are not expected to agree.
    n = 5000
    n_f = 5
    n_bins = 5

    # one column per factor weight
    x = np.random.randn(n, n_f)
    risk_exp = np.random.randn(n, 3)
    x_w = np.random.randn(n_f)
    r = np.random.randn(n)

    f_df = pd.DataFrame(x)
    calculated = quantile_analysis(f_df,
                                   x_w,
                                   r,
                                   risk_exp=risk_exp,
                                   n_bins=n_bins,
                                   do_neutralize=True,
                                   pre_process=[winsorize_normal, standardize],
                                   post_process=[standardize])

    # multiply against the underlying ndarray so that ``er`` is a plain
    # ndarray, as required by the ``er.flatten()`` call in ``q_anl_impl``
    er = x_w @ f_df.values.T
    expected = q_anl_impl(er, n_bins, r)

    print(calculated)
    print(expected)
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
from typing import List
from typing import Optional
import numpy as np
import pandas as pd
from alphamind.data.neutralize import neutralize
def factor_processing(raw_factors: np.ndarray,
                      pre_process: Optional[List]=None,
                      risk_factors: Optional[np.ndarray]=None,
                      post_process: Optional[List]=None,
                      do_neutralize: Optional[bool]=True) -> np.ndarray:
    """Run the factor processing pipeline on a raw factor array.

    The steps are applied in this order:

    1. every callable in ``pre_process``, one after another;
    2. ``neutralize(risk_factors, factors)`` when ``risk_factors`` is given
       and ``do_neutralize`` is truthy;
    3. every callable in ``post_process``, one after another.

    :param raw_factors: factor values to transform
    :param pre_process: callables taking and returning a factor array,
        applied before neutralization; ``None`` or empty skips the step
    :param risk_factors: risk exposures handed to ``neutralize``; ``None``
        skips neutralization
    :param post_process: callables taking and returning a factor array,
        applied after neutralization; ``None`` or empty skips the step
    :param do_neutralize: switch to disable neutralization even when
        ``risk_factors`` is supplied
    :return: the transformed factors (``raw_factors`` itself when no step ran)
    """
    new_factors = raw_factors

    if pre_process:
        for p in pre_process:
            new_factors = p(new_factors)

    if risk_factors is not None and do_neutralize:
        new_factors = neutralize(risk_factors, new_factors)

    if post_process:
        # iterate the post-processing steps; looping over ``pre_process``
        # here would re-run the wrong steps (or fail when it is ``None``)
        for p in post_process:
            new_factors = p(new_factors)

    return new_factors
class FDataPack(object):
    """Bundle of raw factor values plus optional per-row metadata.

    Stores the raw factor matrix together with optional group labels,
    benchmark weights, constraint and risk exposure arrays, and offers helpers
    to settle a set of weights and to run the factor processing pipeline.
    """

    def __init__(self,
                 raw_factors: np.ndarray,
                 factor_names: List[str]=None,
                 codes: List=None,
                 groups: Optional[np.ndarray]=None,
                 benchmark: Optional[np.ndarray]=None,
                 constraints: Optional[np.ndarray]=None,
                 risk_exp: Optional[np.ndarray]=None,
                 risk_names: List[str]=None):
        """Store the inputs, flattening ``groups`` and ``benchmark`` if given.

        :param raw_factors: 2-d factor array (``shape[1]`` is used to build
            default factor names)
        :param factor_names: names of the factors; defaults to
            ``factor0, factor1, ...``
        :param codes: stored as is
        :param groups: optional group labels, flattened on storage
        :param benchmark: optional benchmark weights, flattened on storage
        :param constraints: stored as is
        :param risk_exp: optional risk exposures used for neutralization
        :param risk_names: stored as is
        """
        self.raw_factors = raw_factors

        if factor_names:
            self.factor_names = factor_names
        else:
            self.factor_names = ['factor' + str(i) for i in range(raw_factors.shape[1])]
        self.codes = codes

        if groups is not None:
            self.groups = groups.flatten()
        else:
            self.groups = None

        if benchmark is not None:
            self.benchmark = benchmark.flatten()
        else:
            self.benchmark = None

        self.risk_exp = risk_exp
        self.constraints = constraints
        self.risk_names = risk_names

    def benchmark_constraints(self) -> np.ndarray:
        """Return ``benchmark @ constraints``.

        Both ``benchmark`` and ``constraints`` must have been supplied.
        """
        return self.benchmark @ self.constraints

    def settle(self, weights: np.ndarray, dx_return: np.ndarray) -> pd.DataFrame:
        """Summarize return contribution and weight/return correlation.

        The net position is ``weights`` minus the benchmark (when one is set).

        :param weights: portfolio weights, flattened before use
        :param dx_return: returns, flattened before use
        :return: frame with columns ``er`` (summed ``net position * return``)
            and ``ic`` (correlation between net position and return), one row
            per group when groups are set, plus a final ``total`` row
        """
        weights = weights.flatten()
        dx_return = dx_return.flatten()

        if self.benchmark is not None:
            net_pos = weights - self.benchmark
        else:
            net_pos = weights

        ret_arr = net_pos * dx_return

        if self.groups is not None:
            ret_agg = pd.Series(ret_arr).groupby(self.groups).sum()
            ret_agg.loc['total'] = ret_agg.sum()
        else:
            ret_agg = pd.Series(ret_arr.sum(), index=['total'])

        ret_agg.index.name = 'industry'
        ret_agg.name = 'er'

        pos_table = pd.DataFrame(net_pos, columns=['weight'])
        pos_table['ret'] = dx_return

        if self.groups is not None:
            # per-group correlation matrix; keep the weight-vs-ret entry only
            ic_table = pos_table.groupby(self.groups).corr()['ret'].loc[(slice(None), 'weight')]
            ic_table.loc['total'] = pos_table.corr().iloc[0, 1]
        else:
            ic_table = pd.Series(pos_table.corr().iloc[0, 1], index=['total'])

        return pd.DataFrame({'er': ret_agg.values,
                             'ic': ic_table.values},
                            index=ret_agg.index)

    def factor_processing(self, pre_process, pos_process, do_neutralize) -> np.ndarray:
        """Run the module level ``factor_processing`` on the stored factors.

        :param pre_process: callables applied before neutralization
        :param pos_process: callables applied after neutralization
        :param do_neutralize: whether to neutralize against ``risk_exp``
        :return: the processed factors
        """
        # Keyword arguments keep ``pos_process`` out of the ``risk_factors``
        # slot; a ``None`` risk exposure simply skips neutralization.
        return factor_processing(self.raw_factors,
                                 pre_process=pre_process,
                                 risk_factors=self.risk_exp,
                                 post_process=pos_process,
                                 do_neutralize=do_neutralize)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import numpy as np
def quantile(x: np.ndarray, n_bins: int) -> np.ndarray:
    """Assign each element of ``x`` to a rank based bin.

    Elements are ranked in ascending order and the ranks are cut into
    ``n_bins`` consecutive ranges; bin ``i`` holds the ranks from
    ``(i * n) // n_bins`` up to, but excluding, ``((i + 1) * n) // n_bins``.

    :param x: values to bin (NOTE(review): written for 1-d input — confirm
        callers always flatten first)
    :param n_bins: number of bins, at least 1
    :return: integer array of size ``x.size`` with labels ``0 .. n_bins - 1``
    :raises ValueError: if ``n_bins`` is smaller than 1
    """
    if n_bins < 1:
        raise ValueError("n_bins must be a positive integer, got {0}".format(n_bins))

    n = x.size
    sorter = x.argsort()
    # inv[k] is the ascending rank of x[k]
    inv = np.empty(n, dtype=int)
    inv[sorter] = np.arange(n, dtype=int)

    # Integer arithmetic makes the last boundary exactly ``n``. The float
    # form ``int(i * (n / n_bins))`` can round below ``n`` and leave the top
    # ranks unassigned, i.e. uninitialized ``np.empty`` memory.
    pillars = [i * n // n_bins for i in range(1, n_bins + 1)]

    q_groups = np.empty(n, dtype=int)

    starter = 0
    for i, r in enumerate(pillars):
        q_groups[(inv >= starter) & (inv < r)] = i
        starter = r

    return q_groups
...@@ -12,7 +12,7 @@ from alphamind.data.winsorize import winsorize_normal ...@@ -12,7 +12,7 @@ from alphamind.data.winsorize import winsorize_normal
from alphamind.data.standardize import standardize from alphamind.data.standardize import standardize
from alphamind.data.neutralize import neutralize from alphamind.data.neutralize import neutralize
from alphamind.portfolio.constraints import Constraints from alphamind.portfolio.constraints import Constraints
from alphamind.analysis.factoranalysis import factor_processing from alphamind.analysis.utilities import factor_processing
from alphamind.analysis.factoranalysis import factor_analysis from alphamind.analysis.factoranalysis import factor_analysis
......
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.analysis.quantileanalysis import q_anl_impl
from alphamind.analysis.quantileanalysis import quantile_analysis
from alphamind.analysis.utilities import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.quantile import quantile
class TestQuantileAnalysis(unittest.TestCase):
    """Tests for ``q_anl_impl`` and ``quantile_analysis``."""

    def setUp(self):
        """Draw random factors, factor weights, returns and risk exposures."""
        n = 5000
        n_f = 5

        # the factor matrix needs one column per factor weight
        self.x = np.random.randn(n, n_f)
        self.x_w = np.random.randn(n_f)
        self.r = np.random.randn(n)
        self.risk_exp = np.random.randn(n, 3)
        self.n_bins = 10

    def test_q_anl_impl(self):
        """``q_anl_impl`` equals the mean return per quantile group."""
        n_bins = 5
        x = self.x[:, 0]
        q_groups = quantile(x, n_bins)

        s = pd.Series(self.r, index=q_groups)
        expected_res = s.groupby(level=0).mean()
        calculated_res = q_anl_impl(x, n_bins, self.r)

        np.testing.assert_array_almost_equal(expected_res.values, calculated_res)

    def test_quantile_analysis_simple(self):
        """Without any processing the result equals the raw weighted score."""
        f_df = pd.DataFrame(self.x)
        calculated = quantile_analysis(f_df,
                                       self.x_w,
                                       self.r,
                                       n_bins=self.n_bins,
                                       do_neutralize=False,
                                       pre_process=[],
                                       post_process=[])

        er = self.x_w @ self.x.T
        expected = q_anl_impl(er, self.n_bins, self.r)
        np.testing.assert_array_almost_equal(calculated, expected)

    def test_quantile_analysis_with_factor_processing(self):
        """With processing the result equals a manual ``factor_processing`` run."""
        f_df = pd.DataFrame(self.x)
        calculated = quantile_analysis(f_df,
                                       self.x_w,
                                       self.r,
                                       n_bins=self.n_bins,
                                       do_neutralize=True,
                                       risk_exp=self.risk_exp,
                                       pre_process=[winsorize_normal, standardize],
                                       post_process=[standardize])

        er = self.x_w @ factor_processing(self.x,
                                          [winsorize_normal, standardize],
                                          self.risk_exp,
                                          [standardize],
                                          True).T
        expected = q_anl_impl(er, self.n_bins, self.r)
        np.testing.assert_array_almost_equal(calculated, expected)
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
# -*- coding: utf-8 -*-
"""
Created on 2017-8-16
@author: cheng.li
"""
import unittest
import numpy as np
from alphamind.data.quantile import quantile
class TestQuantile(unittest.TestCase):
    """Tests for the rank based ``quantile`` binning helper."""

    def test_quantile(self):
        """Every rank range between two boundaries carries its bin label."""
        n_obs = 5000
        n_groups = 10
        sample = np.random.randn(n_obs)
        calculated = quantile(sample, n_groups)

        # double argsort gives the ascending rank of each observation
        ranks = sample.argsort().argsort()

        width = float(n_obs) / n_groups
        upper_bounds = [int(k * width) for k in range(1, n_groups + 1)]

        lower = 0
        for label, upper in enumerate(upper_bounds):
            in_bucket = (ranks >= lower) & (ranks < upper)
            self.assertTrue(np.all(calculated[in_bucket] == label))
            lower = upper
# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
...@@ -14,6 +14,7 @@ from alphamind.utilities import alpha_logger ...@@ -14,6 +14,7 @@ from alphamind.utilities import alpha_logger
from alphamind.tests.data.test_neutralize import TestNeutralize from alphamind.tests.data.test_neutralize import TestNeutralize
from alphamind.tests.data.test_standardize import TestStandardize from alphamind.tests.data.test_standardize import TestStandardize
from alphamind.tests.data.test_winsorize import TestWinsorize from alphamind.tests.data.test_winsorize import TestWinsorize
from alphamind.tests.data.test_quantile import TestQuantile
from alphamind.tests.portfolio.test_constraints import TestConstraints from alphamind.tests.portfolio.test_constraints import TestConstraints
from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild from alphamind.tests.portfolio.test_longshortbuild import TestLongShortBuild
from alphamind.tests.portfolio.test_rankbuild import TestRankBuild from alphamind.tests.portfolio.test_rankbuild import TestRankBuild
...@@ -24,12 +25,14 @@ from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle ...@@ -24,12 +25,14 @@ from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle
from alphamind.tests.analysis.test_riskanalysis import TestRiskAnalysis from alphamind.tests.analysis.test_riskanalysis import TestRiskAnalysis
from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis from alphamind.tests.analysis.test_perfanalysis import TestPerformanceAnalysis
from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis from alphamind.tests.analysis.test_factoranalysis import TestFactorAnalysis
from alphamind.tests.analysis.test_quantilieanalysis import TestQuantileAnalysis
if __name__ == '__main__': if __name__ == '__main__':
runner = TestRunner([TestNeutralize, runner = TestRunner([TestNeutralize,
TestStandardize, TestStandardize,
TestWinsorize, TestWinsorize,
TestQuantile,
TestConstraints, TestConstraints,
TestLongShortBuild, TestLongShortBuild,
TestRankBuild, TestRankBuild,
...@@ -39,6 +42,7 @@ if __name__ == '__main__': ...@@ -39,6 +42,7 @@ if __name__ == '__main__':
TestSimpleSettle, TestSimpleSettle,
TestRiskAnalysis, TestRiskAnalysis,
TestPerformanceAnalysis, TestPerformanceAnalysis,
TestFactorAnalysis], TestFactorAnalysis,
TestQuantileAnalysis],
alpha_logger) alpha_logger)
runner.run() runner.run()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment