Greatly enhance optimizer performance by using a factor model

8541e9e2 · Dr.李 · 949d03a9 · 8541e9e2 · 8541e9e2 · 40976afd
Commit 8541e9e2 authored May 27, 2018 by Dr.李
6 changed files
--- a/alphamind/cython/optimizers.pyx
+++ b/alphamind/cython/optimizers.pyx
@@ -10,6 +10,7 @@ cimport numpy as cnp
 from libcpp.string cimport string
 from libcpp.vector cimport vector
 import numpy as np
+from PyFin.api import pyFinAssert
 cdef extern from "lpoptimizer.hpp" namespace "pfopt":
@@ -71,7 +72,11 @@ cdef extern from "tvoptimizer.hpp" namespace "pfopt":
                    double*,
                    double*,
                    double,
-                    double) except +
+                    double,
+                    int,
+                    double*,
+                    double*,
+                    double*) except +
        vector[double] xValue()
        double feval()
        int status()
@@ -81,6 +86,7 @@ cdef class CVOptimizer:
    cdef TVOptimizer* cobj
    cdef int n
    cdef int m
+    cdef int f
    def __cinit__(self,
                  double[:] expected_return,
@@ -91,20 +97,26 @@ cdef class CVOptimizer:
                  double[:] clbound=None,
                  double[:] cubound=None,
                  double target_low=0.0,
-                  double target_high=1.0):
+                  double target_high=1.0,
+                  cnp.ndarray[double, ndim=2] factor_cov_matrix=None,
+                  cnp.ndarray[double, ndim=2] factor_loading_matrix=None,
+                  double[:] idsync_risk=None):
        self.n = lbound.shape[0]
        self.m = 0
-        cdef double[:] cov = cov_matrix.flatten(order='C')
+        self.f = factor_cov_matrix.shape[0] if factor_cov_matrix is not None else 0
+        cdef double[:] cov = cov_matrix.flatten(order='C') if cov_matrix is not None else None
        cdef double[:] cons
+        cdef double[:] factor_cov = factor_cov_matrix.flatten(order='C') if factor_cov_matrix is not None else None
+        cdef double[:] factor_loading = factor_loading_matrix.flatten(order='C') if factor_loading_matrix is not None else None
        if cons_matrix is not None:
            self.m = cons_matrix.shape[0]
-            cons = cons_matrix.flatten(order='C');
+            cons = cons_matrix.flatten(order='C')
            self.cobj = new TVOptimizer(self.n,
                                        &expected_return[0],
-                                        &cov[0],
+                                        &cov[0] if cov is not None else NULL,
                                        &lbound[0],
                                        &ubound[0],
                                        self.m,
@@ -112,11 +124,15 @@ cdef class CVOptimizer:
                                        &clbound[0],
                                        &cubound[0],
                                        target_low,
-                                        target_high)
+                                        target_high,
+                                        self.f,
+                                        &factor_cov[0] if factor_cov is not None else NULL,
+                                        &factor_loading[0] if factor_loading is not None else NULL,
+                                        &idsync_risk[0] if idsync_risk is not None else NULL)
        else:
            self.cobj = new TVOptimizer(self.n,
                                        &expected_return[0],
-                                        &cov[0],
+                                        &cov[0] if cov is not None else NULL,
                                        &lbound[0],
                                        &ubound[0],
                                        0,
@@ -124,7 +140,11 @@ cdef class CVOptimizer:
                                        NULL,
                                        NULL,
                                        target_low,
-                                        target_high)
+                                        target_high,
+                                        self.f,
+                                        &factor_cov[0] if factor_cov is not None else NULL,
+                                        &factor_loading[0] if factor_loading is not None else NULL,
+                                        &idsync_risk[0] if idsync_risk is not None else NULL)
    def __dealloc__(self):
        del self.cobj
@@ -150,7 +170,11 @@ cdef extern from "mvoptimizer.hpp" namespace "pfopt":
                    double*,
                    double*,
                    double*,
-                    double) except +
+                    double,
+                    int,
+                    double*,
+                    double*,
+                    double*) except +
        vector[double] xValue()
        double feval()
        int status()
@@ -171,12 +195,12 @@ cdef extern from "qpalglib.hpp" namespace "pfopt":
 cdef class QPOptimizer:
    cdef MVOptimizer* cobj
-    cdef QPAlglib* cobj2
    cdef cnp.ndarray er
    cdef cnp.ndarray cov
    cdef double risk_aversion
    cdef int n
    cdef int m
+    cdef int f
    def __cinit__(self,
                  double[:] expected_return,
@@ -186,15 +210,21 @@ cdef class QPOptimizer:
                  cnp.ndarray[double, ndim=2] cons_matrix=None,
                  double[:] clbound=None,
                  double[:] cubound=None,
-                 double risk_aversion=1.0):
+                  double risk_aversion=1.0,
+                  cnp.ndarray[double, ndim=2] factor_cov_matrix=None,
+                  cnp.ndarray[double, ndim=2] factor_loading_matrix=None,
+                  double[:] idsync_risk=None):
        self.n = lbound.shape[0]
        self.m = 0
+        self.f = factor_cov_matrix.shape[0] if factor_cov_matrix is not None else 0
        self.er = np.array(expected_return)
        self.cov = np.array(cov_matrix)
        self.risk_aversion = risk_aversion
-        cdef double[:] cov = cov_matrix.flatten(order='C')
+        cdef double[:] cov = cov_matrix.flatten(order='C') if cov_matrix is not None else None
        cdef double[:] cons
+        cdef double[:] factor_cov = factor_cov_matrix.flatten(order='C') if factor_cov_matrix is not None else None
+        cdef double[:] factor_loading = factor_loading_matrix.flatten(order='C') if factor_loading_matrix is not None else None
        if cons_matrix is not None:
            self.m = cons_matrix.shape[0]
@@ -202,48 +232,49 @@ cdef class QPOptimizer:
            self.cobj = new MVOptimizer(self.n,
                                        &expected_return[0],
-                                        &cov[0],
+                                        &cov[0] if cov is not None else NULL,
                                        &lbound[0],
                                        &ubound[0],
                                        self.m,
                                        &cons[0],
                                        &clbound[0],
                                        &cubound[0],
-                                        risk_aversion)
+                                        risk_aversion,
+                                        self.f,
+                                        &factor_cov[0] if factor_cov is not None else NULL,
+                                        &factor_loading[0] if factor_loading is not None else NULL,
+                                        &idsync_risk[0] if idsync_risk is not None else NULL)
        else:
-            self.cobj2 = new QPAlglib(self.n,
+            # self.cobj2 = new QPAlglib(self.n,
+            #                           &expected_return[0],
+            #                           &cov[0] if cov is not None else NULL,
+            #                           &lbound[0],
+            #                           &ubound[0],
+            #                           risk_aversion)
+            self.cobj = new MVOptimizer(self.n,
                                        &expected_return[0],
-                                      &cov[0],
+                                        &cov[0] if cov is not None else NULL,
                                        &lbound[0],
                                        &ubound[0],
-                                      risk_aversion)
+                                        self.m,
+                                        NULL,
+                                        NULL,
+                                        NULL,
+                                        risk_aversion,
+                                        self.f,
+                                        &factor_cov[0] if factor_cov is not None else NULL,
+                                        &factor_loading[0] if factor_loading is not None else NULL,
+                                        &idsync_risk[0] if idsync_risk is not None else NULL)
    def __dealloc__(self):
-        if self.cobj:
        del self.cobj
-        else:
-            del self.cobj2
    def feval(self):
-        if self.cobj:
        return self.cobj.feval()
-        else:
-            x = np.array(self.cobj2.xValue())
-            return 0.5 * self.risk_aversion * x @ self.cov @ x - self.er @ x
    def x_value(self):
-        if self.cobj:
        return np.array(self.cobj.xValue())
-        else:
-            return np.array(self.cobj2.xValue())
    def status(self):
-        if self.cobj:
        return self.cobj.status()
-        else:
-            status = self.cobj2.status()
-            if 1 <= status <= 4:
-                return 0
-            else:
-                return status
--- a/alphamind/data/engines/sqlengine.py
+++ b/alphamind/data/engines/sqlengine.py
@@ -371,7 +371,7 @@ class SqlEngine(object):
        res['chgPct'] = df.chgPct
        res = res.loc[ref_date]
        res.index = list(range(len(res)))
-        return res.drop_duplicates(['trade_date', 'code'])
+        return res
    def fetch_factor_range(self,
                           universe: Universe,

--- a/pfopt @ 40976afd
+++ b/pfopt @ 40976afd
-Subproject commit 1dcecd88728512ff50730f418d9d58195bf33851
+Subproject commit 40976afd3ea03b921177cef364fa8a6b37bf4dda
--- a/alphamind/portfolio/meanvariancebuilder.py
+++ b/alphamind/portfolio/meanvariancebuilder.py
@@ -51,7 +51,10 @@ def mean_variance_builder(er: np.ndarray,
                          ubound: Union[np.ndarray, float],
                          risk_exposure: Optional[np.ndarray],
                          risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
-                          lam: float=1.) -> Tuple[str, float, np.ndarray]:
+                          lam: float=1.,
+                          factor_cov: np.ndarray=None,
+                          factor_loading: np.ndarray=None,
+                          idsync: np.ndarray=None) -> Tuple[str, float, np.ndarray]:
    lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, risk_target)
    optimizer = QPOptimizer(er,
@@ -61,7 +64,10 @@ def mean_variance_builder(er: np.ndarray,
                            cons_mat,
                            clbound,
                            cubound,
-                            lam)
+                            lam,
+                            factor_cov,
+                            factor_loading,
+                            idsync)
    return _create_result(optimizer, bm)
@@ -74,7 +80,10 @@ def target_vol_builder(er: np.ndarray,
                       risk_exposure: Optional[np.ndarray],
                       risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
                       vol_low: float = 0.,
-                       vol_high: float = 1.)-> Tuple[str, float, np.ndarray]:
+                       vol_high: float = 1.,
+                       factor_cov: np.ndarray = None,
+                       factor_loading: np.ndarray = None,
+                       idsync: np.ndarray = None)-> Tuple[str, float, np.ndarray]:
    lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, risk_target)
    optimizer = CVOptimizer(er,
@@ -85,7 +94,10 @@ def target_vol_builder(er: np.ndarray,
                            clbound,
                            cubound,
                            vol_low,
-                            vol_high)
+                            vol_high,
+                            factor_cov,
+                            factor_loading,
+                            idsync)
    return _create_result(optimizer, bm)

--- a/alphamind/tests/cython/test_optimizers.py
+++ b/alphamind/tests/cython/test_optimizers.py
@@ -54,6 +54,36 @@ class TestOptimizers(unittest.TestCase):
                                             [0.1996, 0.3004, 0.5000],
                                             4)
+    def test_qpoptimizer_with_factor_model(self):
+        objective = np.array([0.1, 0.2, 0.3])
+        lbound = np.array([0.0, 0.0, 0.0])
+        ubound = np.array([1.0, 1.0, 1.0])
+        factor_var = np.array([[0.5, -0.3], [-0.3, 0.7]])
+        factor_load = np.array([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]])
+        idsync = np.array([0.1, 0.3, 0.2])
+        cons = np.array([[1., 1., 1.]])
+        clbound = np.array([1.])
+        cubound = np.array([1.])
+        optimizer = QPOptimizer(objective,
+                                None,
+                                lbound,
+                                ubound,
+                                cons,
+                                clbound,
+                                cubound,
+                                1.,
+                                factor_var,
+                                factor_load,
+                                idsync)
+        # check against cvxpy result
+        np.testing.assert_array_almost_equal(optimizer.x_value(),
+                                             [0.2866857, 0.21416417, 0.49915014],
+                                             4)
    def test_qpoptimizer_with_identity_matrix(self):
        objective = np.array([-0.02, 0.01, 0.03])
        cov = np.diag([1., 1., 1.])
@@ -122,6 +152,38 @@ class TestOptimizers(unittest.TestCase):
                                             [-0.3, -0.10919033, 0.40919033],
                                             4)
+    def test_cvoptimizer_with_factor_model(self):
+        objective = np.array([0.1, 0.2, 0.3])
+        lbound = np.array([0.0, 0.0, 0.0])
+        ubound = np.array([1.0, 1.0, 1.0])
+        factor_var = np.array([[0.5, -0.3], [-0.3, 0.7]])
+        factor_load = np.array([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]])
+        idsync = np.array([0.1, 0.3, 0.2])
+        cons = np.array([[1., 1., 1.]])
+        clbound = np.array([1.])
+        cubound = np.array([1.])
+        target_vol = 0.5
+        optimizer = CVOptimizer(objective,
+                                None,
+                                lbound,
+                                ubound,
+                                cons,
+                                clbound,
+                                cubound,
+                                0.,
+                                target_vol,
+                                factor_var,
+                                factor_load,
+                                idsync)
+        # check against cvxpy result
+        np.testing.assert_array_almost_equal(optimizer.x_value(),
+                                             [0.26595552, 0.21675092, 0.51729356],
+                                             4)
    def test_cvoptimizer_with_cons_and_ieq(self):
        objective = np.array([0.1, 0.2, 0.3])
        cov = np.array([[0.05, 0.01, 0.02],

--- a/notebooks/Example 7 - Portfolio Optimizer Performance.ipynb
+++ b/notebooks/Example 7 - Portfolio Optimizer Performance.ipynb