Finally, we remove the dependency on Cython

8e2520bc · Dr.李 · 692bd963 · 8e2520bc · 8e2520bc · 8e2520bc
Commit 8e2520bc authored May 13, 2017 by Dr.李
11 changed files
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,6 @@ install:
  - conda install numba
  - conda install scipy
  - conda install pandas
-  - conda install cython
  - conda install scikit-learn
  - pip install cvxopt
  - pip install cvxpy
@@ -43,9 +42,9 @@ install:
  - pip install coveralls
 script:
  - export NUMBA_DISABLE_JIT=1
-  - python setup.py build_ext --line_trace --inplace
+  - python setup.py build_ext --inplace
-  - coverage run --rcfile=./.coveragerc alphamind/tests/test_suite.py
+  - coverage run alphamind/tests/test_suite.py
-  - coverage report --rcfile=./.coveragerc -i
+  - coverage report
-  - coverage html --rcfile=./.coveragerc -i
+  - coverage html
 after_success:
  - coveralls
--- a/alphamind/analysis/perfanalysis.py
+++ b/alphamind/analysis/perfanalysis.py
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-5-12
+@author: cheng.li
+"""
+import pandas as pd
+from alphamind.analysis.riskanalysis import risk_analysis
+def perf_attribution_by_pos(net_weight_series: pd.Series,
+                            next_bar_return_series: pd.Series,
+                            benchmark_table: pd.DataFrame) -> pd.DataFrame:
+    """
+    Aggregate the explained table produced by ``risk_analysis`` over the first index level.
+
+    :param net_weight_series: forwarded unchanged to ``risk_analysis``
+    :param next_bar_return_series: forwarded unchanged to ``risk_analysis``
+    :param benchmark_table: forwarded unchanged to ``risk_analysis``
+    :return: the explained table with all rows sharing a level-0 index value summed together
+    """
+    # risk_analysis returns (explained_table, exposure_table); only the first is used here
+    attribution, _unused_exposure = risk_analysis(net_weight_series,
+                                                  next_bar_return_series,
+                                                  benchmark_table)
+    per_level = attribution.groupby(level=0)
+    return per_level.sum()
--- a/alphamind/analysis/riskanalysis.py
+++ b/alphamind/analysis/riskanalysis.py
@@ -25,13 +25,13 @@ def risk_analysis(net_weight_series: pd.Series,
                                            output_exposure=True,
                                            output_explained=True)
-    systemetic = other_stats['explained']
+    systematic = other_stats['explained']
    exposure = other_stats['exposure']
-    explained_table = np.hstack((idiosyncratic, systemetic[:, :, 0]))
+    explained_table = np.hstack((idiosyncratic, systematic[:, :, 0]))
    cols = ['idiosyncratic']
    cols.extend(risk_factor_cols)
-    explained_table = pd.DataFrame(explained_table * net_pos , columns=cols, index=net_weight_series.index)
+    explained_table = pd.DataFrame(explained_table * net_pos, columns=cols, index=net_weight_series.index)
    exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, index=net_weight_series.index)
    return explained_table, exposure_table.groupby(level=0).first()
--- a/alphamind/cyimpl.pyx
+++ b/alphamind/cyimpl.pyx
-# -*- coding: utf-8 -*-
-# distutils: language = c++
-"""
-Created on 2017-4-25
-@author: cheng.li
-"""
-import numpy as np
-cimport numpy as np
-cimport cython
-from libcpp.vector cimport vector as cpp_vector
-from libcpp.unordered_map cimport unordered_map as cpp_map
-from cython.operator cimport dereference as deref
-ctypedef long long int64_t
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef groupby(long[:] groups):
-    cdef long long length = groups.shape[0]
-    cdef cpp_map[long, cpp_vector[int64_t]] group_ids
-    cdef long long i
-    cdef long curr_tag
-    cdef cpp_map[long, cpp_vector[int64_t]].iterator it
-    cdef np.ndarray[long long, ndim=1] npy_array
-    for i in range(length):
-        curr_tag = groups[i]
-        it = group_ids.find(curr_tag)
-        if it == group_ids.end():
-            group_ids[curr_tag] = [i]
-        else:
-            deref(it).second.push_back(i)
-    return {k: np.array(v) for k, v in group_ids.items()}
\ No newline at end of file
--- a/alphamind/data/neutralize.py
+++ b/alphamind/data/neutralize.py
@@ -12,7 +12,7 @@ from numpy.linalg import solve
 from typing import Tuple
 from typing import Union
 from typing import Dict
-from alphamind.cyimpl import groupby
+from alphamind.utilities import groupby
 def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
@@ -35,9 +35,11 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
            if output_exposure:
                exposure = zeros(x.shape + (1,))
-        groups_ids = groupby(groups)
+        index_diff, order = groupby(groups)
-        for curr_idx in groups_ids.values():
+        start = 0
+        for diff_loc in index_diff:
+            curr_idx = order[start:diff_loc + 1]
            curr_x, b = _sub_step(x, y, curr_idx, res)
            if output_exposure:
                for i in range(exposure.shape[2]):
@@ -45,6 +47,16 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
            if output_explained:
                for i in range(explained.shape[2]):
                    explained[curr_idx] = ls_explain(curr_x, b)
+            start = diff_loc + 1
+        curr_idx = order[start:]
+        curr_x, b = _sub_step(x, y, curr_idx, res)
+        if output_exposure:
+            for i in range(exposure.shape[2]):
+                exposure[curr_idx, :, i] = b[:, i]
+        if output_explained:
+            for i in range(explained.shape[2]):
+                explained[curr_idx] = ls_explain(curr_x, b)
    else:
        b = ls_fit(x, y)
        res = ls_res(x, y, b)

--- a/alphamind/model/linearmodel.py
+++ b/alphamind/model/linearmodel.py
@@ -5,10 +5,11 @@ Created on 2017-5-10
 @author: cheng.li
 """
+from typing import Tuple
 from typing import Union
 import numpy as np
 import numba as nb
-from alphamind.cyimpl import groupby
+from alphamind.utilities import groupby
 from alphamind.data.neutralize import ls_fit
@@ -21,13 +22,9 @@ class LinearModel(object):
        self.model_parameter = _train(x, y, groups)
    def predict(self, x, groups=None):
-        if groups is not None and isinstance(self.model_parameter, dict):
+        if groups is not None and isinstance(self.model_parameter, tuple):
            names = np.unique(groups)
-            pred_v = np.zeros(x.shape[0])
+            return _prediction_impl(self.model_parameter[0], self.model_parameter[1], groups, names, x)
-            for name in names:
-                this_param = self.model_parameter[name]
-                _prediction_group(name, groups, this_param, x, pred_v)
-            return pred_v
        elif self.model_parameter is None:
            raise ValueError("linear model is not calibrated yet")
        elif groups is None:
@@ -37,22 +34,35 @@ class LinearModel(object):
 @nb.njit(nogil=True, cache=True)
-def _prediction_group(name, groups, this_param, x, pred_v):
-    idx = groups == name
-    pred_v[idx] = x[idx] @ this_param
+def _prediction_impl(calibrated_names, model_parameter, groups, names, x):
+    """
+    Predict every row of ``x`` with the coefficients calibrated for that row's group.
+
+    :param calibrated_names: group labels seen at calibration, sorted ascending
+                             (``_train`` supplies ``np.unique(groups)``)
+    :param model_parameter: coefficient table; row ``k`` belongs to ``calibrated_names[k]``
+    :param groups: group label of each row of ``x``
+    :param names: the distinct labels to predict for
+    :param x: feature matrix, one row per observation
+    :return: 1-d array of predictions, one per row of ``x``
+    :raises ValueError: if a label in ``names`` was not present at calibration
+    """
+    places = np.searchsorted(calibrated_names, names)
+    pred_v = np.zeros(x.shape[0])
+    n_calibrated = len(calibrated_names)
+    for k, name in zip(places, names):
+        # searchsorted yields an insertion point even when `name` is absent; without this
+        # check an unseen group would silently borrow a neighbouring group's coefficients
+        # (or index past the end of the table)
+        if k >= n_calibrated or calibrated_names[k] != name:
+            raise ValueError("group to predict is not found in the calibrated model")
+        this_param = model_parameter[k]
+        idx = groups == name
+        pred_v[idx] = x[idx] @ this_param
+    return pred_v
-def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    """
+    Fit the linear model, either once over all rows or once per group.
+
+    :param x: feature matrix
+    :param y: target values
+    :param groups: optional group label per row; ``None`` means a single fit
+    :return: the ``ls_fit`` result when ``groups`` is ``None``; otherwise the pair
+             ``(np.unique(groups), res_beta)``. ``np.unique`` returns the labels sorted and
+             ``_train_loop`` walks the groups in the sorted order given by ``groupby``,
+             so row ``k`` of ``res_beta`` belongs to the ``k``-th label.
+    """
    if groups is None:
        return ls_fit(x, y)
    else:
-        groups_ids = groupby(groups)
+        index_diff, order = groupby(groups)
-        res_beta = {}
+        res_beta = _train_loop(index_diff, order, x, y)
+        return np.unique(groups), res_beta
-        for k, curr_idx in groups_ids.items():
-            res_beta[k] = _train_sub_group(x, y, curr_idx)
-        return res_beta
+@nb.njit(nogil=True, cache=True)
+def _train_loop(index_diff, order, x, y):
+    """
+    Fit one coefficient row per group.
+
+    :param index_diff: positions, within the sorted labels, of the last row of every group
+                       except the final one (first element of the ``groupby`` result)
+    :param order: row indices sorted by group label (second element of the ``groupby`` result)
+    :param x: feature matrix
+    :param y: target values
+    :return: array of shape ``(len(index_diff) + 1, x.shape[1])``; row ``k`` holds the
+             coefficients of the ``k``-th group in sorted-label order
+    """
+    # len(index_diff) boundaries separate len(index_diff) + 1 groups
+    n_groups = len(index_diff) + 1
+    res_beta = np.zeros((n_groups, x.shape[1]))
+    start = 0
+    for k in range(n_groups - 1):
+        stop = index_diff[k] + 1
+        res_beta[k] = _train_sub_group(x, y, order[start:stop])
+        start = stop
+    # The final group runs to the end of `order`. It is addressed by count rather than by
+    # the loop variable, which is never bound when there is a single group (empty index_diff).
+    res_beta[n_groups - 1] = _train_sub_group(x, y, order[start:])
+    return res_beta
 @nb.njit(nogil=True, cache=True)
@@ -60,25 +70,3 @@ def _train_sub_group(x, y, curr_idx):
    curr_x = x[curr_idx]
    curr_y = y[curr_idx]
    return ls_fit(curr_x, curr_y)
-if __name__ == '__main__':
-    import datetime as dt
-    x = np.random.randn(3000, 10)
-    y = np.random.randn(3000)
-    groups = np.random.randint(30, size=3000)
-    to_x = np.random.randn(100, 10)
-    to_groups = np.random.randint(30, size=100)
-    model = LinearModel()
-    start = dt.datetime.now()
-    for i in range(5000):
-        model.calibrate(x, y, groups)
-    print(dt.datetime.now() - start)
-    start = dt.datetime.now()
-    for i in range(50000):
-        model.predict(to_x, to_groups)
-    print(dt.datetime.now() - start)
\ No newline at end of file
--- a/alphamind/portfolio/percentbuilder.py
+++ b/alphamind/portfolio/percentbuilder.py
@@ -8,7 +8,7 @@ Created on 2017-5-4
 import numpy as np
 from numpy import zeros
 from numpy import zeros_like
-from alphamind.cyimpl import groupby
+from alphamind.utilities import groupby
 from alphamind.utilities import set_value
@@ -20,12 +20,21 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np
        length = len(neg_er)
        weights = zeros((length, 1))
        if groups is not None:
-            group_ids = groupby(groups)
+            index_diff, order = groupby(groups)
-            for current_index in group_ids.values():
+            start = 0
+            for diff_loc in index_diff:
+                current_index = order[start:diff_loc+1]
                current_ordering = neg_er[current_index].argsort()
                current_ordering.shape = -1, 1
                use_rank = int(percent * len(current_index))
                set_value(weights, current_index[current_ordering[:use_rank]], 1.)
+                start = diff_loc + 1
+            current_index = order[start:]
+            current_ordering = neg_er[current_index].argsort()
+            current_ordering.shape = -1, 1
+            use_rank = int(percent * len(current_index))
+            set_value(weights, current_index[current_ordering[:use_rank]], 1.)
        else:
            ordering = neg_er.argsort()
            use_rank = int(percent * len(neg_er))
@@ -36,11 +45,18 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np
        weights = zeros_like(er)
        if groups is not None:
-            group_ids = groupby(groups)
+            index_diff, order = groupby(groups)
-            for current_index in group_ids.values():
+            start = 0
+            for diff_loc in index_diff:
+                current_index = order[start:diff_loc + 1]
                current_ordering = neg_er[current_index].argsort(axis=0)
                use_rank = int(percent * len(current_index))
                set_value(weights, current_index[current_ordering[:use_rank]], 1)
+                start = diff_loc + 1
+            current_index = order[start:]
+            current_ordering = neg_er[current_index].argsort(axis=0)
+            use_rank = int(percent * len(current_index))
+            set_value(weights, current_index[current_ordering[:use_rank]], 1)
        else:
            ordering = neg_er.argsort(axis=0)
            use_rank = int(percent * len(neg_er))

--- a/alphamind/portfolio/rankbuilder.py
+++ b/alphamind/portfolio/rankbuilder.py
@@ -8,7 +8,7 @@ Created on 2017-4-26
 import numpy as np
 from numpy import zeros
 from numpy import zeros_like
-from alphamind.cyimpl import groupby
+from alphamind.utilities import groupby
 from alphamind.utilities import set_value
@@ -20,11 +20,19 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
        length = len(neg_er)
        weights = zeros((length, 1))
        if groups is not None:
-            group_ids = groupby(groups)
+            index_diff, order = groupby(groups)
-            for current_index in group_ids.values():
+            start = 0
+            for diff_loc in index_diff:
+                current_index = order[start:diff_loc+1]
                current_ordering = neg_er[current_index].argsort()
                current_ordering.shape = -1, 1
                set_value(weights, current_index[current_ordering[:use_rank]], 1.)
+                start = diff_loc + 1
+            current_index = order[start:]
+            current_ordering = neg_er[current_index].argsort()
+            current_ordering.shape = -1, 1
+            set_value(weights, current_index[current_ordering[:use_rank]], 1.)
        else:
            ordering = neg_er.argsort()
            weights[ordering[:use_rank]] = 1.
@@ -34,10 +42,17 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
        weights = zeros_like(er)
        if groups is not None:
-            group_ids = groupby(groups)
+            index_diff, order = groupby(groups)
-            for current_index in group_ids.values():
+            start = 0
+            for diff_loc in index_diff:
+                current_index = order[start:diff_loc + 1]
                current_ordering = neg_er[current_index].argsort(axis=0)
                set_value(weights, current_index[current_ordering[:use_rank]], 1)
+                start = diff_loc + 1
+            current_index = order[start:]
+            current_ordering = neg_er[current_index].argsort(axis=0)
+            set_value(weights, current_index[current_ordering[:use_rank]], 1)
        else:
            ordering = neg_er.argsort(axis=0)
            set_value(weights, ordering[:use_rank], 1.)

--- a/alphamind/utilities.py
+++ b/alphamind/utilities.py
@@ -14,6 +14,13 @@ import numba as nb
 alpha_logger = CustomLogger('ALPHA_MIND', 'info')
+def groupby(groups):
+    """
+    Sort-based grouping: describe each group as a contiguous slice of a row ordering.
+
+    :param groups: 1-d array of group labels (numeric or string)
+    :return: ``(index_diff, order)``. ``order`` is ``groups.argsort()``; ``index_diff`` holds
+             the positions, within the sorted labels, of the last row of every group except
+             the final one. Group ``k`` is ``order[start:index_diff[k] + 1]`` where ``start``
+             is 0 for the first group and the previous boundary + 1 afterwards; the final
+             group is ``order[start:]``.
+    """
+    order = groups.argsort()
+    t = groups[order]
+    # Compare neighbours directly instead of using np.diff: subtraction is undefined for
+    # string labels and gives nan (truthy) between two equal infinities.
+    index_diff = np.where(t[1:] != t[:-1])[0]
+    return index_diff, order
 @nb.njit(nogil=True, cache=True)
 def set_value(mat, used_level, to_fill):
    length, width = used_level.shape

--- a/requirements.txt
+++ b/requirements.txt
 cvxopt >= 1.1.9
 cvxpy >= 0.4.9
-cython >= 0.25.2
 numpy >= 1.12.1
 numba >= 0.30.0
 scikit-learn >= 0.18.1

--- a/setup.py
+++ b/setup.py
@@ -6,56 +6,19 @@ Created on 2017-4-25
 """
 import platform
-import sys
 import io
 from setuptools import setup
 from setuptools import find_packages
-from distutils.extension import Extension
 import numpy as np
-import Cython
-from Cython.Build import cythonize
-Cython.Compiler.Options.annotate = True
 VERSION = "0.1.0"
-if "--line_trace" in sys.argv:
-    line_trace = True
-    print("Build with line trace enabled ...")
-    sys.argv.remove("--line_trace")
-else:
-    line_trace = False
-ext_modules = ['alphamind/cyimpl.pyx']
-def generate_extensions(ext_modules, line_trace=False):
-    extensions = []
-    if line_trace:
-        print("define cython trace to True ...")
-        define_macros = [('CYTHON_TRACE', 1), ('CYTHON_TRACE_NOGIL', 1)]
-    else:
-        define_macros = []
-    if platform.system() != "Windows":
-        extra_compile_args = ['-O3', '-std=c++11']
-    else:
-        extra_compile_args = ['/Ox']
-    for pyxfile in ext_modules:
-        ext = Extension(name='.'.join(pyxfile.split('/'))[:-4],
-                        sources=[pyxfile],
-                        define_macros=define_macros,
-                        extra_compile_args=extra_compile_args)
-        extensions.append(ext)
-    return extensions
 if platform.system() != "Windows":
    import multiprocessing
    n_cpu = multiprocessing.cpu_count()
 else:
    n_cpu = 0
-ext_modules_settings = cythonize(generate_extensions(ext_modules, line_trace),
-                                 compiler_directives={'embedsignature': True, 'linetrace': line_trace},
-                                 nthreads=n_cpu)
 setup(
    name='Alpha-Mind',
    version=VERSION,
@@ -65,7 +28,6 @@ setup(
    author='wegamekinglc',
    author_email='',
    install_requires=io.open('requirements.txt', encoding='utf8').read(),
-    ext_modules=ext_modules_settings,
    include_dirs=[np.get_include()],
    description=''
 )
\ No newline at end of file