Commit 8e2520bc authored by Dr.李's avatar Dr.李

finally we remove the dependency on cython

parent 692bd963
...@@ -35,7 +35,6 @@ install: ...@@ -35,7 +35,6 @@ install:
- conda install numba - conda install numba
- conda install scipy - conda install scipy
- conda install pandas - conda install pandas
- conda install cython
- conda install scikit-learn - conda install scikit-learn
- pip install cvxopt - pip install cvxopt
- pip install cvxpy - pip install cvxpy
...@@ -43,9 +42,9 @@ install: ...@@ -43,9 +42,9 @@ install:
- pip install coveralls - pip install coveralls
script: script:
- export NUMBA_DISABLE_JIT=1 - export NUMBA_DISABLE_JIT=1
- python setup.py build_ext --line_trace --inplace - python setup.py build_ext --inplace
- coverage run --rcfile=./.coveragerc alphamind/tests/test_suite.py - coverage run alphamind/tests/test_suite.py
- coverage report --rcfile=./.coveragerc -i - coverage report
- coverage html --rcfile=./.coveragerc -i - coverage html
after_success: after_success:
- coveralls - coveralls
# -*- coding: utf-8 -*-
"""
Created on 2017-5-12
@author: cheng.li
"""
import pandas as pd
from alphamind.analysis.riskanalysis import risk_analysis
def perf_attribution_by_pos(net_weight_series: pd.Series,
                            next_bar_return_series: pd.Series,
                            benchmark_table: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the per-row explained table from ``risk_analysis``.

    The three arguments are forwarded unchanged to ``risk_analysis``. Of the
    two tables it returns, only the first (the explained table) is used; the
    second is discarded.

    :param net_weight_series: forwarded to ``risk_analysis``.
    :param next_bar_return_series: forwarded to ``risk_analysis``.
    :param benchmark_table: forwarded to ``risk_analysis``.
    :return: the explained table summed over the first level of its index.
    """
    per_row_explained, _unused_exposure = risk_analysis(net_weight_series,
                                                        next_bar_return_series,
                                                        benchmark_table)
    grouped_by_first_level = per_row_explained.groupby(level=0)
    return grouped_by_first_level.sum()
...@@ -25,13 +25,13 @@ def risk_analysis(net_weight_series: pd.Series, ...@@ -25,13 +25,13 @@ def risk_analysis(net_weight_series: pd.Series,
output_exposure=True, output_exposure=True,
output_explained=True) output_explained=True)
systemetic = other_stats['explained'] systematic = other_stats['explained']
exposure = other_stats['exposure'] exposure = other_stats['exposure']
explained_table = np.hstack((idiosyncratic, systemetic[:, :, 0])) explained_table = np.hstack((idiosyncratic, systematic[:, :, 0]))
cols = ['idiosyncratic'] cols = ['idiosyncratic']
cols.extend(risk_factor_cols) cols.extend(risk_factor_cols)
explained_table = pd.DataFrame(explained_table * net_pos , columns=cols, index=net_weight_series.index) explained_table = pd.DataFrame(explained_table * net_pos, columns=cols, index=net_weight_series.index)
exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, index=net_weight_series.index) exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, index=net_weight_series.index)
return explained_table, exposure_table.groupby(level=0).first() return explained_table, exposure_table.groupby(level=0).first()
# -*- coding: utf-8 -*-
# distutils: language = c++
"""
Created on 2017-4-25
@author: cheng.li
"""
import numpy as np
cimport numpy as np
cimport cython
from libcpp.vector cimport vector as cpp_vector
from libcpp.unordered_map cimport unordered_map as cpp_map
from cython.operator cimport dereference as deref
ctypedef long long int64_t
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef groupby(long[:] groups):
    """Collect, for every distinct value in ``groups``, the positions holding it.

    :param groups: one-dimensional buffer of C ``long`` group labels.
    :return: dict mapping each distinct label to a numpy array of the
        positions at which it occurs; positions appear in increasing order
        because the input is scanned front to back. An empty input yields
        an empty dict.
    """
    cdef long long length = groups.shape[0]
    cdef cpp_map[long, cpp_vector[int64_t]] group_ids
    cdef long long i

    for i in range(length):
        # operator[] default-constructs an empty vector the first time a
        # label is seen, so no explicit find()/insert branch is needed and
        # no temporary Python list is built for the first element.
        group_ids[groups[i]].push_back(i)

    # The C++ map is converted to Python objects here; each vector of
    # positions becomes a numpy array.
    return {k: np.array(v) for k, v in group_ids.items()}
\ No newline at end of file
...@@ -12,7 +12,7 @@ from numpy.linalg import solve ...@@ -12,7 +12,7 @@ from numpy.linalg import solve
from typing import Tuple from typing import Tuple
from typing import Union from typing import Union
from typing import Dict from typing import Dict
from alphamind.cyimpl import groupby from alphamind.utilities import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
...@@ -35,9 +35,11 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp ...@@ -35,9 +35,11 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
if output_exposure: if output_exposure:
exposure = zeros(x.shape + (1,)) exposure = zeros(x.shape + (1,))
groups_ids = groupby(groups) index_diff, order = groupby(groups)
for curr_idx in groups_ids.values(): start = 0
for diff_loc in index_diff:
curr_idx = order[start:diff_loc + 1]
curr_x, b = _sub_step(x, y, curr_idx, res) curr_x, b = _sub_step(x, y, curr_idx, res)
if output_exposure: if output_exposure:
for i in range(exposure.shape[2]): for i in range(exposure.shape[2]):
...@@ -45,6 +47,16 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp ...@@ -45,6 +47,16 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
if output_explained: if output_explained:
for i in range(explained.shape[2]): for i in range(explained.shape[2]):
explained[curr_idx] = ls_explain(curr_x, b) explained[curr_idx] = ls_explain(curr_x, b)
start = diff_loc + 1
curr_idx = order[start:]
curr_x, b = _sub_step(x, y, curr_idx, res)
if output_exposure:
for i in range(exposure.shape[2]):
exposure[curr_idx, :, i] = b[:, i]
if output_explained:
for i in range(explained.shape[2]):
explained[curr_idx] = ls_explain(curr_x, b)
else: else:
b = ls_fit(x, y) b = ls_fit(x, y)
res = ls_res(x, y, b) res = ls_res(x, y, b)
......
...@@ -5,10 +5,11 @@ Created on 2017-5-10 ...@@ -5,10 +5,11 @@ Created on 2017-5-10
@author: cheng.li @author: cheng.li
""" """
from typing import Tuple
from typing import Union from typing import Union
import numpy as np import numpy as np
import numba as nb import numba as nb
from alphamind.cyimpl import groupby from alphamind.utilities import groupby
from alphamind.data.neutralize import ls_fit from alphamind.data.neutralize import ls_fit
...@@ -21,13 +22,9 @@ class LinearModel(object): ...@@ -21,13 +22,9 @@ class LinearModel(object):
self.model_parameter = _train(x, y, groups) self.model_parameter = _train(x, y, groups)
def predict(self, x, groups=None): def predict(self, x, groups=None):
if groups is not None and isinstance(self.model_parameter, dict): if groups is not None and isinstance(self.model_parameter, tuple):
names = np.unique(groups) names = np.unique(groups)
pred_v = np.zeros(x.shape[0]) return _prediction_impl(self.model_parameter[0], self.model_parameter[1], groups, names, x)
for name in names:
this_param = self.model_parameter[name]
_prediction_group(name, groups, this_param, x, pred_v)
return pred_v
elif self.model_parameter is None: elif self.model_parameter is None:
raise ValueError("linear model is not calibrated yet") raise ValueError("linear model is not calibrated yet")
elif groups is None: elif groups is None:
...@@ -37,22 +34,35 @@ class LinearModel(object): ...@@ -37,22 +34,35 @@ class LinearModel(object):
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
def _prediction_group(name, groups, this_param, x, pred_v): def _prediction_impl(calibrated_names, model_parameter, groups, names, x):
idx = groups == name places = np.searchsorted(calibrated_names, names)
pred_v[idx] = x[idx] @ this_param pred_v = np.zeros(x.shape[0])
for k, name in zip(places, names):
this_param = model_parameter[k]
idx = groups == name
pred_v[idx] = x[idx] @ this_param
return pred_v
def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray: def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
if groups is None: if groups is None:
return ls_fit(x, y) return ls_fit(x, y)
else: else:
groups_ids = groupby(groups) index_diff, order = groupby(groups)
res_beta = {} res_beta = _train_loop(index_diff, order, x, y)
return np.unique(groups), res_beta
for k, curr_idx in groups_ids.items():
res_beta[k] = _train_sub_group(x, y, curr_idx)
return res_beta @nb.njit(nogil=True, cache=True)
def _train_loop(index_diff, order, x, y):
    """Fit one row of least-squares coefficients per group.

    ``index_diff`` and ``order`` are the pair returned by ``groupby``:
    ``order`` lists row positions sorted by group label and ``index_diff``
    holds, for every group except the last, the offset in ``order`` of that
    group's final row.

    :param index_diff: offsets in ``order`` where the group label changes.
    :param order: row positions arranged so that each group is contiguous.
    :param x: 2-D regressor matrix (``x.shape[1]`` coefficients per group).
    :param y: regression target, indexed by the same rows as ``x``.
    :return: array of shape ``(len(index_diff) + 1, x.shape[1])`` whose k-th
        row is the fit for the k-th group in sorted label order.
    """
    n_boundaries = len(index_diff)
    res_beta = np.zeros((n_boundaries + 1, x.shape[1]))

    start = 0
    for k, diff_loc in enumerate(index_diff):
        res_beta[k] = _train_sub_group(x, y, order[start:diff_loc + 1])
        start = diff_loc + 1

    # The last group runs to the end of ``order``. Its row is addressed by
    # the boundary count rather than by the loop variable: with a single
    # group the loop body never executes and ``k`` would be unbound.
    res_beta[n_boundaries] = _train_sub_group(x, y, order[start:])
    return res_beta
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
...@@ -60,25 +70,3 @@ def _train_sub_group(x, y, curr_idx): ...@@ -60,25 +70,3 @@ def _train_sub_group(x, y, curr_idx):
curr_x = x[curr_idx] curr_x = x[curr_idx]
curr_y = y[curr_idx] curr_y = y[curr_idx]
return ls_fit(curr_x, curr_y) return ls_fit(curr_x, curr_y)
if __name__ == '__main__':
    # Ad-hoc timing script: measures repeated calibrate() and predict()
    # calls on random data and prints the elapsed wall-clock time of each.
    import datetime as dt

    # Training sample: 3000 rows, 10 regressors, integer group labels in [0, 30).
    x = np.random.randn(3000, 10)
    y = np.random.randn(3000)
    groups = np.random.randint(30, size=3000)

    # Prediction sample: 100 rows drawn the same way.
    to_x = np.random.randn(100, 10)
    to_groups = np.random.randint(30, size=100)

    model = LinearModel()

    # Time 5000 grouped calibrations.
    start = dt.datetime.now()
    for i in range(5000):
        model.calibrate(x, y, groups)
    print(dt.datetime.now() - start)

    # Time 50000 grouped predictions with the last calibrated parameters.
    start = dt.datetime.now()
    for i in range(50000):
        model.predict(to_x, to_groups)
    print(dt.datetime.now() - start)
\ No newline at end of file
...@@ -8,7 +8,7 @@ Created on 2017-5-4 ...@@ -8,7 +8,7 @@ Created on 2017-5-4
import numpy as np import numpy as np
from numpy import zeros from numpy import zeros
from numpy import zeros_like from numpy import zeros_like
from alphamind.cyimpl import groupby from alphamind.utilities import groupby
from alphamind.utilities import set_value from alphamind.utilities import set_value
...@@ -20,12 +20,21 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np ...@@ -20,12 +20,21 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np
length = len(neg_er) length = len(neg_er)
weights = zeros((length, 1)) weights = zeros((length, 1))
if groups is not None: if groups is not None:
group_ids = groupby(groups) index_diff, order = groupby(groups)
for current_index in group_ids.values(): start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc+1]
current_ordering = neg_er[current_index].argsort() current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1 current_ordering.shape = -1, 1
use_rank = int(percent * len(current_index)) use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1.) set_value(weights, current_index[current_ordering[:use_rank]], 1.)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1.)
else: else:
ordering = neg_er.argsort() ordering = neg_er.argsort()
use_rank = int(percent * len(neg_er)) use_rank = int(percent * len(neg_er))
...@@ -36,11 +45,18 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np ...@@ -36,11 +45,18 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np
weights = zeros_like(er) weights = zeros_like(er)
if groups is not None: if groups is not None:
group_ids = groupby(groups) index_diff, order = groupby(groups)
for current_index in group_ids.values(): start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc + 1]
current_ordering = neg_er[current_index].argsort(axis=0) current_ordering = neg_er[current_index].argsort(axis=0)
use_rank = int(percent * len(current_index)) use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1) set_value(weights, current_index[current_ordering[:use_rank]], 1)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort(axis=0)
use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1)
else: else:
ordering = neg_er.argsort(axis=0) ordering = neg_er.argsort(axis=0)
use_rank = int(percent * len(neg_er)) use_rank = int(percent * len(neg_er))
......
...@@ -8,7 +8,7 @@ Created on 2017-4-26 ...@@ -8,7 +8,7 @@ Created on 2017-4-26
import numpy as np import numpy as np
from numpy import zeros from numpy import zeros
from numpy import zeros_like from numpy import zeros_like
from alphamind.cyimpl import groupby from alphamind.utilities import groupby
from alphamind.utilities import set_value from alphamind.utilities import set_value
...@@ -20,11 +20,19 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda ...@@ -20,11 +20,19 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
length = len(neg_er) length = len(neg_er)
weights = zeros((length, 1)) weights = zeros((length, 1))
if groups is not None: if groups is not None:
group_ids = groupby(groups) index_diff, order = groupby(groups)
for current_index in group_ids.values(): start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc+1]
current_ordering = neg_er[current_index].argsort() current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1 current_ordering.shape = -1, 1
set_value(weights, current_index[current_ordering[:use_rank]], 1.) set_value(weights, current_index[current_ordering[:use_rank]], 1.)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
set_value(weights, current_index[current_ordering[:use_rank]], 1.)
else: else:
ordering = neg_er.argsort() ordering = neg_er.argsort()
weights[ordering[:use_rank]] = 1. weights[ordering[:use_rank]] = 1.
...@@ -34,10 +42,17 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda ...@@ -34,10 +42,17 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
weights = zeros_like(er) weights = zeros_like(er)
if groups is not None: if groups is not None:
group_ids = groupby(groups) index_diff, order = groupby(groups)
for current_index in group_ids.values(): start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc + 1]
current_ordering = neg_er[current_index].argsort(axis=0) current_ordering = neg_er[current_index].argsort(axis=0)
set_value(weights, current_index[current_ordering[:use_rank]], 1) set_value(weights, current_index[current_ordering[:use_rank]], 1)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort(axis=0)
set_value(weights, current_index[current_ordering[:use_rank]], 1)
else: else:
ordering = neg_er.argsort(axis=0) ordering = neg_er.argsort(axis=0)
set_value(weights, ordering[:use_rank], 1.) set_value(weights, ordering[:use_rank], 1.)
......
...@@ -14,6 +14,13 @@ import numba as nb ...@@ -14,6 +14,13 @@ import numba as nb
alpha_logger = CustomLogger('ALPHA_MIND', 'info') alpha_logger = CustomLogger('ALPHA_MIND', 'info')
def groupby(groups):
    """Sort-based grouping of a one-dimensional label array.

    :param groups: 1-D array (or array-like) of group labels. Any dtype that
        supports ordering and ``!=`` works, including strings.
    :return: tuple ``(index_diff, order)``. ``order`` is the permutation that
        sorts ``groups``, so the rows of each group are contiguous in it.
        ``index_diff`` holds the offsets ``i`` in ``order`` such that
        ``groups[order[i]] != groups[order[i + 1]]``, i.e. the last offset of
        every group except the final one. Group ``k`` therefore occupies
        ``order[index_diff[k - 1] + 1:index_diff[k] + 1]``, with the first
        group starting at 0 and the last one running to the end.
    """
    groups = np.asarray(groups)
    # A stable sort keeps rows of the same group in their original relative
    # order, which makes the result deterministic.
    order = groups.argsort(kind='mergesort')
    t = groups[order]
    # Compare neighbours directly instead of np.diff: subtraction is not
    # defined for string labels and yields nan (a false boundary) for
    # repeated infinities.
    index_diff = np.flatnonzero(t[1:] != t[:-1])
    return index_diff, order
@nb.njit(nogil=True, cache=True) @nb.njit(nogil=True, cache=True)
def set_value(mat, used_level, to_fill): def set_value(mat, used_level, to_fill):
length, width = used_level.shape length, width = used_level.shape
......
cvxopt >= 1.1.9 cvxopt >= 1.1.9
cvxpy >= 0.4.9 cvxpy >= 0.4.9
cython >= 0.25.2
numpy >= 1.12.1 numpy >= 1.12.1
numba >= 0.30.0 numba >= 0.30.0
scikit-learn >= 0.18.1 scikit-learn >= 0.18.1
......
...@@ -6,56 +6,19 @@ Created on 2017-4-25 ...@@ -6,56 +6,19 @@ Created on 2017-4-25
""" """
import platform import platform
import sys
import io import io
from setuptools import setup from setuptools import setup
from setuptools import find_packages from setuptools import find_packages
from distutils.extension import Extension
import numpy as np import numpy as np
import Cython
from Cython.Build import cythonize
Cython.Compiler.Options.annotate = True
VERSION = "0.1.0" VERSION = "0.1.0"
# A custom "--line_trace" flag on the command line switches on Cython line
# tracing. It is stripped from sys.argv after being read so that it is not
# left among the arguments seen by later code.
line_trace = "--line_trace" in sys.argv
if line_trace:
    print("Build with line trace enabled ...")
    sys.argv.remove("--line_trace")

# Cython sources to compile into extension modules.
ext_modules = ['alphamind/cyimpl.pyx']
def generate_extensions(ext_modules, line_trace=False):
    """Build one ``Extension`` object per Cython source path.

    :param ext_modules: iterable of '/'-separated ``.pyx`` paths.
    :param line_trace: when true, define the ``CYTHON_TRACE`` and
        ``CYTHON_TRACE_NOGIL`` macros for every extension.
    :return: list of ``Extension`` objects in the order of ``ext_modules``.
        The module name is the path with '/' replaced by '.' and the last
        four characters (the ``.pyx`` suffix) dropped.
    """
    if line_trace:
        print("define cython trace to True ...")
    define_macros = [('CYTHON_TRACE', 1), ('CYTHON_TRACE_NOGIL', 1)] if line_trace else []

    # Optimisation flags differ between MSVC and other compilers.
    on_windows = platform.system() == "Windows"
    extra_compile_args = ['/Ox'] if on_windows else ['-O3', '-std=c++11']

    return [Extension(name=pyxfile.replace('/', '.')[:-4],
                      sources=[pyxfile],
                      define_macros=define_macros,
                      extra_compile_args=extra_compile_args)
            for pyxfile in ext_modules]
if platform.system() != "Windows": if platform.system() != "Windows":
import multiprocessing import multiprocessing
n_cpu = multiprocessing.cpu_count() n_cpu = multiprocessing.cpu_count()
else: else:
n_cpu = 0 n_cpu = 0
ext_modules_settings = cythonize(generate_extensions(ext_modules, line_trace),
compiler_directives={'embedsignature': True, 'linetrace': line_trace},
nthreads=n_cpu)
setup( setup(
name='Alpha-Mind', name='Alpha-Mind',
version=VERSION, version=VERSION,
...@@ -65,7 +28,6 @@ setup( ...@@ -65,7 +28,6 @@ setup(
author='wegamekinglc', author='wegamekinglc',
author_email='', author_email='',
install_requires=io.open('requirements.txt', encoding='utf8').read(), install_requires=io.open('requirements.txt', encoding='utf8').read(),
ext_modules=ext_modules_settings,
include_dirs=[np.get_include()], include_dirs=[np.get_include()],
description='' description=''
) )
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment