Commit 8e2520bc authored by Dr.李

Finally, we remove the dependency on Cython.

parent 692bd963
......@@ -35,7 +35,6 @@ install:
- conda install numba
- conda install scipy
- conda install pandas
- conda install cython
- conda install scikit-learn
- pip install cvxopt
- pip install cvxpy
......@@ -43,9 +42,9 @@ install:
- pip install coveralls
script:
- export NUMBA_DISABLE_JIT=1
- python setup.py build_ext --line_trace --inplace
- coverage run --rcfile=./.coveragerc alphamind/tests/test_suite.py
- coverage report --rcfile=./.coveragerc -i
- coverage html --rcfile=./.coveragerc -i
- python setup.py build_ext --inplace
- coverage run alphamind/tests/test_suite.py
- coverage report
- coverage html
after_success:
- coveralls
# -*- coding: utf-8 -*-
"""
Created on 2017-5-12
@author: cheng.li
"""
import pandas as pd
from alphamind.analysis.riskanalysis import risk_analysis
def perf_attribution_by_pos(net_weight_series: pd.Series,
next_bar_return_series: pd.Series,
benchmark_table: pd.DataFrame) -> pd.DataFrame:
explained_table, _ = risk_analysis(net_weight_series,
next_bar_return_series,
benchmark_table)
return explained_table.groupby(level=0).sum()
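
Note: a minimal standalone sketch of the final aggregation step above, for readers checking the expected index layout. The (date, asset) MultiIndex and the toy numbers are assumptions for illustration only, not taken from this commit.

# Hypothetical illustration: aggregate an attribution table over level 0 (dates).
# The (date, asset) MultiIndex layout is assumed, not defined in this commit.
import pandas as pd

index = pd.MultiIndex.from_product([["2017-05-11", "2017-05-12"], ["a", "b"]],
                                   names=["date", "asset"])
explained_table = pd.DataFrame({"idiosyncratic": [0.1, -0.2, 0.3, 0.0],
                                "factor_1": [0.05, 0.02, -0.01, 0.04]},
                               index=index)

# Same pattern as the return statement above: sum contributions per date.
print(explained_table.groupby(level=0).sum())
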
......@@ -25,13 +25,13 @@ def risk_analysis(net_weight_series: pd.Series,
output_exposure=True,
output_explained=True)
systemetic = other_stats['explained']
systematic = other_stats['explained']
exposure = other_stats['exposure']
explained_table = np.hstack((idiosyncratic, systemetic[:, :, 0]))
explained_table = np.hstack((idiosyncratic, systematic[:, :, 0]))
cols = ['idiosyncratic']
cols.extend(risk_factor_cols)
explained_table = pd.DataFrame(explained_table * net_pos , columns=cols, index=net_weight_series.index)
explained_table = pd.DataFrame(explained_table * net_pos, columns=cols, index=net_weight_series.index)
exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, index=net_weight_series.index)
return explained_table, exposure_table.groupby(level=0).first()
# -*- coding: utf-8 -*-
# distutils: language = c++
"""
Created on 2017-4-25
@author: cheng.li
"""
import numpy as np
cimport numpy as np
cimport cython
from libcpp.vector cimport vector as cpp_vector
from libcpp.unordered_map cimport unordered_map as cpp_map
from cython.operator cimport dereference as deref
ctypedef long long int64_t
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.initializedcheck(False)
cpdef groupby(long[:] groups):
cdef long long length = groups.shape[0]
cdef cpp_map[long, cpp_vector[int64_t]] group_ids
cdef long long i
cdef long curr_tag
cdef cpp_map[long, cpp_vector[int64_t]].iterator it
cdef np.ndarray[long long, ndim=1] npy_array
for i in range(length):
curr_tag = groups[i]
it = group_ids.find(curr_tag)
if it == group_ids.end():
group_ids[curr_tag] = [i]
else:
deref(it).second.push_back(i)
return {k: np.array(v) for k, v in group_ids.items()}
\ No newline at end of file
......@@ -12,7 +12,7 @@ from numpy.linalg import solve
from typing import Tuple
from typing import Union
from typing import Dict
from alphamind.cyimpl import groupby
from alphamind.utilities import groupby
def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
......@@ -35,9 +35,11 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
if output_exposure:
exposure = zeros(x.shape + (1,))
groups_ids = groupby(groups)
index_diff, order = groupby(groups)
for curr_idx in groups_ids.values():
start = 0
for diff_loc in index_diff:
curr_idx = order[start:diff_loc + 1]
curr_x, b = _sub_step(x, y, curr_idx, res)
if output_exposure:
for i in range(exposure.shape[2]):
......@@ -45,6 +47,16 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
if output_explained:
for i in range(explained.shape[2]):
explained[curr_idx] = ls_explain(curr_x, b)
start = diff_loc + 1
curr_idx = order[start:]
curr_x, b = _sub_step(x, y, curr_idx, res)
if output_exposure:
for i in range(exposure.shape[2]):
exposure[curr_idx, :, i] = b[:, i]
if output_explained:
for i in range(explained.shape[2]):
explained[curr_idx] = ls_explain(curr_x, b)
else:
b = ls_fit(x, y)
res = ls_res(x, y, b)
......
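
Note: the hunk above replaces the Cython dict-of-indices loop with the (index_diff, order) pair returned by the new pure-numpy groupby, slicing the sorted order at each split point and handling the last group after the loop. A minimal standalone sketch of that iteration pattern; it computes per-group means rather than the actual neutralization, and the data is a toy example:

import numpy as np

groups = np.array([2, 0, 1, 0, 2, 1, 1])
values = np.arange(len(groups), dtype=float)

# same two lines as the new alphamind.utilities.groupby
order = groups.argsort()
index_diff = np.where(np.diff(groups[order]))[0]

start = 0
for diff_loc in index_diff:
    curr_idx = order[start:diff_loc + 1]   # one group's positions in the original arrays
    print(groups[curr_idx[0]], values[curr_idx].mean())
    start = diff_loc + 1

# the last group has no split point after it, hence the extra block after the loop
curr_idx = order[start:]
print(groups[curr_idx[0]], values[curr_idx].mean())
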
......@@ -5,10 +5,11 @@ Created on 2017-5-10
@author: cheng.li
"""
from typing import Tuple
from typing import Union
import numpy as np
import numba as nb
from alphamind.cyimpl import groupby
from alphamind.utilities import groupby
from alphamind.data.neutralize import ls_fit
......@@ -21,13 +22,9 @@ class LinearModel(object):
self.model_parameter = _train(x, y, groups)
def predict(self, x, groups=None):
if groups is not None and isinstance(self.model_parameter, dict):
if groups is not None and isinstance(self.model_parameter, tuple):
names = np.unique(groups)
pred_v = np.zeros(x.shape[0])
for name in names:
this_param = self.model_parameter[name]
_prediction_group(name, groups, this_param, x, pred_v)
return pred_v
return _prediction_impl(self.model_parameter[0], self.model_parameter[1], groups, names, x)
elif self.model_parameter is None:
raise ValueError("linear model is not calibrated yet")
elif groups is None:
......@@ -37,22 +34,35 @@ class LinearModel(object):
@nb.njit(nogil=True, cache=True)
def _prediction_group(name, groups, this_param, x, pred_v):
idx = groups == name
pred_v[idx] = x[idx] @ this_param
def _prediction_impl(calibrated_names, model_parameter, groups, names, x):
places = np.searchsorted(calibrated_names, names)
pred_v = np.zeros(x.shape[0])
for k, name in zip(places, names):
this_param = model_parameter[k]
idx = groups == name
pred_v[idx] = x[idx] @ this_param
return pred_v
def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
def _train(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
if groups is None:
return ls_fit(x, y)
else:
groups_ids = groupby(groups)
res_beta = {}
index_diff, order = groupby(groups)
res_beta = _train_loop(index_diff, order, x, y)
return np.unique(groups), res_beta
for k, curr_idx in groups_ids.items():
res_beta[k] = _train_sub_group(x, y, curr_idx)
return res_beta
@nb.njit(nogil=True, cache=True)
def _train_loop(index_diff, order, x, y):
res_beta = np.zeros((len(index_diff)+1, x.shape[1]))
start = 0
for k, diff_loc in enumerate(index_diff):
res_beta[k] = _train_sub_group(x, y, order[start:diff_loc + 1])
start = diff_loc + 1
res_beta[k + 1] = _train_sub_group(x, y, order[start:])
return res_beta
@nb.njit(nogil=True, cache=True)
......@@ -60,25 +70,3 @@ def _train_sub_group(x, y, curr_idx):
curr_x = x[curr_idx]
curr_y = y[curr_idx]
return ls_fit(curr_x, curr_y)
if __name__ == '__main__':
import datetime as dt
x = np.random.randn(3000, 10)
y = np.random.randn(3000)
groups = np.random.randint(30, size=3000)
to_x = np.random.randn(100, 10)
to_groups = np.random.randint(30, size=100)
model = LinearModel()
start = dt.datetime.now()
for i in range(5000):
model.calibrate(x, y, groups)
print(dt.datetime.now() - start)
start = dt.datetime.now()
for i in range(50000):
model.predict(to_x, to_groups)
print(dt.datetime.now() - start)
\ No newline at end of file
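
Note: predict() now keeps the calibrated group names sorted (np.unique) next to a 2-D beta array and maps requested names to rows with np.searchsorted instead of a dict lookup. A minimal standalone sketch of that lookup; the names, betas and inputs below are toy values, not the library's calibration output:

import numpy as np

calibrated_names = np.array([0, 1, 2])        # sorted, as returned by np.unique(groups)
model_parameter = np.array([[1.0, 0.0],       # one beta row per calibrated group
                            [0.0, 1.0],
                            [0.5, 0.5]])

x = np.array([[1.0, 2.0],
              [3.0, 4.0],
              [5.0, 6.0]])
groups = np.array([2, 0, 2])
names = np.unique(groups)                     # [0, 2]

places = np.searchsorted(calibrated_names, names)   # rows [0, 2]
pred_v = np.zeros(x.shape[0])
for k, name in zip(places, names):
    idx = groups == name
    pred_v[idx] = x[idx] @ model_parameter[k]
print(pred_v)   # row 1 uses beta row 0, rows 0 and 2 use beta row 2
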
......@@ -8,7 +8,7 @@ Created on 2017-5-4
import numpy as np
from numpy import zeros
from numpy import zeros_like
from alphamind.cyimpl import groupby
from alphamind.utilities import groupby
from alphamind.utilities import set_value
......@@ -20,12 +20,21 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np
length = len(neg_er)
weights = zeros((length, 1))
if groups is not None:
group_ids = groupby(groups)
for current_index in group_ids.values():
index_diff, order = groupby(groups)
start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc+1]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1.)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1.)
else:
ordering = neg_er.argsort()
use_rank = int(percent * len(neg_er))
......@@ -36,11 +45,18 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None) -> np
weights = zeros_like(er)
if groups is not None:
group_ids = groupby(groups)
for current_index in group_ids.values():
index_diff, order = groupby(groups)
start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc + 1]
current_ordering = neg_er[current_index].argsort(axis=0)
use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort(axis=0)
use_rank = int(percent * len(current_index))
set_value(weights, current_index[current_ordering[:use_rank]], 1)
else:
ordering = neg_er.argsort(axis=0)
use_rank = int(percent * len(neg_er))
......
......@@ -8,7 +8,7 @@ Created on 2017-4-26
import numpy as np
from numpy import zeros
from numpy import zeros_like
from alphamind.cyimpl import groupby
from alphamind.utilities import groupby
from alphamind.utilities import set_value
......@@ -20,11 +20,19 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
length = len(neg_er)
weights = zeros((length, 1))
if groups is not None:
group_ids = groupby(groups)
for current_index in group_ids.values():
index_diff, order = groupby(groups)
start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc+1]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
set_value(weights, current_index[current_ordering[:use_rank]], 1.)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
set_value(weights, current_index[current_ordering[:use_rank]], 1.)
else:
ordering = neg_er.argsort()
weights[ordering[:use_rank]] = 1.
......@@ -34,10 +42,17 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
weights = zeros_like(er)
if groups is not None:
group_ids = groupby(groups)
for current_index in group_ids.values():
index_diff, order = groupby(groups)
start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc + 1]
current_ordering = neg_er[current_index].argsort(axis=0)
set_value(weights, current_index[current_ordering[:use_rank]], 1)
start = diff_loc + 1
current_index = order[start:]
current_ordering = neg_er[current_index].argsort(axis=0)
set_value(weights, current_index[current_ordering[:use_rank]], 1)
else:
ordering = neg_er.argsort(axis=0)
set_value(weights, ordering[:use_rank], 1.)
......
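
Note: percent_build and rank_build now share the same per-group selection: slice the sorted order into groups, argsort the negated scores inside each slice, and mark the top entries. A minimal standalone sketch of that selection; the data and cutoff are toy values, and the trailing group is folded into the loop here for brevity rather than handled after it as in the library code:

import numpy as np

er = np.array([0.3, -0.1, 0.7, 0.2, 0.9, -0.5])
groups = np.array([0, 0, 0, 1, 1, 1])
use_rank = 1                                   # top-1 per group

order = groups.argsort()
index_diff = np.where(np.diff(groups[order]))[0]

weights = np.zeros(len(er))
start = 0
bounds = list(index_diff) + [len(er) - 1]      # trailing slice handled uniformly here
for diff_loc in bounds:
    current_index = order[start:diff_loc + 1]
    current_ordering = (-er[current_index]).argsort()
    weights[current_index[current_ordering[:use_rank]]] = 1.0
    start = diff_loc + 1
print(weights)   # one name selected per group: indices 2 and 4
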
......@@ -14,6 +14,13 @@ import numba as nb
alpha_logger = CustomLogger('ALPHA_MIND', 'info')
def groupby(groups):
order = groups.argsort()
t = groups[order]
index_diff = np.where(np.diff(t))[0]
return index_diff, order
@nb.njit(nogil=True, cache=True)
def set_value(mat, used_level, to_fill):
length, width = used_level.shape
......
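
Note: this pure-numpy groupby is the replacement for the deleted cyimpl.pyx version. It returns split points into the argsort order instead of a dict; the sketch below shows the contract and how a dict of per-group index arrays, equivalent to the old Cython output, can be recovered with np.split (the np.split step is an illustration, not library code):

import numpy as np

def groupby(groups):
    # identical logic to the function added above
    order = groups.argsort()
    t = groups[order]
    index_diff = np.where(np.diff(t))[0]
    return index_diff, order

groups = np.array([3, 1, 3, 2, 1])
index_diff, order = groupby(groups)
print(index_diff, order)                  # split points into the sorted order

# equivalent of the removed cyimpl.groupby: one index array per group tag
chunks = np.split(order, index_diff + 1)
as_dict = {int(groups[idx[0]]): idx for idx in chunks}
print(as_dict)                            # keys 1, 2, 3 -> index arrays [1 4], [3], [0 2]
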
cvxopt >= 1.1.9
cvxpy >= 0.4.9
cython >= 0.25.2
numpy >= 1.12.1
numba >= 0.30.0
scikit-learn >= 0.18.1
......
......@@ -6,56 +6,19 @@ Created on 2017-4-25
"""
import platform
import sys
import io
from setuptools import setup
from setuptools import find_packages
from distutils.extension import Extension
import numpy as np
import Cython
from Cython.Build import cythonize
Cython.Compiler.Options.annotate = True
VERSION = "0.1.0"
if "--line_trace" in sys.argv:
line_trace = True
print("Build with line trace enabled ...")
sys.argv.remove("--line_trace")
else:
line_trace = False
ext_modules = ['alphamind/cyimpl.pyx']
def generate_extensions(ext_modules, line_trace=False):
extensions = []
if line_trace:
print("define cython trace to True ...")
define_macros = [('CYTHON_TRACE', 1), ('CYTHON_TRACE_NOGIL', 1)]
else:
define_macros = []
if platform.system() != "Windows":
extra_compile_args = ['-O3', '-std=c++11']
else:
extra_compile_args = ['/Ox']
for pyxfile in ext_modules:
ext = Extension(name='.'.join(pyxfile.split('/'))[:-4],
sources=[pyxfile],
define_macros=define_macros,
extra_compile_args=extra_compile_args)
extensions.append(ext)
return extensions
if platform.system() != "Windows":
import multiprocessing
n_cpu = multiprocessing.cpu_count()
else:
n_cpu = 0
ext_modules_settings = cythonize(generate_extensions(ext_modules, line_trace),
compiler_directives={'embedsignature': True, 'linetrace': line_trace},
nthreads=n_cpu)
setup(
name='Alpha-Mind',
version=VERSION,
......@@ -65,7 +28,6 @@ setup(
author='wegamekinglc',
author_email='',
install_requires=io.open('requirements.txt', encoding='utf8').read(),
ext_modules=ext_modules_settings,
include_dirs=[np.get_include()],
description=''
)
\ No newline at end of file
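
Note: with the Cython extension machinery gone, setup.py reduces to a plain setuptools configuration. A hypothetical minimal sketch of that shape; fields hidden in the collapsed diff are not reproduced, and packages=find_packages() is an assumption rather than a line from the file:

# Hypothetical minimal shape of a pure-Python setup.py after dropping the Cython
# build step; fields hidden in the collapsed diff are not reproduced here.
import io
import numpy as np
from setuptools import setup, find_packages

VERSION = "0.1.0"

setup(
    name='Alpha-Mind',
    version=VERSION,
    packages=find_packages(),            # assumption: typical setuptools usage
    author='wegamekinglc',
    author_email='',
    install_requires=io.open('requirements.txt', encoding='utf8').read(),
    include_dirs=[np.get_include()],
    description=''
)
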