added more

7b72cb5a · Dr.李 · b9d2a972 · 7b72cb5a · 7b72cb5a · 7b72cb5a
Commit 7b72cb5a authored Apr 30, 2017 by Dr.李
32 changed files
--- a/.gitignore
+++ b/.gitignore
-*.pyc
-.idea/*
\ No newline at end of file
+*.pyc
+.idea/*
+build/*
+dist/*
+Alpha_Mind.egg-info/*
+*.pyd
+*.c
+*.cpp
+*.html
--- a/alphamind/__init__.py
+++ b/alphamind/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
--- a/alphamind/aggregate.pyx
+++ b/alphamind/aggregate.pyx
-# -*- coding: utf-8 -*-
-# distutils: language = c++
-"""
-Created on 2017-4-26
-
-@author: cheng.li
-"""
-
-import numpy as np
-cimport numpy as np
-cimport cython
-from libc.math cimport sqrt
-from libc.math cimport fabs
-from libc.stdlib cimport calloc
-from libc.stdlib cimport free
-from numpy import array
-from libcpp.vector cimport vector as cpp_vector
-from libcpp.unordered_map cimport unordered_map as cpp_map
-from cython.operator cimport dereference as deref
-
-np.import_array()
-
-cdef extern from "numpy/arrayobject.h":
-    void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
-
-ctypedef long long int64_t
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef list groupby(long[:] groups):
-
-    cdef long long length = groups.shape[0]
-    cdef cpp_map[long, cpp_vector[int64_t]] group_ids
-    cdef long long i
-    cdef long curr_tag
-    cdef cpp_map[long, cpp_vector[int64_t]].iterator it
-    cdef list res = []
-    cdef np.ndarray[long long, ndim=1] npy_array
-    cdef cpp_vector[int64_t] v
-    cdef long long* arr_ptr
-
-    for i in range(length):
-        curr_tag = groups[i]
-        it = group_ids.find(curr_tag)
-
-        if it == group_ids.end():
-            group_ids[curr_tag] = [i]
-        else:
-            deref(it).second.push_back(i)
-
-    for v in group_ids.values():
-        res.append(array(v, dtype=np.int64))
-
-    return res
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cdef long* group_mapping(long* groups, size_t length, size_t* max_g):
-    cdef long *res_ptr = <long*>calloc(length, sizeof(int))
-    cdef cpp_map[long, long] current_hold
-    cdef long curr_tag
-    cdef long running_tag = -1
-    cdef size_t i = 0
-    cdef cpp_map[long, long].iterator it
-
-    for i in range(length):
-        curr_tag = groups[i]
-        it = current_hold.find(curr_tag)
-        if it == current_hold.end():
-            running_tag += 1
-            res_ptr[i] = running_tag
-            current_hold[curr_tag] = running_tag
-        else:
-            res_ptr[i] = deref(it).second
-
-    max_g[0] = running_tag
-    return res_ptr
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.cdivision(True)
-@cython.initializedcheck(False)
-cdef double* agg_sum(long* groups, size_t max_g, double* x, size_t length, size_t width) nogil:
-    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
-    cdef size_t i
-    cdef size_t j
-    cdef size_t loop_idx1
-    cdef size_t loop_idx2
-    cdef long curr
-
-    for i in range(length):
-        loop_idx1 = i*width
-        loop_idx2 = groups[i]*width
-        for j in range(width):
-            res_ptr[loop_idx2 + j] += x[loop_idx1 + j]
-    return res_ptr
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.cdivision(True)
-@cython.initializedcheck(False)
-cdef double* agg_abssum(long* groups, size_t max_g, double* x, size_t length, size_t width) nogil:
-    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
-    cdef size_t i
-    cdef size_t j
-    cdef size_t loop_idx1
-    cdef size_t loop_idx2
-    cdef long curr
-
-    for i in range(length):
-        loop_idx1 = i*width
-        loop_idx2 = groups[i]*width
-        for j in range(width):
-            res_ptr[loop_idx2 + j] += fabs(x[loop_idx1 + j])
-    return res_ptr
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.cdivision(True)
-@cython.initializedcheck(False)
-cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size_t width) nogil:
-    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
-    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(int))
-    cdef size_t i
-    cdef size_t j
-    cdef size_t loop_idx1
-    cdef size_t loop_idx2
-    cdef long curr
-
-    for i in range(length):
-        loop_idx1 = i*width
-        loop_idx2 = groups[i]*width
-        for j in range(width):
-            res_ptr[loop_idx2 + j] += x[loop_idx1 + j]
-        bin_count_ptr[groups[i]] += 1
-
-    for i in range(max_g+1):
-        curr = bin_count_ptr[i]
-        if curr != 0:
-            loop_idx1 = i*width
-            for j in range(width):
-                res_ptr[loop_idx1 + j] /= curr
-
-    free(bin_count_ptr)
-    return res_ptr
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.cdivision(True)
-@cython.initializedcheck(False)
-cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_t width, long ddof=1) nogil:
-    cdef double* running_sum_square_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
-    cdef double* running_sum_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
-    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(int))
-    cdef size_t i
-    cdef size_t j
-    cdef size_t loop_idx1
-    cdef size_t loop_idx2
-    cdef long curr
-    cdef double raw_value
-
-    for i in range(length):
-        loop_idx1 = i * width
-        loop_idx2 = groups[i] * width
-
-        for j in range(width):
-            raw_value = x[loop_idx1 + j]
-            running_sum_ptr[loop_idx2 + j] += raw_value
-            running_sum_square_ptr[loop_idx2 + j] += raw_value * raw_value
-        bin_count_ptr[groups[i]] += 1
-
-    for i in range(max_g+1):
-        curr = bin_count_ptr[i]
-        loop_idx1 = i * width
-        if curr != 0:
-            for j in range(width):
-                loop_idx2 = loop_idx1 + j
-                running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))
-
-    free(running_sum_ptr)
-    free(bin_count_ptr)
-    return running_sum_square_ptr
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str func):
-    cdef size_t length = x.shape[0]
-    cdef size_t width = x.shape[1]
-    cdef size_t* max_g = <size_t*>calloc(1, sizeof(size_t))
-    cdef long* mapped_groups = group_mapping(&groups[0], length, max_g)
-    cdef double* res_data_ptr = <double*>calloc(length*width, sizeof(double))
-    cdef double* value_data_ptr
-    cdef np.ndarray[double, ndim=2] res
-    cdef size_t i
-    cdef size_t j
-    cdef size_t loop_idx1
-    cdef size_t loop_idx2
-
-
-    if func == 'mean':
-        value_data_ptr = agg_mean(mapped_groups, max_g[0], &x[0, 0], length, width)
-    elif func == 'std':
-        value_data_ptr = agg_std(mapped_groups, max_g[0], &x[0, 0], length, width, ddof=1)
-    elif func == 'sum':
-        value_data_ptr = agg_sum(mapped_groups, max_g[0], &x[0, 0], length, width)
-    elif func =='abssum':
-        value_data_ptr = agg_abssum(mapped_groups, max_g[0], &x[0, 0], length, width)
-
-    with nogil:
-        for i in range(length):
-            loop_idx1 = i*width
-            loop_idx2 = mapped_groups[i] * width
-            for j in range(width):
-                res_data_ptr[loop_idx1 + j] = value_data_ptr[loop_idx2 + j]
-        free(value_data_ptr)
-        free(mapped_groups)
-        free(max_g)
-    res = np.PyArray_SimpleNewFromData(2, [length, width], np.NPY_FLOAT64, res_data_ptr)
-    PyArray_ENABLEFLAGS(res, np.NPY_OWNDATA)
-    return res
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef np.ndarray[double, ndim=2] aggregate(long[:] groups, double[:, :] x, str func):
-    cdef size_t length = x.shape[0]
-    cdef size_t width = x.shape[1]
-    cdef size_t* max_g = <size_t*>calloc(1, sizeof(size_t))
-    cdef long* mapped_groups = group_mapping(&groups[0], length, max_g)
-    cdef np.ndarray[double, ndim=2] res
-    cdef double* value_data_ptr
-
-    if func == 'mean':
-        value_data_ptr = agg_mean(mapped_groups, max_g[0], &x[0, 0], length, width)
-    elif func == 'std':
-        value_data_ptr = agg_std(mapped_groups, max_g[0], &x[0, 0], length, width, ddof=1)
-    elif func == 'sum':
-        value_data_ptr = agg_sum(mapped_groups, max_g[0], &x[0, 0], length, width)
-    elif func =='abssum':
-        value_data_ptr = agg_abssum(mapped_groups, max_g[0], &x[0, 0], length, width)
-
-    res = np.PyArray_SimpleNewFromData(2, [max_g[0]+1, width], np.NPY_FLOAT64, value_data_ptr)
-    PyArray_ENABLEFLAGS(res, np.NPY_OWNDATA)
-    free(mapped_groups)
-    free(max_g)
+# -*- coding: utf-8 -*-
+# distutils: language = c++
+"""
+Created on 2017-4-26
+
+@author: cheng.li
+"""
+
+import numpy as np
+cimport numpy as np
+cimport cython
+from libc.math cimport sqrt
+from libc.math cimport fabs
+from libc.stdlib cimport calloc
+from libc.stdlib cimport free
+from numpy import array
+from libcpp.vector cimport vector as cpp_vector
+from libcpp.unordered_map cimport unordered_map as cpp_map
+from cython.operator cimport dereference as deref
+
+np.import_array()
+
+cdef extern from "numpy/arrayobject.h":
+    void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
+
+ctypedef long long int64_t
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+cpdef list groupby(long[:] groups):
+    """
+    Collect, for every distinct label in ``groups``, the positions at which it occurs.
+
+    :param groups: 1-d buffer of integer group labels
+    :return: list holding one int64 array of row positions per distinct label
+    """
+
+    cdef long long length = groups.shape[0]
+    # label -> positions (in increasing order) where that label was seen
+    cdef cpp_map[long, cpp_vector[int64_t]] group_ids
+    cdef long long i
+    cdef long curr_tag
+    cdef cpp_map[long, cpp_vector[int64_t]].iterator it
+    cdef list res = []
+    # NOTE(review): npy_array and arr_ptr are declared but never used below
+    cdef np.ndarray[long long, ndim=1] npy_array
+    cdef cpp_vector[int64_t] v
+    cdef long long* arr_ptr
+
+    for i in range(length):
+        curr_tag = groups[i]
+        it = group_ids.find(curr_tag)
+
+        if it == group_ids.end():
+            # first occurrence of this label: start a new position list
+            group_ids[curr_tag] = [i]
+        else:
+            deref(it).second.push_back(i)
+
+    # NOTE(review): the container is an unordered map, so the order of the
+    # returned arrays is presumably unspecified -- confirm before relying on it.
+    for v in group_ids.values():
+        res.append(array(v, dtype=np.int64))
+
+    return res
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+cdef long* group_mapping(long* groups, size_t length, size_t* max_g):
+    """
+    Relabel arbitrary group tags as dense ids 0, 1, 2, ... in order of first appearance.
+
+    :param groups: pointer to ``length`` raw group tags
+    :param length: number of entries readable through ``groups``
+    :param max_g: out-parameter; receives the largest dense id that was assigned.
+                  For ``length == 0`` no id is assigned and the initial -1 is stored,
+                  which wraps around in the unsigned ``size_t``.
+    :return: calloc'ed buffer of ``length`` dense ids; the caller must ``free`` it
+    """
+    # The buffer holds ``long`` elements, so it must be sized with sizeof(long):
+    # sizing it with sizeof(int) under-allocates wherever long is wider than int
+    # and the writes below then run past the end of the allocation.
+    cdef long *res_ptr = <long*>calloc(length, sizeof(long))
+    # raw tag -> dense id already handed out for it
+    cdef cpp_map[long, long] current_hold
+    cdef long curr_tag
+    cdef long running_tag = -1
+    cdef size_t i = 0
+    cdef cpp_map[long, long].iterator it
+
+    for i in range(length):
+        curr_tag = groups[i]
+        it = current_hold.find(curr_tag)
+        if it == current_hold.end():
+            # unseen tag: hand out the next dense id
+            running_tag += 1
+            res_ptr[i] = running_tag
+            current_hold[curr_tag] = running_tag
+        else:
+            res_ptr[i] = deref(it).second
+
+    max_g[0] = running_tag
+    return res_ptr
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+@cython.initializedcheck(False)
+cdef double* agg_sum(long* groups, size_t max_g, double* x, size_t length, size_t width) nogil:
+    """
+    Column-wise sum of the rows of ``x`` within each group.
+
+    :param groups: group id of every row; each id is used as a row index into the result
+    :param max_g: largest group id, the result has ``max_g + 1`` rows
+    :param x: flat buffer read as ``length`` rows of ``width`` values (index ``i*width + j``)
+    :param length: number of rows in ``x``
+    :param width: number of columns in ``x``
+    :return: calloc'ed flat ``(max_g + 1) * width`` buffer of sums; the caller must ``free`` it
+    """
+    # calloc zero-fills, so every accumulator starts at 0
+    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
+    cdef size_t i
+    cdef size_t j
+    cdef size_t src_offset
+    cdef size_t dst_offset
+
+    for i in range(length):
+        src_offset = i*width
+        dst_offset = groups[i]*width
+        for j in range(width):
+            res_ptr[dst_offset + j] += x[src_offset + j]
+    return res_ptr
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+@cython.initializedcheck(False)
+cdef double* agg_abssum(long* groups, size_t max_g, double* x, size_t length, size_t width) nogil:
+    """
+    Column-wise sum of absolute values of the rows of ``x`` within each group.
+
+    :param groups: group id of every row; each id is used as a row index into the result
+    :param max_g: largest group id, the result has ``max_g + 1`` rows
+    :param x: flat buffer read as ``length`` rows of ``width`` values (index ``i*width + j``)
+    :param length: number of rows in ``x``
+    :param width: number of columns in ``x``
+    :return: calloc'ed flat ``(max_g + 1) * width`` buffer; the caller must ``free`` it
+    """
+    # calloc zero-fills, so every accumulator starts at 0
+    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
+    cdef size_t i
+    cdef size_t j
+    cdef size_t src_offset
+    cdef size_t dst_offset
+
+    for i in range(length):
+        src_offset = i*width
+        dst_offset = groups[i]*width
+        for j in range(width):
+            res_ptr[dst_offset + j] += fabs(x[src_offset + j])
+    return res_ptr
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+@cython.initializedcheck(False)
+cdef double* agg_mean(long* groups, size_t max_g, double* x, size_t length, size_t width) nogil:
+    """
+    Column-wise mean of the rows of ``x`` within each group.
+
+    :param groups: group id of every row; each id is used as a row index into the result
+    :param max_g: largest group id, the result has ``max_g + 1`` rows
+    :param x: flat buffer read as ``length`` rows of ``width`` values (index ``i*width + j``)
+    :param length: number of rows in ``x``
+    :param width: number of columns in ``x``
+    :return: calloc'ed flat ``(max_g + 1) * width`` buffer of means; the caller must ``free`` it.
+             Rows of ids that never occur stay at 0.
+    """
+    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
+    # The counters are ``long``, so size the buffer with sizeof(long): sizeof(int)
+    # under-allocates wherever long is wider than int and the increments below
+    # would then write past the end of the allocation.
+    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(long))
+    cdef size_t i
+    cdef size_t j
+    cdef size_t loop_idx1
+    cdef size_t loop_idx2
+    cdef long curr
+
+    # first pass: per-group sums and member counts
+    for i in range(length):
+        loop_idx1 = i*width
+        loop_idx2 = groups[i]*width
+        for j in range(width):
+            res_ptr[loop_idx2 + j] += x[loop_idx1 + j]
+        bin_count_ptr[groups[i]] += 1
+
+    # second pass: turn sums into means, skipping ids without members
+    for i in range(max_g+1):
+        curr = bin_count_ptr[i]
+        if curr != 0:
+            loop_idx1 = i*width
+            for j in range(width):
+                res_ptr[loop_idx1 + j] /= curr
+
+    free(bin_count_ptr)
+    return res_ptr
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+@cython.initializedcheck(False)
+cdef double* agg_std(long* groups, size_t max_g, double* x, size_t length, size_t width, long ddof=1) nogil:
+    """
+    Column-wise standard deviation of the rows of ``x`` within each group.
+
+    Computed in one pass from the running sum and running sum of squares as
+    ``sqrt((sum_sq - sum * sum / n) / (n - ddof))``.
+
+    :param groups: group id of every row; each id is used as a row index into the result
+    :param max_g: largest group id, the result has ``max_g + 1`` rows
+    :param x: flat buffer read as ``length`` rows of ``width`` values (index ``i*width + j``)
+    :param length: number of rows in ``x``
+    :param width: number of columns in ``x``
+    :param ddof: value subtracted from the group size in the divisor
+    :return: calloc'ed flat ``(max_g + 1) * width`` buffer; the caller must ``free`` it.
+             Rows of ids that never occur stay at 0. A group with ``n == ddof`` members
+             divides by zero; no exception is raised because cdivision is enabled, so
+             the entry ends up non-finite.
+    """
+    cdef double* running_sum_square_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
+    cdef double* running_sum_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
+    # The counters are ``long``, so size the buffer with sizeof(long): sizeof(int)
+    # under-allocates wherever long is wider than int and the increments below
+    # would then write past the end of the allocation.
+    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(long))
+    cdef size_t i
+    cdef size_t j
+    cdef size_t loop_idx1
+    cdef size_t loop_idx2
+    cdef long curr
+    cdef double raw_value
+
+    # first pass: per-group sum, sum of squares and member count
+    for i in range(length):
+        loop_idx1 = i * width
+        loop_idx2 = groups[i] * width
+
+        for j in range(width):
+            raw_value = x[loop_idx1 + j]
+            running_sum_ptr[loop_idx2 + j] += raw_value
+            running_sum_square_ptr[loop_idx2 + j] += raw_value * raw_value
+        bin_count_ptr[groups[i]] += 1
+
+    # second pass: overwrite the sum-of-squares buffer in place with the result
+    for i in range(max_g+1):
+        curr = bin_count_ptr[i]
+        loop_idx1 = i * width
+        if curr != 0:
+            for j in range(width):
+                loop_idx2 = loop_idx1 + j
+                running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))
+
+    free(running_sum_ptr)
+    free(bin_count_ptr)
+    return running_sum_square_ptr
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str func):
+    """
+    Aggregate ``x`` per group and broadcast each group's result back to its rows.
+
+    :param groups: group label of every row of ``x``
+    :param x: 2-d data; its first element's address is handed to the helpers as a flat
+              row-major buffer. NOTE(review): this looks like it requires a C-contiguous
+              array, which the ``double[:, :]`` signature does not enforce -- confirm callers.
+    :param func: one of ``'mean'``, ``'std'``, ``'sum'``, ``'abssum'``
+    :return: array with the shape of ``x`` where every row holds the aggregate of its group
+    :raises ValueError: if ``func`` is not a supported name or ``groups`` has fewer
+                        entries than ``x`` has rows
+    """
+    cdef size_t length = x.shape[0]
+    cdef size_t width = x.shape[1]
+    # kept on the stack: a one-element heap allocation buys nothing here
+    cdef size_t max_g = 0
+    cdef long* mapped_groups
+    cdef double* res_data_ptr
+    cdef double* value_data_ptr
+    cdef np.ndarray[double, ndim=2] res
+    cdef size_t i
+    cdef size_t j
+    cdef size_t loop_idx1
+    cdef size_t loop_idx2
+
+    # Validate before anything is allocated. An unrecognised name would otherwise
+    # leave value_data_ptr uninitialised, and it is read and freed further down.
+    if func not in ('mean', 'std', 'sum', 'abssum'):
+        raise ValueError("unknown aggregation function: {0}".format(func))
+    # bounds checking is disabled, so a short ``groups`` would be read out of bounds
+    if <size_t>groups.shape[0] < length:
+        raise ValueError("groups has fewer entries than x has rows")
+
+    mapped_groups = group_mapping(&groups[0], length, &max_g)
+
+    if func == 'mean':
+        value_data_ptr = agg_mean(mapped_groups, max_g, &x[0, 0], length, width)
+    elif func == 'std':
+        value_data_ptr = agg_std(mapped_groups, max_g, &x[0, 0], length, width, ddof=1)
+    elif func == 'sum':
+        value_data_ptr = agg_sum(mapped_groups, max_g, &x[0, 0], length, width)
+    else:
+        value_data_ptr = agg_abssum(mapped_groups, max_g, &x[0, 0], length, width)
+
+    res_data_ptr = <double*>calloc(length*width, sizeof(double))
+
+    with nogil:
+        # copy each group's aggregated row to every row that belongs to the group
+        for i in range(length):
+            loop_idx1 = i*width
+            loop_idx2 = mapped_groups[i] * width
+            for j in range(width):
+                res_data_ptr[loop_idx1 + j] = value_data_ptr[loop_idx2 + j]
+        free(value_data_ptr)
+        free(mapped_groups)
+    # wrap the buffer without copying and flag the array as owner of the data
+    res = np.PyArray_SimpleNewFromData(2, [length, width], np.NPY_FLOAT64, res_data_ptr)
+    PyArray_ENABLEFLAGS(res, np.NPY_OWNDATA)
+    return res
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+cpdef np.ndarray[double, ndim=2] aggregate(long[:] groups, double[:, :] x, str func):
+    """
+    Aggregate the rows of ``x`` per group, returning one row per distinct group.
+
+    :param groups: group label of every row of ``x``
+    :param x: 2-d data; its first element's address is handed to the helpers as a flat
+              row-major buffer. NOTE(review): this looks like it requires a C-contiguous
+              array, which the ``double[:, :]`` signature does not enforce -- confirm callers.
+    :param func: one of ``'mean'``, ``'std'``, ``'sum'``, ``'abssum'``
+    :return: array with one row per distinct label, rows ordered by the first
+             appearance of the label in ``groups``
+    :raises ValueError: if ``func`` is not a supported name or ``groups`` has fewer
+                        entries than ``x`` has rows
+    """
+    cdef size_t length = x.shape[0]
+    cdef size_t width = x.shape[1]
+    # kept on the stack: a one-element heap allocation buys nothing here
+    cdef size_t max_g = 0
+    cdef long* mapped_groups
+    cdef np.ndarray[double, ndim=2] res
+    cdef double* value_data_ptr
+
+    # Validate before anything is allocated. An unrecognised name would otherwise
+    # leave value_data_ptr uninitialised, and it is wrapped into the result below.
+    if func not in ('mean', 'std', 'sum', 'abssum'):
+        raise ValueError("unknown aggregation function: {0}".format(func))
+    # bounds checking is disabled, so a short ``groups`` would be read out of bounds
+    if <size_t>groups.shape[0] < length:
+        raise ValueError("groups has fewer entries than x has rows")
+
+    mapped_groups = group_mapping(&groups[0], length, &max_g)
+
+    if func == 'mean':
+        value_data_ptr = agg_mean(mapped_groups, max_g, &x[0, 0], length, width)
+    elif func == 'std':
+        value_data_ptr = agg_std(mapped_groups, max_g, &x[0, 0], length, width, ddof=1)
+    elif func == 'sum':
+        value_data_ptr = agg_sum(mapped_groups, max_g, &x[0, 0], length, width)
+    else:
+        value_data_ptr = agg_abssum(mapped_groups, max_g, &x[0, 0], length, width)
+
+    # wrap the aggregated buffer without copying and flag the array as owner of the data
+    res = np.PyArray_SimpleNewFromData(2, [max_g+1, width], np.NPY_FLOAT64, value_data_ptr)
+    PyArray_ENABLEFLAGS(res, np.NPY_OWNDATA)
+    free(mapped_groups)
    return res
\ No newline at end of file
--- a/alphamind/benchmarks/benchmarks.py
+++ b/alphamind/benchmarks/benchmarks.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-from alphamind.benchmarks.data.neutralize import benchmark_neutralize
-from alphamind.benchmarks.data.standardize import benchmark_standardize
-from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
-from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
-from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group
-from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
-from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
-from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
-from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group
-
-
-if __name__ == '__main__':
-
-    benchmark_neutralize(3000, 10, 1000)
-    benchmark_neutralize(30, 10, 50000)
-    benchmark_neutralize(50000, 50, 20)
-    benchmark_standardize(3000, 10, 1000)
-    benchmark_standardize_with_group(3000, 10, 1000, 30)
-    benchmark_standardize(30, 10, 50000)
-    benchmark_standardize_with_group(30, 10, 5000, 5)
-    benchmark_standardize(50000, 50, 20)
-    benchmark_standardize_with_group(50000, 50, 20, 50)
-    benchmark_winsorize_normal(3000, 10, 1000)
-    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
-    benchmark_winsorize_normal(30, 10, 50000)
-    benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
-    benchmark_winsorize_normal(50000, 50, 20)
-    benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
-    benchmark_build_rank(3000, 1000, 300)
-    benchmark_build_rank_with_group(3000, 1000, 10, 30)
-    benchmark_build_rank(30, 50000, 3)
-    benchmark_build_rank_with_group(30, 50000, 1, 3)
-    benchmark_build_rank(50000, 20, 3000)
-    benchmark_build_rank_with_group(50000, 20, 10, 300)
-    benchmark_simple_settle(3000, 10, 1000)
-    benchmark_simple_settle_with_group(3000, 10, 1000, 30)
-    benchmark_simple_settle(30, 10, 50000)
-    benchmark_simple_settle_with_group(30, 10, 5000, 5)
-    benchmark_simple_settle(50000, 50, 20)
-    benchmark_simple_settle_with_group(50000, 50, 20, 50)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+from alphamind.benchmarks.data.neutralize import benchmark_neutralize
+from alphamind.benchmarks.data.standardize import benchmark_standardize
+from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
+from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
+from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group
+from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
+from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
+from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
+from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group
+
+
+if __name__ == '__main__':
+
+    benchmark_neutralize(3000, 10, 1000)
+    benchmark_neutralize(30, 10, 50000)
+    benchmark_neutralize(50000, 50, 20)
+    benchmark_standardize(3000, 10, 1000)
+    benchmark_standardize_with_group(3000, 10, 1000, 30)
+    benchmark_standardize(30, 10, 50000)
+    benchmark_standardize_with_group(30, 10, 5000, 5)
+    benchmark_standardize(50000, 50, 20)
+    benchmark_standardize_with_group(50000, 50, 20, 50)
+    benchmark_winsorize_normal(3000, 10, 1000)
+    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
+    benchmark_winsorize_normal(30, 10, 50000)
+    benchmark_winsorize_normal_with_group(30, 10, 5000, 5)
+    benchmark_winsorize_normal(50000, 50, 20)
+    benchmark_winsorize_normal_with_group(50000, 50, 20, 50)
+    benchmark_build_rank(3000, 1000, 300)
+    benchmark_build_rank_with_group(3000, 1000, 10, 30)
+    benchmark_build_rank(30, 50000, 3)
+    benchmark_build_rank_with_group(30, 50000, 1, 3)
+    benchmark_build_rank(50000, 20, 3000)
+    benchmark_build_rank_with_group(50000, 20, 10, 300)
+    benchmark_simple_settle(3000, 10, 1000)
+    benchmark_simple_settle_with_group(3000, 10, 1000, 30)
+    benchmark_simple_settle(30, 10, 50000)
+    benchmark_simple_settle_with_group(30, 10, 5000, 5)
+    benchmark_simple_settle(50000, 50, 20)
+    benchmark_simple_settle_with_group(50000, 50, 20, 50)
--- a/alphamind/benchmarks/data/neutralize.py
+++ b/alphamind/benchmarks/data/neutralize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import datetime as dt
-
-import numpy as np
-from sklearn.linear_model import LinearRegression
-
-from alphamind.data.neutralize import neutralize
-
-
-def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
-    print("-" * 60)
-    print("Starting least square fitting benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
-
-    y = np.random.randn(n_samples, 5)
-    x = np.random.randn(n_samples, n_features)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = neutralize(x, y)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        benchmark_model = LinearRegression(fit_intercept=False)
-        benchmark_model.fit(x, y)
-        _ = y - x @ benchmark_model.coef_.T
-    benchmark_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-if __name__ == '__main__':
-    benchmark_neutralize(3000, 10, 1000)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import datetime as dt
+
+import numpy as np
+from sklearn.linear_model import LinearRegression
+
+from alphamind.data.neutralize import neutralize
+
+
+def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
+    print("-" * 60)
+    print("Starting least square fitting benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
+
+    y = np.random.randn(n_samples, 5)
+    x = np.random.randn(n_samples, n_features)
+
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = neutralize(x, y)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        benchmark_model = LinearRegression(fit_intercept=False)
+        benchmark_model.fit(x, y)
+        _ = y - x @ benchmark_model.coef_.T
+    benchmark_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+# Script entry point: run a single configuration (3000 samples, 10 features, 1000 loops).
+if __name__ == '__main__':
+    benchmark_neutralize(3000, 10, 1000)
--- a/alphamind/benchmarks/data/standardize.py
+++ b/alphamind/benchmarks/data/standardize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import datetime as dt
-import numpy as np
-import pandas as pd
-from scipy.stats import zscore
-from alphamind.data.standardize import standardize
-
-
-def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
-    print("-" * 60)
-    print("Starting standardizing benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
-
-    x = np.random.randn(n_samples, n_features)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = standardize(x)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = zscore(x)
-    benchmark_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
-    print("-" * 60)
-    print("Starting standardizing with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
-
-    x = np.random.randn(n_samples, n_features)
-    groups = np.random.randint(n_groups, size=n_samples)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = standardize(x, groups=groups)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
-    benchmark_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-if __name__ == '__main__':
-    benchmark_standardize(3000, 10, 1000)
-    benchmark_standardize_with_group(3000, 10, 1000, 30)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import datetime as dt
+import numpy as np
+import pandas as pd
+from scipy.stats import zscore
+from alphamind.data.standardize import standardize
+
+
+def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
+    print("-" * 60)
+    print("Starting standardizing benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
+
+    x = np.random.randn(n_samples, n_features)
+
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = standardize(x)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = zscore(x)
+    benchmark_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
+    print("-" * 60)
+    print("Starting standardizing with group-by values benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
+
+    x = np.random.randn(n_samples, n_features)
+    groups = np.random.randint(n_groups, size=n_samples)
+
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = standardize(x, groups=groups)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
+    benchmark_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+# Script entry point: run the plain and the grouped benchmark once each
+# (3000 samples, 10 features, 1000 loops; 30 groups for the grouped variant).
+if __name__ == '__main__':
+    benchmark_standardize(3000, 10, 1000)
+    benchmark_standardize_with_group(3000, 10, 1000, 30)
--- a/alphamind/benchmarks/data/winsorize.py
+++ b/alphamind/benchmarks/data/winsorize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import datetime as dt
-import numpy as np
-import pandas as pd
-from alphamind.data.winsorize import winsorize_normal
-
-
-def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None:
-    print("-" * 60)
-    print("Starting winsorize normal benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
-
-    num_stds = 2
-
-    x = np.random.randn(n_samples, n_features)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = winsorize_normal(x, num_stds)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    def impl(x):
-        std_values = x.std(axis=0)
-        mean_value = x.mean(axis=0)
-
-        lower_bound = mean_value - num_stds * std_values
-        upper_bound = mean_value + num_stds * std_values
-
-        res = np.where(x > upper_bound, upper_bound, x)
-        res = np.where(res < lower_bound, lower_bound, res)
-        return res
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = impl(x)
-    benchmark_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
-    print("-" * 60)
-    print("Starting winsorize normal with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
-
-    num_stds = 2
-
-    x = np.random.randn(n_samples, n_features)
-    groups = np.random.randint(n_groups, size=n_samples)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = winsorize_normal(x, num_stds, groups=groups)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    def impl(x):
-        std_values = x.std(axis=0)
-        mean_value = x.mean(axis=0)
-
-        lower_bound = mean_value - num_stds * std_values
-        upper_bound = mean_value + num_stds * std_values
-
-        res = np.where(x > upper_bound, upper_bound, x)
-        res = np.where(res < lower_bound, lower_bound, res)
-        return res
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        _ = pd.DataFrame(x).groupby(groups).transform(impl)
-    benchmark_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-if __name__ == '__main__':
-    benchmark_winsorize_normal(3000, 10, 1000)
-    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import datetime as dt
+import numpy as np
+import pandas as pd
+from alphamind.data.winsorize import winsorize_normal
+
+
+def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None:
+    """Time ``winsorize_normal`` against an inline NumPy reference and print both timings.
+
+    :param n_samples: number of rows of the random input matrix
+    :param n_features: number of columns of the random input matrix
+    :param n_loops: number of calls made inside each timed loop
+    """
+    print("-" * 60)
+    print("Starting winsorize normal benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
+
+    num_stds = 2
+
+    x = np.random.randn(n_samples, n_features)
+
+    # Timed section 1: the library implementation (results are discarded).
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = winsorize_normal(x, num_stds)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    def impl(x):
+        # Reference: clip every column to mean +/- num_stds * std of that column.
+        std_values = x.std(axis=0)
+        mean_value = x.mean(axis=0)
+
+        lower_bound = mean_value - num_stds * std_values
+        upper_bound = mean_value + num_stds * std_values
+
+        res = np.where(x > upper_bound, upper_bound, x)
+        res = np.where(res < lower_bound, lower_bound, res)
+        return res
+
+    # Timed section 2: the reference implementation on the same data.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = impl(x)
+    benchmark_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
+    """Time grouped ``winsorize_normal`` against a pandas groupby/transform reference.
+
+    :param n_samples: number of rows of the random input matrix
+    :param n_features: number of columns of the random input matrix
+    :param n_loops: number of calls made inside each timed loop
+    :param n_groups: group labels are drawn uniformly from ``[0, n_groups)``
+    """
+    print("-" * 60)
+    print("Starting winsorize normal with group-by values benchmarking")
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
+
+    num_stds = 2
+
+    x = np.random.randn(n_samples, n_features)
+    groups = np.random.randint(n_groups, size=n_samples)
+
+    # Timed section 1: the library implementation with group labels.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = winsorize_normal(x, num_stds, groups=groups)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    def impl(x):
+        # Reference clipping to mean +/- num_stds * std; pandas calls this per group.
+        std_values = x.std(axis=0)
+        mean_value = x.mean(axis=0)
+
+        lower_bound = mean_value - num_stds * std_values
+        upper_bound = mean_value + num_stds * std_values
+
+        res = np.where(x > upper_bound, upper_bound, x)
+        res = np.where(res < lower_bound, lower_bound, res)
+        return res
+
+    # Timed section 2: the DataFrame is rebuilt on every iteration, so its
+    # construction cost is part of the measured reference time.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        _ = pd.DataFrame(x).groupby(groups).transform(impl)
+    benchmark_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+if __name__ == '__main__':
+    # Run both benchmarks: 3000 samples, 10 features, 1000 loops (30 groups for the grouped one).
+    benchmark_winsorize_normal(3000, 10, 1000)
+    benchmark_winsorize_normal_with_group(3000, 10, 1000, 30)
--- a/alphamind/benchmarks/portfolio/__init__.py
+++ b/alphamind/benchmarks/portfolio/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-27
-
-@author: cheng.li
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-27
+
+@author: cheng.li
 """
\ No newline at end of file
--- a/alphamind/benchmarks/portfolio/rankbuild.py
+++ b/alphamind/benchmarks/portfolio/rankbuild.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-27
-
-@author: cheng.li
-"""
-
-import datetime as dt
-import numpy as np
-import pandas as pd
-from alphamind.portfolio.rankbuilder import rank_build
-
-
-def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
-    print("-" * 60)
-    print("Starting portfolio construction by rank benchmarking")
-    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))
-
-    n_portfolio = 10
-
-    x = np.random.randn(n_samples, n_portfolio)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        calc_weights = rank_build(x, n_included)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        exp_weights = np.zeros((len(x), n_portfolio))
-        choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
-        for j in range(n_portfolio):
-            exp_weights[choosed_index[:, j], j] = 1.
-    benchmark_model_time = dt.datetime.now() - start
-
-    np.testing.assert_array_almost_equal(calc_weights, exp_weights)
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, n_groups: int) -> None:
-    print("-" * 60)
-    print("Starting  portfolio construction by rank with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))
-
-    n_portfolio = 10
-
-    x = np.random.randn(n_samples, n_portfolio)
-    groups = np.random.randint(n_groups, size=n_samples)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        calc_weights = rank_build(x, n_included, groups=groups)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
-        exp_weights = np.zeros((len(x), n_portfolio))
-        masks = (grouped_ordering <= n_included).values
-        for j in range(n_portfolio):
-            exp_weights[masks[:, j], j] = 1.
-    benchmark_model_time = dt.datetime.now() - start
-
-    np.testing.assert_array_almost_equal(calc_weights, exp_weights)
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-if __name__ == '__main__':
-    benchmark_build_rank(3000, 1000, 300)
-    benchmark_build_rank_with_group(3000, 1000, 10, 30)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-27
+
+@author: cheng.li
+"""
+
+import datetime as dt
+import numpy as np
+import pandas as pd
+from alphamind.portfolio.rankbuilder import rank_build
+
+
+def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
+    """Time ``rank_build`` against a double-argsort NumPy reference and check they agree.
+
+    :param n_samples: number of rows of the random score matrix
+    :param n_loops: number of calls made inside each timed loop
+    :param n_included: how many top-ranked rows per column receive weight 1
+    """
+    print("-" * 60)
+    print("Starting portfolio construction by rank benchmarking")
+    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))
+
+    n_portfolio = 10
+
+    x = np.random.randn(n_samples, n_portfolio)
+
+    # Timed section 1: the library implementation.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        calc_weights = rank_build(x, n_included)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    # Timed section 2: reference. argsort applied twice to -x yields each
+    # element's 0-based descending rank within its column.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        exp_weights = np.zeros((len(x), n_portfolio))
+        choosed_index = (-x).argsort(axis=0).argsort(axis=0) < n_included
+        for j in range(n_portfolio):
+            exp_weights[choosed_index[:, j], j] = 1.
+    benchmark_model_time = dt.datetime.now() - start
+
+    # Compares the results of the last iteration of each loop.
+    np.testing.assert_array_almost_equal(calc_weights, exp_weights)
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, n_groups: int) -> None:
+    """Time grouped ``rank_build`` against a pandas groupby/rank reference and check they agree.
+
+    :param n_samples: number of rows of the random score matrix
+    :param n_loops: number of calls made inside each timed loop
+    :param n_included: how many top-ranked rows per group and column receive weight 1
+    :param n_groups: group labels are drawn uniformly from ``[0, n_groups)``
+    """
+    print("-" * 60)
+    print("Starting  portfolio construction by rank with group-by values benchmarking")
+    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))
+
+    n_portfolio = 10
+
+    x = np.random.randn(n_samples, n_portfolio)
+    groups = np.random.randint(n_groups, size=n_samples)
+
+    # Timed section 1: the library implementation with group labels.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        calc_weights = rank_build(x, n_included, groups=groups)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    # Timed section 2: reference. Ranking -x within each group puts the largest
+    # value first; pandas ranks start at 1, hence the `<=` comparison.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
+        exp_weights = np.zeros((len(x), n_portfolio))
+        masks = (grouped_ordering <= n_included).values
+        for j in range(n_portfolio):
+            exp_weights[masks[:, j], j] = 1.
+    benchmark_model_time = dt.datetime.now() - start
+
+    # Compares the results of the last iteration of each loop.
+    np.testing.assert_array_almost_equal(calc_weights, exp_weights)
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+if __name__ == '__main__':
+    # Arguments are (n_samples, n_loops, n_included[, n_groups]).
+    benchmark_build_rank(3000, 1000, 300)
+    benchmark_build_rank_with_group(3000, 1000, 10, 30)
--- a/alphamind/benchmarks/settlement/__init__.py
+++ b/alphamind/benchmarks/settlement/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-28
-
-@author: cheng.li
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-28
+
+@author: cheng.li
 """
\ No newline at end of file
--- a/alphamind/benchmarks/settlement/simplesettle.py
+++ b/alphamind/benchmarks/settlement/simplesettle.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-28
-
-@author: cheng.li
-"""
-
-import datetime as dt
-import numpy as np
-import pandas as pd
-from alphamind.settlement.simplesettle import simple_settle
-
-
-def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None:
-    print("-" * 60)
-    print("Starting simple settle benchmarking")
-    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples, n_portfolios, n_loops))
-
-    weights = np.random.randn(n_samples, n_portfolios)
-    ret_series = np.random.randn(n_samples)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        calc_ret = simple_settle(weights, ret_series)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    start = dt.datetime.now()
-    ret_series.shape = -1, 1
-    for _ in range(n_loops):
-        exp_ret = (weights * ret_series).sum(axis=0)
-    benchmark_model_time = dt.datetime.now() - start
-
-    np.testing.assert_array_almost_equal(calc_ret, exp_ret)
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int, n_groups: int) -> None:
-    print("-" * 60)
-    print("Starting simple settle with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_portfolios, n_loops, n_groups))
-
-    weights = np.random.randn(n_samples, n_portfolios)
-    ret_series = np.random.randn(n_samples)
-    groups = np.random.randint(n_groups, size=n_samples)
-
-    start = dt.datetime.now()
-    for _ in range(n_loops):
-        calc_ret = simple_settle(weights, ret_series, groups=groups)
-    impl_model_time = dt.datetime.now() - start
-
-    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
-
-    start = dt.datetime.now()
-    ret_series.shape = -1, 1
-    for _ in range(n_loops):
-        ret_mat = weights * ret_series
-        exp_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
-    benchmark_model_time = dt.datetime.now() - start
-
-    np.testing.assert_array_almost_equal(calc_ret, exp_ret)
-
-    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
-
-
-if __name__ == '__main__':
-    benchmark_simple_settle(3000, 3, 1000)
-    benchmark_simple_settle_with_group(3000, 3, 1000, 30)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-28
+
+@author: cheng.li
+"""
+
+import datetime as dt
+import numpy as np
+import pandas as pd
+from alphamind.settlement.simplesettle import simple_settle
+
+
+def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None:
+    """Time ``simple_settle`` against a broadcast-and-sum NumPy reference and check they agree.
+
+    :param n_samples: number of rows of the random weight matrix and length of the return vector
+    :param n_portfolios: number of columns of the random weight matrix
+    :param n_loops: number of calls made inside each timed loop
+    """
+    print("-" * 60)
+    print("Starting simple settle benchmarking")
+    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples, n_portfolios, n_loops))
+
+    weights = np.random.randn(n_samples, n_portfolios)
+    ret_series = np.random.randn(n_samples)
+
+    # Timed section 1: the library implementation.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        calc_ret = simple_settle(weights, ret_series)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    # Timed section 2: reference. ret_series is reshaped in place to a column
+    # so it broadcasts across the columns of weights.
+    start = dt.datetime.now()
+    ret_series.shape = -1, 1
+    for _ in range(n_loops):
+        exp_ret = (weights * ret_series).sum(axis=0)
+    benchmark_model_time = dt.datetime.now() - start
+
+    # Compares the results of the last iteration of each loop.
+    np.testing.assert_array_almost_equal(calc_ret, exp_ret)
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int, n_groups: int) -> None:
+    """Time grouped ``simple_settle`` against a pandas groupby/sum reference and check they agree.
+
+    :param n_samples: number of rows of the random weight matrix and length of the return vector
+    :param n_portfolios: number of columns of the random weight matrix
+    :param n_loops: number of calls made inside each timed loop
+    :param n_groups: group labels are drawn uniformly from ``[0, n_groups)``
+    """
+    print("-" * 60)
+    print("Starting simple settle with group-by values benchmarking")
+    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_portfolios, n_loops, n_groups))
+
+    weights = np.random.randn(n_samples, n_portfolios)
+    ret_series = np.random.randn(n_samples)
+    groups = np.random.randint(n_groups, size=n_samples)
+
+    # Timed section 1: the library implementation with group labels.
+    start = dt.datetime.now()
+    for _ in range(n_loops):
+        calc_ret = simple_settle(weights, ret_series, groups=groups)
+    impl_model_time = dt.datetime.now() - start
+
+    print('{0:20s}: {1}'.format('Implemented model', impl_model_time))
+
+    # Timed section 2: reference. ret_series is reshaped in place to a column
+    # so it broadcasts across the columns of weights.
+    start = dt.datetime.now()
+    ret_series.shape = -1, 1
+    for _ in range(n_loops):
+        ret_mat = weights * ret_series
+        # sort=False keeps groups in order of first appearance.
+        # NOTE(review): presumably this matches the row order produced by
+        # simple_settle's aggregation -- confirm against alphamind.aggregate.
+        exp_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
+    benchmark_model_time = dt.datetime.now() - start
+
+    # Compares the results of the last iteration of each loop.
+    np.testing.assert_array_almost_equal(calc_ret, exp_ret)
+
+    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
+
+
+if __name__ == '__main__':
+    # Arguments are (n_samples, n_portfolios, n_loops[, n_groups]).
+    benchmark_simple_settle(3000, 3, 1000)
+    benchmark_simple_settle_with_group(3000, 3, 1000, 30)
--- a/alphamind/data/__init__.py
+++ b/alphamind/data/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
--- a/alphamind/data/neutralize.py
+++ b/alphamind/data/neutralize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import numpy as np
-from numpy.linalg import solve
-from alphamind.aggregate import groupby
-
-
-def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
-    if groups is not None:
-        res = np.zeros(y.shape)
-        groups_ids = groupby(groups)
-
-        for curr_idx in groups_ids:
-            curr_x = x[curr_idx]
-            curr_y = y[curr_idx]
-            b = ls_fit(x[curr_idx], y[curr_idx])
-            res[curr_idx] = ls_res(curr_x, curr_y, b)
-        return res
-    else:
-        b = ls_fit(x, y)
-        return ls_res(x, y, b)
-
-
-def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
-    x_bar = x.T
-    b = solve(x_bar @ x, x_bar @ y)
-    return b
-
-
-def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
-    return y - x @ b
-
-
-if __name__ == '__main__':
-
-    x = np.random.randn(3000, 3)
-    y = np.random.randn(3000, 2)
-    groups = np.random.randint(30, size=3000)
-
-    print(neutralize(x, y, groups))
\ No newline at end of file
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import numpy as np
+from numpy.linalg import solve
+from typing import Tuple
+from alphamind.aggregate import groupby
+
+
+def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    """Return the residuals of ``y`` after a least-squares fit on ``x``.
+
+    Without ``groups`` a single fit over all rows is used. With ``groups`` the
+    fit is done separately on the rows of each group and the residuals are
+    written back to those rows, so the result always has the shape of ``y``.
+
+    :param x: regressors, one row per observation (no intercept column is added)
+    :param y: values to be neutralized, one row per observation
+    :param groups: optional group label per row
+    :return: residual array with the same shape as ``y``
+    """
+    if groups is not None:
+        res = np.zeros(y.shape)
+        # groupby yields one array of row indices per distinct label.
+        groups_ids = groupby(groups)
+
+        for curr_idx in groups_ids:
+            curr_x = x[curr_idx]
+            curr_y = y[curr_idx]
+            # Reuse the slices taken above instead of indexing x and y again.
+            b = ls_fit(curr_x, curr_y)
+            res[curr_idx] = ls_res(curr_x, curr_y, b)
+        return res
+    else:
+        b = ls_fit(x, y)
+        return ls_res(x, y, b)
+
+
+def ls_fit(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """Solve the normal equations ``(x.T @ x) b = x.T @ y`` and return ``b``.
+
+    No intercept column is added to ``x``.
+    """
+    x_bar = x.T
+    b = solve(x_bar @ x, x_bar @ y)
+    return b
+
+
+def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Return the residuals ``y - x @ b`` for coefficients ``b``."""
+    return y - x @ b
+
+
+def ls_explained(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
+    """Placeholder: not implemented yet.
+
+    The body is empty, so the call returns ``None`` despite the annotated
+    ``np.ndarray`` return type.
+    """
+    pass
+
+
+if __name__ == '__main__':
+
+    # Ad-hoc smoke run on random data.
+    x = np.random.randn(3000, 3)
+    y = np.random.randn(3000, 2)
+    # groups is created but not used by the calls below.
+    groups = np.random.randint(30, size=3000)
+
+    b = ls_fit(x, y)
+    # ls_explained is still a stub, so this call does nothing and its result is discarded.
+    ls_explained(x, y, b)
\ No newline at end of file
--- a/alphamind/data/standardize.py
+++ b/alphamind/data/standardize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import numpy as np
-
-from alphamind.aggregate import transform
-
-
-def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
-
-    if groups is not None:
-        mean_values = transform(groups, x, 'mean')
-        std_values = transform(groups, x, 'std')
-
-        return (x - mean_values) / std_values
-    else:
-        return (x - x.mean(axis=0)) / x.std(axis=0)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import numpy as np
+
+from alphamind.aggregate import transform
+
+
+def standardize(x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    """Return ``x`` with the mean subtracted and divided by the standard deviation.
+
+    Without ``groups`` the column-wise mean and std over all rows are used.
+    With ``groups`` the statistics come from ``transform(groups, x, ...)``.
+    NOTE(review): presumably that returns per-group values aligned with the
+    rows of ``x`` -- confirm against alphamind.aggregate.
+    There is no guard against a zero standard deviation.
+    """
+
+    if groups is not None:
+        mean_values = transform(groups, x, 'mean')
+        std_values = transform(groups, x, 'std')
+
+        return (x - mean_values) / std_values
+    else:
+        return (x - x.mean(axis=0)) / x.std(axis=0)
--- a/alphamind/data/winsorize.py
+++ b/alphamind/data/winsorize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import numpy as np
-
-from alphamind.aggregate import transform
-
-
-def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:
-
-    if groups is not None:
-        mean_values = transform(groups, x, 'mean')
-        std_values = transform(groups, x, 'std')
-    else:
-        std_values = x.std(axis=0)
-        mean_values = x.mean(axis=0)
-
-    ubound = mean_values + num_stds * std_values
-    lbound = mean_values - num_stds * std_values
-
-    res = np.where(x > ubound, ubound, np.where(x < lbound, lbound, x))
-
-    return res
-
-
-if __name__ == '__main__':
-    x = np.random.randn(3000, 10)
-    groups = np.random.randint(0, 20, size=3000)
-
-    for _ in range(1000):
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import numpy as np
+
+from alphamind.aggregate import transform
+
+
+def winsorize_normal(x: np.ndarray, num_stds: int=3, groups: np.ndarray=None) -> np.ndarray:
+    """Clip ``x`` to the band ``mean +/- num_stds * std``.
+
+    Without ``groups`` the column-wise mean and std over all rows are used.
+    With ``groups`` the statistics come from ``transform(groups, x, ...)``.
+    NOTE(review): presumably that returns per-group values aligned with the
+    rows of ``x`` -- confirm against alphamind.aggregate.
+
+    :param x: values to clip
+    :param num_stds: half-width of the band in standard deviations
+    :param groups: optional group label per row
+    :return: new array; values outside the band are replaced by the bound
+    """
+
+    if groups is not None:
+        mean_values = transform(groups, x, 'mean')
+        std_values = transform(groups, x, 'std')
+    else:
+        std_values = x.std(axis=0)
+        mean_values = x.mean(axis=0)
+
+    ubound = mean_values + num_stds * std_values
+    lbound = mean_values - num_stds * std_values
+
+    # Values above the upper bound take it, values below the lower bound take
+    # that one, and everything else is kept.
+    res = np.where(x > ubound, ubound, np.where(x < lbound, lbound, x))
+
+    return res
+
+
+if __name__ == '__main__':
+    # Ad-hoc loop: 1000 grouped calls on random data; results are discarded.
+    x = np.random.randn(3000, 10)
+    groups = np.random.randint(0, 20, size=3000)
+
+    for _ in range(1000):
        winsorize_normal(x, 2, groups)
\ No newline at end of file
--- a/alphamind/portfolio/__init__.py
+++ b/alphamind/portfolio/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-26
-
-@author: cheng.li
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-26
+
+@author: cheng.li
 """
\ No newline at end of file
--- a/alphamind/portfolio/impl.pyx
+++ b/alphamind/portfolio/impl.pyx
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-29
-
-@author: cheng.li
-"""
-
-import numpy as np
-cimport numpy as np
-cimport cython
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef void set_value_bool(unsigned char[:, :] mat, long long[:, :] index):
-
-    cdef size_t length = index.shape[0]
-    cdef size_t width = index.shape[1]
-    cdef size_t i
-    cdef size_t j
-    cdef unsigned char* mat_ptr = &mat[0, 0]
-    cdef long long* index_ptr = &index[0, 0]
-    cdef size_t k
-
-    for i in range(length):
-        k = i * width
-        for j in range(width):
-            mat_ptr[index_ptr[k + j] * width + j] = True
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-@cython.initializedcheck(False)
-cpdef void set_value_double(double[:, :] mat, long long[:, :] index, double val):
-
-    cdef size_t length = index.shape[0]
-    cdef size_t width = index.shape[1]
-    cdef size_t i
-    cdef size_t j
-    cdef double* mat_ptr = &mat[0, 0]
-    cdef long long* index_ptr = &index[0, 0]
-    cdef size_t k
-
-    for i in range(length):
-        k = i * width
-        for j in range(width):
-            mat_ptr[index_ptr[k + j] * width + j] = val
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-29
+
+@author: cheng.li
+"""
+
+import numpy as np
+cimport numpy as np
+cimport cython
+
+
+# Set mat[index[i, j], j] to True for every entry (i, j) of index.
+# Both arrays are addressed through raw pointers using index's column count
+# as the row stride.
+# NOTE(review): this assumes both memoryviews are C-contiguous and that mat
+# has the same number of columns as index -- confirm at the call sites.
+# Bounds checking is disabled and the pointer writes are unchecked, so the
+# values stored in index must be valid row numbers of mat.
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+cpdef void set_value_bool(unsigned char[:, :] mat, long long[:, :] index):
+
+    cdef size_t length = index.shape[0]
+    cdef size_t width = index.shape[1]
+    cdef size_t i
+    cdef size_t j
+    cdef unsigned char* mat_ptr = &mat[0, 0]
+    cdef long long* index_ptr = &index[0, 0]
+    cdef size_t k
+
+    for i in range(length):
+        # k is the flat offset of row i in index.
+        k = i * width
+        for j in range(width):
+            mat_ptr[index_ptr[k + j] * width + j] = True
+
+
+# Set mat[index[i, j], j] to val for every entry (i, j) of index.
+# Both arrays are addressed through raw pointers using index's column count
+# as the row stride.
+# NOTE(review): this assumes both memoryviews are C-contiguous and that mat
+# has the same number of columns as index -- confirm at the call sites.
+# Bounds checking is disabled and the pointer writes are unchecked, so the
+# values stored in index must be valid row numbers of mat.
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.initializedcheck(False)
+cpdef void set_value_double(double[:, :] mat, long long[:, :] index, double val):
+
+    cdef size_t length = index.shape[0]
+    cdef size_t width = index.shape[1]
+    cdef size_t i
+    cdef size_t j
+    cdef double* mat_ptr = &mat[0, 0]
+    cdef long long* index_ptr = &index[0, 0]
+    cdef size_t k
+
+    for i in range(length):
+        # k is the flat offset of row i in index.
+        k = i * width
+        for j in range(width):
+            mat_ptr[index_ptr[k + j] * width + j] = val
--- a/alphamind/portfolio/rankbuilder.py
+++ b/alphamind/portfolio/rankbuilder.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-26
-
-@author: cheng.li
-"""
-
-import numpy as np
-from numpy import zeros
-from alphamind.aggregate import groupby
-from alphamind.portfolio.impl import set_value_bool
-from alphamind.portfolio.impl import set_value_double
-
-
-def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
-
-    if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1):
-        """ fast path methods for single column er"""
-        neg_er = -er.flatten()
-        length = len(neg_er)
-        weights = zeros((length, 1))
-        if groups is not None:
-            group_ids = groupby(groups)
-            masks = zeros(length, dtype=bool)
-            for current_index in group_ids:
-                current_ordering = neg_er[current_index].argsort()
-                masks[current_index[current_ordering[:use_rank]]] = True
-            weights[masks] = 1.
-        else:
-            ordering = neg_er.argsort()
-            weights[ordering[:use_rank]] = 1.
-        return weights
-    else:
-        length = er.shape[0]
-        width = er.shape[1]
-        neg_er = -er
-        weights = zeros((length, width))
-
-        if groups is not None:
-            group_ids = groupby(groups)
-            masks = zeros((length, width), dtype=bool)
-            for current_index in group_ids:
-                current_ordering = neg_er[current_index].argsort(axis=0)
-                total_index = current_index[current_ordering[:use_rank]]
-                set_value_bool(masks.view(dtype=np.uint8), total_index)
-
-            for j in range(width):
-                weights[masks[:, j], j] = 1.
-        else:
-            ordering = neg_er.argsort(axis=0)
-            set_value_double(weights, ordering[:use_rank], 1.)
-        return weights
-
-
-if __name__ == '__main__':
-    n_sample = 6
-    n_groups = 3
-
-    x = np.random.randn(n_sample)
-    groups = np.array([1, 1, 2, 1, 0, 2])
-    print(groups)
-    print(groupby(groups))
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-26
+
+@author: cheng.li
+"""
+
+import numpy as np
+from numpy import zeros
+from alphamind.aggregate import groupby
+from alphamind.portfolio.impl import set_value_bool
+from alphamind.portfolio.impl import set_value_double
+
+
+def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.ndarray:
+    """Build 0/1 weights that mark the ``use_rank`` largest entries of ``er``.
+
+    Selection is done per column, and additionally per group when ``groups``
+    is given.
+
+    :param er: 1-D or 2-D array of scores; larger values are selected first
+    :param use_rank: number of entries to select (per group if ``groups`` is given)
+    :param groups: optional group label per row
+    :return: float array of zeros and ones; shape ``(n, 1)`` on the
+        single-column path, otherwise the shape of ``er``
+    """
+
+    # NOTE(review): a 2-D input with exactly one row also takes this branch and
+    # is flattened, so its values are ranked against each other -- confirm
+    # that this is intended.
+    if er.ndim == 1 or (er.shape[0] == 1 or er.shape[1] == 1):
+        """ fast path methods for single column er"""
+        # Negate so that an ascending argsort lists the largest scores first.
+        neg_er = -er.flatten()
+        length = len(neg_er)
+        weights = zeros((length, 1))
+        if groups is not None:
+            group_ids = groupby(groups)
+            masks = zeros(length, dtype=bool)
+            for current_index in group_ids:
+                # Rank inside the group, then map back to positions in er.
+                current_ordering = neg_er[current_index].argsort()
+                masks[current_index[current_ordering[:use_rank]]] = True
+            weights[masks] = 1.
+        else:
+            ordering = neg_er.argsort()
+            weights[ordering[:use_rank]] = 1.
+        return weights
+    else:
+        length = er.shape[0]
+        width = er.shape[1]
+        neg_er = -er
+        weights = zeros((length, width))
+
+        if groups is not None:
+            group_ids = groupby(groups)
+            masks = zeros((length, width), dtype=bool)
+            for current_index in group_ids:
+                # Column-wise ranking inside the group; total_index holds, per
+                # column, the row numbers of the selected entries in er.
+                current_ordering = neg_er[current_index].argsort(axis=0)
+                total_index = current_index[current_ordering[:use_rank]]
+                # The boolean mask is passed as a uint8 view of the same memory.
+                set_value_bool(masks.view(dtype=np.uint8), total_index)
+
+            for j in range(width):
+                weights[masks[:, j], j] = 1.
+        else:
+            ordering = neg_er.argsort(axis=0)
+            set_value_double(weights, ordering[:use_rank], 1.)
+        return weights
+
+
+if __name__ == '__main__':
+    # Ad-hoc demo: six random scores in three groups, one pick per group.
+    n_sample = 6
+    # n_groups is not used below; the labels are hard-coded.
+    n_groups = 3
+
+    x = np.random.randn(n_sample)
+    groups = np.array([1, 1, 2, 1, 0, 2])
+    print(groups)
+    print(groupby(groups))
    print(rank_build(x, 1, groups))
\ No newline at end of file
--- a/alphamind/settlement/__init__.py
+++ b/alphamind/settlement/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-28
-
-@author: cheng.li
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-28
+
+@author: cheng.li
 """
\ No newline at end of file
--- a/alphamind/settlement/simplesettle.py
+++ b/alphamind/settlement/simplesettle.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-28
-
-@author: cheng.li
-"""
-
-import numpy as np
-from alphamind.aggregate import aggregate
-
-
-def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
-
-    if ret_series.ndim > 1:
-        ret_series = ret_series.flatten()
-
-    ret_mat = (ret_series * weights.T).T
-    if groups is not None:
-        return aggregate(groups, ret_mat, 'sum')
-    else:
-        return ret_mat.sum(axis=0)
-
-
-if __name__ == '__main__':
-    from alphamind.aggregate import group_mapping_test
-
-    s = np.random.randint(2, 5, size=6)
-    print(s)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-28
+
+@author: cheng.li
+"""
+
+import numpy as np
+from alphamind.aggregate import aggregate
+
+
+def simple_settle(weights: np.ndarray, ret_series: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    """Multiply each row of ``weights`` by its return and sum the products.
+
+    :param weights: one row per observation, one column per portfolio
+    :param ret_series: one return per observation; flattened if it has more than one dimension
+    :param groups: optional group label per row
+    :return: column sums of the weighted returns, or the result of
+        ``aggregate(groups, ret_mat, 'sum')`` when ``groups`` is given
+    """
+
+    if ret_series.ndim > 1:
+        ret_series = ret_series.flatten()
+
+    # Transposing lets the 1-D returns broadcast along the rows of weights.
+    ret_mat = (ret_series * weights.T).T
+    if groups is not None:
+        return aggregate(groups, ret_mat, 'sum')
+    else:
+        return ret_mat.sum(axis=0)
+
+
+if __name__ == '__main__':
+    # Ad-hoc demo that prints random labels and what group_mapping_test returns for them.
+    from alphamind.aggregate import group_mapping_test
+
+    s = np.random.randint(2, 5, size=6)
+    print(s)
    print(group_mapping_test(s))
\ No newline at end of file
--- a/alphamind/tests/__init__.py
+++ b/alphamind/tests/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
--- a/alphamind/tests/data/test_neutralize.py
+++ b/alphamind/tests/data/test_neutralize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import unittest
-import numpy as np
-from sklearn.linear_model import LinearRegression
-from alphamind.data.neutralize import neutralize
-
-
-class TestNeutralize(unittest.TestCase):
-
-    def test_neutralize(self):
-
-        y = np.random.randn(3000, 4)
-        x = np.random.randn(3000, 10)
-
-        calc_res = neutralize(x, y)
-
-        model = LinearRegression(fit_intercept=False)
-        model.fit(x, y)
-
-        exp_res = y - x @ model.coef_.T
-
-        np.testing.assert_array_almost_equal(calc_res, exp_res)
-
-    def test_neutralize_with_group(self):
-        y = np.random.randn(3000, 4)
-        x = np.random.randn(3000, 10)
-        groups = np.random.randint(30, size=3000)
-
-        calc_res = neutralize(x, y, groups)
-
-        model = LinearRegression(fit_intercept=False)
-        for i in range(30):
-            curr_x = x[groups == i]
-            curr_y = y[groups == i]
-            model.fit(curr_x, curr_y)
-            exp_res = curr_y - curr_x @ model.coef_.T
-            np.testing.assert_array_almost_equal(calc_res[groups ==i ], exp_res)
-
-
-if __name__ == '__main__':
-    unittest.main()
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import unittest
+import numpy as np
+from sklearn.linear_model import LinearRegression
+from alphamind.data.neutralize import neutralize
+
+
+class TestNeutralize(unittest.TestCase):
+    """Compare ``neutralize`` with residuals from scikit-learn's LinearRegression (no intercept)."""
+
+    def test_neutralize(self):
+        """Ungrouped case: one fit over all rows."""
+
+        y = np.random.randn(3000, 4)
+        x = np.random.randn(3000, 10)
+
+        calc_res = neutralize(x, y)
+
+        model = LinearRegression(fit_intercept=False)
+        model.fit(x, y)
+
+        # coef_ is transposed before the product so it lines up with x's columns.
+        exp_res = y - x @ model.coef_.T
+
+        np.testing.assert_array_almost_equal(calc_res, exp_res)
+
+    def test_neutralize_with_group(self):
+        """Grouped case: the residuals of each group are checked against a fit on that group only."""
+        y = np.random.randn(3000, 4)
+        x = np.random.randn(3000, 10)
+        groups = np.random.randint(30, size=3000)
+
+        calc_res = neutralize(x, y, groups)
+
+        model = LinearRegression(fit_intercept=False)
+        # range(30) matches the label range drawn by randint(30) above.
+        for i in range(30):
+            curr_x = x[groups == i]
+            curr_y = y[groups == i]
+            model.fit(curr_x, curr_y)
+            exp_res = curr_y - curr_x @ model.coef_.T
+            np.testing.assert_array_almost_equal(calc_res[groups ==i ], exp_res)
+
+
+if __name__ == '__main__':
+    # Run this module's tests when it is executed as a script.
+    unittest.main()
--- a/alphamind/tests/data/test_standardize.py
+++ b/alphamind/tests/data/test_standardize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import unittest
-import numpy as np
-import pandas as pd
-from scipy.stats import zscore
-from alphamind.data.standardize import standardize
-
-
-class TestStandardize(unittest.TestCase):
-
-    def test_standardize(self):
-
-        x = np.random.randn(3000, 10)
-
-        calc_zscore = standardize(x)
-        exp_zscore = zscore(x)
-
-        np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
-        
-    def test_standardize_with_group(self):
-        x = np.random.randn(3000, 10)
-        groups = np.random.randint(10, 30, size=3000)
-
-        calc_zscore = standardize(x, groups)
-        exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
-        np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
-
-
-if __name__ == '__main__':
-    unittest.main()
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import unittest
+import numpy as np
+import pandas as pd
+from scipy.stats import zscore
+from alphamind.data.standardize import standardize
+
+
+class TestStandardize(unittest.TestCase):
+    """Compare ``standardize`` with SciPy's ``zscore`` and a pandas groupby reference."""
+
+    def test_standardize(self):
+        """Ungrouped case: the result must match ``scipy.stats.zscore``."""
+
+        x = np.random.randn(3000, 10)
+
+        calc_zscore = standardize(x)
+        exp_zscore = zscore(x)
+
+        np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
+        
+    def test_standardize_with_group(self):
+        """Grouped case: the result must match a per-group z-score computed through pandas."""
+        x = np.random.randn(3000, 10)
+        groups = np.random.randint(10, 30, size=3000)
+
+        calc_zscore = standardize(x, groups)
+        # NOTE(review): pandas' std defaults to ddof=1 while NumPy's defaults to
+        # ddof=0 -- confirm which one transform(..., 'std') uses.
+        exp_zscore = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
+        np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/alphamind/tests/data/test_winsorize.py
+++ b/alphamind/tests/data/test_winsorize.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import unittest
-import numpy as np
-import pandas as pd
-from alphamind.data.winsorize import winsorize_normal
-
-
-class TestWinsorize(unittest.TestCase):
-
-    def test_winsorize_normal(self):
-        num_stds = 2
-
-        x = np.random.randn(3000, 10)
-
-        calc_winsorized = winsorize_normal(x, num_stds)
-
-        std_values = x.std(axis=0)
-        mean_value = x.mean(axis=0)
-
-        lower_bound = mean_value - num_stds * std_values
-        upper_bound = mean_value + num_stds * std_values
-
-        for i in range(np.size(calc_winsorized, 1)):
-            col_data = x[:, i]
-            col_data[col_data > upper_bound[i]] = upper_bound[i]
-            col_data[col_data < lower_bound[i]] = lower_bound[i]
-
-            calculated_col = calc_winsorized[:, i]
-            np.testing.assert_array_almost_equal(col_data, calculated_col)
-
-    def test_winsorize_normal_with_group(self):
-        num_stds = 2
-        x = np.random.randn(3000, 10)
-        groups = np.random.randint(10, 30, size=3000)
-
-        cal_winsorized = winsorize_normal(x, num_stds, groups)
-
-        def impl(x):
-            std_values = x.std(axis=0)
-            mean_value = x.mean(axis=0)
-
-            lower_bound = mean_value - num_stds * std_values
-            upper_bound = mean_value + num_stds * std_values
-
-            res = np.where(x > upper_bound, upper_bound, x)
-            res = np.where(res < lower_bound, lower_bound, res)
-            return res
-
-        exp_winsorized = pd.DataFrame(x).groupby(groups).transform(impl).values
-        np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized)
-
-
-if __name__ == "__main__":
-    unittest.main()
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import unittest
+import numpy as np
+import pandas as pd
+from alphamind.data.winsorize import winsorize_normal
+
+
+class TestWinsorize(unittest.TestCase):
+    """Check ``winsorize_normal`` against reference clipping at mean +/- k standard deviations."""
+
+    def test_winsorize_normal(self):
+        """Without groups, every column is expected to be clipped to its own bounds."""
+        num_stds = 2
+
+        x = np.random.randn(3000, 10)
+
+        calc_winsorized = winsorize_normal(x, num_stds)
+
+        std_values = x.std(axis=0)
+        mean_value = x.mean(axis=0)
+
+        lower_bound = mean_value - num_stds * std_values
+        upper_bound = mean_value + num_stds * std_values
+
+        # Build the expectation in a fresh array. Clipping column views of ``x``
+        # in place would also modify the array that was handed to
+        # ``winsorize_normal``; if its result shared memory with that input the
+        # comparison would pass without checking anything.
+        exp_winsorized = np.clip(x, lower_bound, upper_bound)
+
+        np.testing.assert_array_almost_equal(calc_winsorized, exp_winsorized)
+
+    def test_winsorize_normal_with_group(self):
+        """With groups, the bounds are computed by pandas within each group."""
+        num_stds = 2
+        x = np.random.randn(3000, 10)
+        groups = np.random.randint(10, 30, size=3000)
+
+        cal_winsorized = winsorize_normal(x, num_stds, groups)
+
+        def impl(values):
+            # ``values`` is whatever pandas passes to the transform callback
+            # for one group; mean and std are taken along axis 0 of it.
+            std_values = values.std(axis=0)
+            mean_value = values.mean(axis=0)
+
+            lower_bound = mean_value - num_stds * std_values
+            upper_bound = mean_value + num_stds * std_values
+
+            res = np.where(values > upper_bound, upper_bound, values)
+            res = np.where(res < lower_bound, lower_bound, res)
+            return res
+
+        exp_winsorized = pd.DataFrame(x).groupby(groups).transform(impl).values
+        np.testing.assert_array_almost_equal(cal_winsorized, exp_winsorized)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/alphamind/tests/portfolio/__init__.py
+++ b/alphamind/tests/portfolio/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-27
-
-@author: cheng.li
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-27
+
+@author: cheng.li
 """
\ No newline at end of file
--- a/alphamind/tests/portfolio/test_rankbuild.py
+++ b/alphamind/tests/portfolio/test_rankbuild.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-27
-
-@author: cheng.li
-"""
-
-import unittest
-import numpy as np
-import pandas as pd
-from alphamind.portfolio.rankbuilder import rank_build
-
-
-class TestRankBuild(unittest.TestCase):
-
-    def test_rank_build(self):
-
-        n_samples = 3000
-        n_included = 300
-
-        n_portfolios = range(10)
-
-        for n_portfolio in n_portfolios:
-            x = np.random.randn(n_samples, n_portfolio)
-
-            calc_weights = rank_build(x, n_included)
-
-            expected_weights = np.zeros((len(x), n_portfolio))
-            masks = (-x).argsort(axis=0).argsort(axis=0) < n_included
-
-            for j in range(x.shape[1]):
-                expected_weights[masks[:, j], j] = 1.
-
-            np.testing.assert_array_almost_equal(calc_weights, expected_weights)
-
-    def test_rank_build_with_group(self):
-
-        n_samples = 3000
-        n_include = 10
-        n_groups = 30
-
-        n_portfolios = range(10)
-
-        for n_portfolio in n_portfolios:
-
-            x = np.random.randn(n_samples, n_portfolio)
-            groups = np.random.randint(n_groups, size=n_samples)
-
-            calc_weights = rank_build(x, n_include, groups)
-
-            grouped_ordering = pd.DataFrame(-x).groupby(groups).rank()
-            expected_weights = np.zeros((len(x), n_portfolio))
-            masks = (grouped_ordering <= n_include).values
-            for j in range(x.shape[1]):
-                expected_weights[masks[:, j], j] = 1.
-
-            np.testing.assert_array_almost_equal(calc_weights, expected_weights)
-
-
-if __name__ == '__main__':
-    unittest.main()
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-27
+
+@author: cheng.li
+"""
+
+import unittest
+import numpy as np
+import pandas as pd
+from alphamind.portfolio.rankbuilder import rank_build
+
+
+class TestRankBuild(unittest.TestCase):
+    """Compare ``rank_build`` with reference selections built from numpy / pandas ranks."""
+
+    def test_rank_build(self):
+        """Ungrouped case: weight 1 is expected where a column's descending rank is below ``n_included``."""
+        n_samples, n_included = 3000, 300
+
+        for n_portfolio in range(10):
+            scores = np.random.randn(n_samples, n_portfolio)
+
+            calc_weights = rank_build(scores, n_included)
+
+            # argsort applied twice gives each row's 0-based position in the
+            # column ordering; negating the scores makes that ordering descending.
+            positions = (-scores).argsort(axis=0).argsort(axis=0)
+            selected = positions < n_included
+
+            expected_weights = np.zeros((len(scores), n_portfolio))
+            for col in range(scores.shape[1]):
+                expected_weights[selected[:, col], col] = 1.
+
+            np.testing.assert_array_almost_equal(calc_weights, expected_weights)
+
+    def test_rank_build_with_group(self):
+        """Grouped case: weight 1 is expected where the within-group pandas rank is at most ``n_include``."""
+        n_samples, n_include, n_groups = 3000, 10, 30
+
+        for n_portfolio in range(10):
+            scores = np.random.randn(n_samples, n_portfolio)
+            groups = np.random.randint(n_groups, size=n_samples)
+
+            calc_weights = rank_build(scores, n_include, groups)
+
+            # Ranks are computed by pandas inside each group on the negated scores.
+            within_group_rank = pd.DataFrame(-scores).groupby(groups).rank()
+            selected = (within_group_rank <= n_include).values
+
+            expected_weights = np.zeros((len(scores), n_portfolio))
+            for col in range(scores.shape[1]):
+                expected_weights[selected[:, col], col] = 1.
+
+            np.testing.assert_array_almost_equal(calc_weights, expected_weights)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/alphamind/tests/settlement/__init__.py
+++ b/alphamind/tests/settlement/__init__.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-28
-
-@author: cheng.li
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-28
+
+@author: cheng.li
 """
\ No newline at end of file
--- a/alphamind/tests/settlement/test_simplesettle.py
+++ b/alphamind/tests/settlement/test_simplesettle.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-28
-
-@author: cheng.li
-"""
-
-
-import unittest
-import numpy as np
-import pandas as pd
-from alphamind.settlement.simplesettle import simple_settle
-
-
-class TestSimpleSettle(unittest.TestCase):
-
-    def test_simples_settle(self):
-        n_samples = 3000
-        n_portfolio = 3
-
-        weights = np.random.randn(n_samples, n_portfolio)
-        ret_series = np.random.randn(n_samples)
-
-        calc_ret = simple_settle(weights, ret_series)
-
-        ret_series.shape = -1, 1
-        expected_ret = (weights * ret_series).sum(axis=0)
-
-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
-
-        ret_series = np.random.randn(n_samples, 1)
-
-        calc_ret = simple_settle(weights, ret_series)
-
-        expected_ret = (weights * ret_series).sum(axis=0)
-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
-
-    def test_simple_settle_with_group(self):
-        n_samples = 3000
-        n_portfolio = 3
-        n_groups = 30
-
-        weights = np.random.randn(n_samples, n_portfolio)
-        ret_series = np.random.randn(n_samples)
-        groups = np.random.randint(n_groups, size=n_samples)
-
-        calc_ret = simple_settle(weights, ret_series, groups)
-
-        ret_series.shape = -1, 1
-        ret_mat = weights * ret_series
-        expected_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
-
-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
-
-        ret_series = np.random.randn(n_samples, 1)
-
-        calc_ret = simple_settle(weights, ret_series, groups)
-
-        ret_mat = weights * ret_series
-        expected_ret = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum().values
-
-        np.testing.assert_array_almost_equal(calc_ret, expected_ret)
-
-
-if __name__ == '__main__':
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-28
+
+@author: cheng.li
+"""
+
+
+import unittest
+import numpy as np
+import pandas as pd
+from alphamind.settlement.simplesettle import simple_settle
+
+
+class TestSimpleSettle(unittest.TestCase):
+    """Compare ``simple_settle`` with weighted return sums computed via numpy / pandas."""
+
+    def test_simples_settle(self):
+        """Ungrouped case: expected value is the column-wise sum of weights times returns."""
+        n_samples = 3000
+        n_portfolio = 3
+
+        weights = np.random.randn(n_samples, n_portfolio)
+
+        # Returns are supplied first as a flat vector, then as a single-column matrix.
+        for ret_shape in ((n_samples,), (n_samples, 1)):
+            ret_series = np.random.randn(*ret_shape)
+
+            calc_ret = simple_settle(weights, ret_series)
+
+            # Reshape to a column so the product broadcasts across portfolios.
+            expected_ret = (weights * ret_series.reshape(-1, 1)).sum(axis=0)
+            np.testing.assert_array_almost_equal(calc_ret, expected_ret)
+
+    def test_simple_settle_with_group(self):
+        """Grouped case: expected value is the per-group sum, groups kept in first-appearance order."""
+        n_samples = 3000
+        n_portfolio = 3
+        n_groups = 30
+
+        weights = np.random.randn(n_samples, n_portfolio)
+        groups = np.random.randint(n_groups, size=n_samples)
+
+        # Returns are supplied first as a flat vector, then as a single-column matrix.
+        for ret_shape in ((n_samples,), (n_samples, 1)):
+            ret_series = np.random.randn(*ret_shape)
+
+            calc_ret = simple_settle(weights, ret_series, groups)
+
+            ret_mat = weights * ret_series.reshape(-1, 1)
+            grouped_sum = pd.DataFrame(ret_mat).groupby(groups, sort=False).sum()
+            expected_ret = grouped_sum.values
+
+            np.testing.assert_array_almost_equal(calc_ret, expected_ret)
+
+
+if __name__ == '__main__':
    unittest.main()
\ No newline at end of file
--- a/alphamind/tests/test_suite.py
+++ b/alphamind/tests/test_suite.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-from alphamind.utilities import add_parent_path
-
-add_parent_path(__file__, 3)
-
-from alphamind.tests.data.test_neutralize import TestNeutralize
-from alphamind.tests.data.test_standardize import TestStandardize
-from alphamind.tests.data.test_winsorize import TestWinsorize
-from alphamind.tests.portfolio.test_rankbuild import TestRankBuild
-from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle
-from alphamind.utilities import alpha_logger
-from alphamind.utilities import TestRunner
-
-
-if __name__ == '__main__':
-    runner = TestRunner([TestNeutralize,
-                         TestStandardize,
-                         TestWinsorize,
-                         TestRankBuild,
-                         TestSimpleSettle],
-                        alpha_logger)
-    runner.run()
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+from alphamind.utilities import add_parent_path
+
+add_parent_path(__file__, 3)
+
+from alphamind.tests.data.test_neutralize import TestNeutralize
+from alphamind.tests.data.test_standardize import TestStandardize
+from alphamind.tests.data.test_winsorize import TestWinsorize
+from alphamind.tests.portfolio.test_rankbuild import TestRankBuild
+from alphamind.tests.settlement.test_simplesettle import TestSimpleSettle
+from alphamind.utilities import alpha_logger
+from alphamind.utilities import TestRunner
+
+
+if __name__ == '__main__':
+    # Every TestCase class of the package that should run as part of the suite.
+    test_cases = [
+        TestNeutralize,
+        TestStandardize,
+        TestWinsorize,
+        TestRankBuild,
+        TestSimpleSettle,
+    ]
+    TestRunner(test_cases, alpha_logger).run()
--- a/alphamind/utilities.py
+++ b/alphamind/utilities.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import os
-import sys
-import logging
-import unittest
-
-
-alpha_logger = logging.getLogger('ALPHA_MIND')
-ch = logging.StreamHandler()
-formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-ch.setFormatter(formatter)
-alpha_logger.addHandler(ch)
-alpha_logger.setLevel(logging.INFO)
-
-
-def add_parent_path(name, level):
-    current_path = os.path.abspath(name)
-    sys.path.append(os.path.sep.join(current_path.split(os.path.sep)[:-level]))
-
-
-class TestRunner(object):
-
-    def __init__(self,
-                 test_cases,
-                 logger):
-
-        self.suite = unittest.TestSuite()
-        self.logger = logger
-
-        for case in test_cases:
-            tests = unittest.TestLoader().loadTestsFromTestCase(case)
-            self.suite.addTests(tests)
-
-    def run(self):
-
-        self.logger.info('Python ' + sys.version)
-
-        res = unittest.TextTestRunner(verbosity=3).run(self.suite)
-        if len(res.errors) >= 1 or len(res.failures) >= 1:
-            sys.exit(-1)
-        else:
-            sys.exit(0)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import os
+import sys
+import logging
+import unittest
+
+
+# Package logger: INFO level, records written through one stream handler.
+alpha_logger = logging.getLogger('ALPHA_MIND')
+alpha_logger.setLevel(logging.INFO)
+
+# Handler and formatter stay bound to module-level names, as before.
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch = logging.StreamHandler()
+ch.setFormatter(formatter)
+alpha_logger.addHandler(ch)
+
+
+def add_parent_path(name, level):
+    """Append an ancestor directory of ``name`` to ``sys.path``.
+
+    :param name: a path (callers in this package pass ``__file__``); it is
+        converted to an absolute path first.
+    :param level: how many trailing path components to strip from that
+        absolute path. ``0`` appends the absolute path itself.
+    """
+    target = os.path.abspath(name)
+    # Walk up with dirname instead of split/join on the separator: slicing
+    # with ``[:-level]`` yields an empty list for ``level == 0`` and an empty
+    # string at the filesystem root, both of which would append '' to sys.path.
+    for _ in range(level):
+        target = os.path.dirname(target)
+    sys.path.append(target)
+
+
+class TestRunner(object):
+    """Collect ``unittest.TestCase`` classes into one suite and run it as a script entry point."""
+
+    def __init__(self,
+                 test_cases,
+                 logger):
+        """Load all tests of the given case classes into ``self.suite``.
+
+        :param test_cases: iterable of ``unittest.TestCase`` subclasses.
+        :param logger: object whose ``info`` method is called from ``run``.
+        """
+        self.suite = unittest.TestSuite()
+        self.logger = logger
+
+        # One loader is enough for every case class.
+        loader = unittest.TestLoader()
+        for case in test_cases:
+            self.suite.addTests(loader.loadTestsFromTestCase(case))
+
+    def run(self):
+        """Run the suite and terminate the process.
+
+        Always raises ``SystemExit``: code ``0`` when the run was successful,
+        ``-1`` otherwise.
+        """
+        self.logger.info('Python ' + sys.version)
+
+        res = unittest.TextTestRunner(verbosity=3).run(self.suite)
+        # wasSuccessful() covers errors and failures and, unlike checking those
+        # two lists by hand, also reports unexpected successes as a failed run.
+        sys.exit(0 if res.wasSuccessful() else -1)
--- a/requirements.txt
+++ b/requirements.txt
-cython >= 0.25.2
-numpy >= 1.12.1
-scikit-learn >= 0.18.1
-scipy >= 0.19.0
+cython >= 0.25.2
+numpy >= 1.12.1
+scikit-learn >= 0.18.1
+scipy >= 0.19.0
 pandas >= 0.19.2
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
-# -*- coding: utf-8 -*-
-"""
-Created on 2017-4-25
-
-@author: cheng.li
-"""
-
-import platform
-import sys
-from setuptools import setup
-from setuptools import find_packages
-from distutils.extension import Extension
-import numpy as np
-import Cython
-from Cython.Build import cythonize
-Cython.Compiler.Options.annotate = True
-
-VERSION = "0.1.0"
-
-if "--line_trace" in sys.argv:
-    line_trace = True
-    print("Build with line trace enabled ...")
-    sys.argv.remove("--line_trace")
-else:
-    line_trace = False
-
-
-ext_modules = ['alphamind/aggregate.pyx',
-               'alphamind/portfolio/impl.pyx']
-
-
-def generate_extensions(ext_modules, line_trace=False):
-
-    extensions = []
-
-    if line_trace:
-        print("define cython trace to True ...")
-        define_macros = [('CYTHON_TRACE', 1), ('CYTHON_TRACE_NOGIL', 1)]
-    else:
-        define_macros = []
-
-    for pyxfile in ext_modules:
-        ext = Extension(name='.'.join(pyxfile.split('/'))[:-4],
-                        sources=[pyxfile],
-                        define_macros=define_macros)
-        extensions.append(ext)
-    return extensions
-
-
-if platform.system() != "Windows":
-    import multiprocessing
-    n_cpu = multiprocessing.cpu_count()
-else:
-    n_cpu = 0
-
-ext_modules_settings = cythonize(generate_extensions(ext_modules, line_trace),
-                                 compiler_directives={'embedsignature': True, 'linetrace': line_trace},
-                                 nthreads=n_cpu)
-
-
-setup(
-    name='Alpha-Mind',
-    version=VERSION,
-    packages=find_packages(),
-    url='',
-    license='',
-    author='wegamekinglc',
-    author_email='',
-    ext_modules=ext_modules_settings,
-    include_dirs=[np.get_include()],
-    description=''
-)
+# -*- coding: utf-8 -*-
+"""
+Created on 2017-4-25
+
+@author: cheng.li
+"""
+
+import platform
+import sys
+from setuptools import setup
+from setuptools import find_packages
+from distutils.extension import Extension
+import numpy as np
+import Cython
+from Cython.Build import cythonize
+Cython.Compiler.Options.annotate = True
+
+VERSION = "0.1.0"
+
+# ``--line_trace`` is a custom switch: when present it is removed from
+# ``sys.argv`` here, before the ``setup()`` call further down runs.
+line_trace = "--line_trace" in sys.argv
+if line_trace:
+    print("Build with line trace enabled ...")
+    sys.argv.remove("--line_trace")
+
+
+# Cython sources that are turned into extension modules below.
+ext_modules = [
+    'alphamind/aggregate.pyx',
+    'alphamind/portfolio/impl.pyx',
+]
+
+
+def generate_extensions(ext_modules, line_trace=False):
+    """Build one ``Extension`` object per source path.
+
+    :param ext_modules: iterable of '/'-separated source paths, e.g.
+        ``'alphamind/aggregate.pyx'``.
+    :param line_trace: when true, the ``CYTHON_TRACE`` and
+        ``CYTHON_TRACE_NOGIL`` macros are defined for every extension.
+    :return: list of ``Extension`` instances, in input order.
+    """
+    import os
+
+    if line_trace:
+        print("define cython trace to True ...")
+        define_macros = [('CYTHON_TRACE', 1), ('CYTHON_TRACE_NOGIL', 1)]
+    else:
+        define_macros = []
+
+    extensions = []
+    for pyxfile in ext_modules:
+        # Strip whatever suffix the file has rather than assuming it is
+        # exactly four characters long, then turn the path into a dotted name.
+        module_path, _ = os.path.splitext(pyxfile)
+        ext = Extension(name=module_path.replace('/', '.'),
+                        sources=[pyxfile],
+                        define_macros=define_macros)
+        extensions.append(ext)
+    return extensions
+
+
+# Thread count handed to cythonize: the CPU count everywhere except Windows,
+# where 0 is passed (presumably to keep cythonization serial - confirm against
+# the Cython documentation).
+if platform.system() == "Windows":
+    n_cpu = 0
+else:
+    import multiprocessing
+    n_cpu = multiprocessing.cpu_count()
+
+ext_modules_settings = cythonize(
+    generate_extensions(ext_modules, line_trace),
+    nthreads=n_cpu,
+    compiler_directives={'linetrace': line_trace, 'embedsignature': True},
+)
+
+
+setup(
+    name='Alpha-Mind',
+    version=VERSION,
+    description='',
+    url='',
+    license='',
+    author='wegamekinglc',
+    author_email='',
+    packages=find_packages(),
+    include_dirs=[np.get_include()],
+    ext_modules=ext_modules_settings,
+)