Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
258b0348
Commit
258b0348
authored
Apr 27, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added rank build tests and benchmark
parent
b971f376
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
198 additions
and
52 deletions
+198
-52
aggregate.pyx
alphamind/aggregate.pyx
+55
-47
benchmarks.py
alphamind/benchmarks/benchmarks.py
+8
-0
__init__.py
alphamind/benchmarks/portfolio/__init__.py
+6
-0
rankbuild.py
alphamind/benchmarks/portfolio/rankbuild.py
+65
-0
rankbuilder.py
alphamind/portfolio/rankbuilder.py
+8
-5
__init__.py
alphamind/tests/portfolio/__init__.py
+6
-0
test_rankbuild.py
alphamind/tests/portfolio/test_rankbuild.py
+50
-0
No files found.
alphamind/aggregate.pyx
View file @
258b0348
...
...
@@ -10,6 +10,8 @@ from numpy import zeros
from numpy import asarray
cimport cython
from libc.math cimport sqrt
from libc.stdlib cimport calloc
from libc.stdlib cimport free
@cython.boundscheck(False)
...
...
@@ -26,69 +28,76 @@ cdef int max_groups(long* groups, size_t length) nogil:
curr_max = curr
return curr_max
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_mean(long* groups, double* x, size_t length, size_t width) nogil:
    # Per-group column means of a row-major (length x width) buffer.
    #
    # groups : group label of each row; labels index the output rows directly,
    #          so the output has max_groups(groups, length) + 1 rows.
    # x      : input values, row i starts at x[i * width].
    # Returns a calloc'ed row-major ((max_g + 1) x width) buffer holding the mean
    # of every group that has at least one row (rows of unused labels stay 0).
    # The caller owns the buffer and must free() it.
    # Returns NULL if an allocation fails.
    cdef long max_g = max_groups(groups, length)
    cdef double* res_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
    # The counters are `long`, so size them with sizeof(long): sizeof(int) is
    # smaller on LP64 platforms and would let the writes below overrun the buffer.
    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(long))
    cdef size_t i
    cdef size_t j
    cdef size_t loop_idx1
    cdef size_t loop_idx2
    cdef long curr

    if res_ptr == NULL or bin_count_ptr == NULL:
        # free(NULL) is a no-op, so both can be released unconditionally.
        free(res_ptr)
        free(bin_count_ptr)
        return NULL

    # Pass 1: accumulate column sums and row counts per group.
    for i in range(length):
        loop_idx1 = i*width
        loop_idx2 = groups[i]*width
        for j in range(width):
            res_ptr[loop_idx2 + j] += x[loop_idx1 + j]
        bin_count_ptr[groups[i]] += 1

    # Pass 2: turn sums into means, skipping labels that never occurred.
    for i in range(max_g+1):
        curr = bin_count_ptr[i]
        if curr != 0:
            loop_idx1 = i*width
            for j in range(width):
                res_ptr[loop_idx1 + j] /= curr

    free(bin_count_ptr)
    return res_ptr
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
@cython.initializedcheck(False)
cdef double* agg_std(long* groups, double* x, size_t length, size_t width, long ddof=1) nogil:
    # Per-group column standard deviations of a row-major (length x width) buffer.
    #
    # groups : group label of each row; labels index the output rows directly,
    #          so the output has max_groups(groups, length) + 1 rows.
    # x      : input values, row i starts at x[i * width].
    # ddof   : subtracted from the group's row count in the variance denominator.
    # Returns a calloc'ed row-major ((max_g + 1) x width) buffer computed as
    # sqrt((sum(v^2) - sum(v)^2 / n) / (n - ddof)) for every group with n > 0 rows.
    # The caller owns the buffer and must free() it.
    # Returns NULL if an allocation fails.
    # NOTE(review): a group with n == ddof divides by zero (cdivision is on, so
    # the result is inf/nan rather than an exception) -- confirm this is intended.
    cdef long max_g = max_groups(groups, length)
    cdef double* running_sum_square_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
    cdef double* running_sum_ptr = <double*>calloc((max_g+1)*width, sizeof(double))
    # The counters are `long`, so size them with sizeof(long): sizeof(int) is
    # smaller on LP64 platforms and would let the writes below overrun the buffer.
    cdef long* bin_count_ptr = <long*>calloc(max_g+1, sizeof(long))
    cdef size_t i
    cdef size_t j
    cdef size_t loop_idx1
    cdef size_t loop_idx2
    cdef long curr
    cdef double raw_value

    if running_sum_square_ptr == NULL or running_sum_ptr == NULL or bin_count_ptr == NULL:
        # free(NULL) is a no-op, so all three can be released unconditionally.
        free(running_sum_square_ptr)
        free(running_sum_ptr)
        free(bin_count_ptr)
        return NULL

    # Pass 1: accumulate sum, sum of squares and row count per group.
    for i in range(length):
        loop_idx1 = i * width
        loop_idx2 = groups[i] * width
        for j in range(width):
            raw_value = x[loop_idx1 + j]
            running_sum_ptr[loop_idx2 + j] += raw_value
            running_sum_square_ptr[loop_idx2 + j] += raw_value * raw_value
        bin_count_ptr[groups[i]] += 1

    # Pass 2: overwrite the sum-of-squares buffer in place with the deviation.
    for i in range(max_g+1):
        curr = bin_count_ptr[i]
        loop_idx1 = i * width
        if curr != 0:
            for j in range(width):
                loop_idx2 = loop_idx1 + j
                running_sum_square_ptr[loop_idx2] = sqrt((running_sum_square_ptr[loop_idx2] - running_sum_ptr[loop_idx2] * running_sum_ptr[loop_idx2] / curr) / (curr - ddof))

    free(running_sum_ptr)
    free(bin_count_ptr)
    return running_sum_square_ptr
@cython.boundscheck(False)
...
...
@@ -100,23 +109,22 @@ cpdef np.ndarray[double, ndim=2] transform(long[:] groups, double[:, :] x, str f
cdef size_t width = x.shape[1]
cdef double[:, :] res_data = zeros((length, width))
cdef double* res_data_ptr = &res_data[0, 0]
cdef double[:, :] value_data = zeros((length, width))
cdef double* value_data_ptr
cdef size_t i
cdef size_t j
cdef size_t k
cdef size_t loop_idx1
cdef size_t loop_idx2
if func == 'mean':
value_data = agg_mean(&groups[0], &x[0, 0], length, width)
value_data
_ptr
= agg_mean(&groups[0], &x[0, 0], length, width)
elif func == 'std':
value_data = agg_std(&groups[0], &x[0, 0], length, width, ddof=1)
value_data_ptr = &value_data[0, 0]
value_data_ptr = agg_std(&groups[0], &x[0, 0], length, width, ddof=1)
with nogil:
for i in range(length):
k = groups[i]
loop_idx1 = i*width
loop_idx2 = groups[i] * width
for j in range(width):
res_data_ptr[
i*width + j] = value_data_ptr[k*width
+ j]
res_data_ptr[
loop_idx1 + j] = value_data_ptr[loop_idx2
+ j]
free(value_data_ptr)
return asarray(res_data)
\ No newline at end of file
alphamind/benchmarks/benchmarks.py
View file @
258b0348
...
...
@@ -10,6 +10,8 @@ from alphamind.benchmarks.data.standardize import benchmark_standardize
from
alphamind.benchmarks.data.standardize
import
benchmark_standardize_with_group
from
alphamind.benchmarks.data.winsorize
import
benchmark_winsorize_normal
from
alphamind.benchmarks.data.winsorize
import
benchmark_winsorize_normal_with_group
from
alphamind.benchmarks.portfolio.rankbuild
import
benchmark_build_rank
from
alphamind.benchmarks.portfolio.rankbuild
import
benchmark_build_rank_with_group
if
__name__
==
'__main__'
:
...
...
@@ -28,3 +30,9 @@ if __name__ == '__main__':
benchmark_winsorize_normal_with_group
(
30
,
10
,
5000
,
5
)
benchmark_winsorize_normal
(
50000
,
50
,
20
)
benchmark_winsorize_normal_with_group
(
50000
,
50
,
20
,
50
)
benchmark_build_rank
(
3000
,
1000
,
300
)
benchmark_build_rank_with_group
(
3000
,
1000
,
10
,
30
)
benchmark_build_rank
(
30
,
50000
,
3
)
benchmark_build_rank_with_group
(
30
,
50000
,
1
,
3
)
benchmark_build_rank
(
50000
,
20
,
3000
)
benchmark_build_rank_with_group
(
50000
,
20
,
10
,
300
)
alphamind/benchmarks/portfolio/__init__.py
0 → 100644
View file @
258b0348
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
\ No newline at end of file
alphamind/benchmarks/portfolio/rankbuild.py
0 → 100644
View file @
258b0348
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
import
datetime
as
dt
import
numpy
as
np
import
pandas
as
pd
from
alphamind.portfolio.rankbuilder
import
rank_build
def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
    """Time ``rank_build`` against a plain NumPy double-argsort reference.

    A single random vector of ``n_samples`` values is drawn once; each
    implementation is then run ``n_loops`` times on it, selecting the top
    ``n_included`` entries, and the elapsed wall-clock time of each is printed.
    """
    report_line = '{0:20s}: {1}'

    print("-" * 60)
    print("Starting portfolio construction by rank benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))

    x = np.random.randn(n_samples)

    # --- implementation under test ---
    tic = dt.datetime.now()
    for _ in range(n_loops):
        rank_build(x, n_included)
    impl_model_time = dt.datetime.now() - tic
    print(report_line.format('Implemented model', impl_model_time))

    # --- reference: rank via argsort of argsort, equal weight to the top ranks ---
    tic = dt.datetime.now()
    for _ in range(n_loops):
        expected_weights = np.zeros(len(x))
        top_ranked = (-x).argsort().argsort() < n_included
        expected_weights[top_ranked] = 1. / n_included
    benchmark_model_time = dt.datetime.now() - tic
    print(report_line.format('Benchmark model', benchmark_model_time))
def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, n_groups: int) -> None:
    """Time grouped ``rank_build`` against a pandas group-by rank reference.

    One random value vector of ``n_samples`` entries and one random integer
    label vector drawn from ``[0, n_groups)`` are generated once; each
    implementation is then run ``n_loops`` times, keeping the top
    ``n_included`` entries per group, and both elapsed times are printed.
    """
    report_line = '{0:20s}: {1}'

    print("-" * 60)
    print("Starting portfolio construction by rank with group-by values benchmarking")
    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))

    x = np.random.randn(n_samples)
    groups = np.random.randint(n_groups, size=n_samples)

    # --- implementation under test ---
    tic = dt.datetime.now()
    for _ in range(n_loops):
        rank_build(x, n_included, groups=groups)
    impl_model_time = dt.datetime.now() - tic
    print(report_line.format('Implemented model', impl_model_time))

    # --- reference: per-group rank of -x, equal weight across all selected ---
    tic = dt.datetime.now()
    for _ in range(n_loops):
        grouped_ordering = pd.Series(-x).groupby(groups).rank()
        expected_weights = np.zeros(len(x))
        masks = grouped_ordering <= n_included
        expected_weights[masks] = 1. / np.sum(masks)
    benchmark_model_time = dt.datetime.now() - tic
    print(report_line.format('Benchmark model', benchmark_model_time))
if __name__ == '__main__':
    # Run one ungrouped and one grouped benchmark when executed as a script.
    benchmark_build_rank(n_samples=3000, n_loops=1000, n_included=300)
    benchmark_build_rank_with_group(n_samples=3000, n_loops=1000, n_included=10, n_groups=30)
alphamind/portfolio/rankbuilder.py
View file @
258b0348
...
...
@@ -15,12 +15,15 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None) -> np.nda
if
groups
is
not
None
:
max_g
=
np
.
max
(
groups
)
index_range
=
np
.
arange
(
len
(
er
))
for
i
in
range
(
max_g
+
1
):
current_mask
=
groups
==
i
current_ordering
=
ordering
[
current_mask
]
masks
[
current_ordering
[:
use_rank
]]
=
True
current_index
=
index_range
[
current_mask
]
current_ordering
=
neg_er
[
current_mask
]
.
argsort
()
masks
[
current_index
[
current_ordering
[:
use_rank
]]]
=
True
else
:
masks
[
ordering
[:
use_rank
]]
=
True
weights
=
np
.
zeros
(
len
(
er
))
...
...
@@ -33,10 +36,10 @@ if __name__ == '__main__':
import
datetime
as
dt
x
=
np
.
random
.
randn
(
3000
)
groups
=
np
.
random
.
randint
(
20
,
50
,
size
=
3000
)
groups
=
np
.
random
.
randint
(
30
,
size
=
3000
)
start
=
dt
.
datetime
.
now
()
for
i
in
range
(
10000
):
weights
=
rank_build
(
x
,
2
0
,
groups
)
weights
=
rank_build
(
x
,
3
0
,
groups
)
print
(
dt
.
datetime
.
now
()
-
start
)
#print(x, '\n', weights)
alphamind/tests/portfolio/__init__.py
View file @
258b0348
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
\ No newline at end of file
alphamind/tests/portfolio/test_rankbuild.py
0 → 100644
View file @
258b0348
# -*- coding: utf-8 -*-
"""
Created on 2017-4-27
@author: cheng.li
"""
import
unittest
import
numpy
as
np
import
pandas
as
pd
from
alphamind.portfolio.rankbuilder
import
rank_build
class TestRankBuild(unittest.TestCase):
    """Compare ``rank_build`` with straightforward NumPy / pandas references."""

    def setUp(self):
        # The inputs are random; a fixed seed makes every run (and therefore
        # every failure) reproducible.
        np.random.seed(20170427)

    def test_rank_build(self):
        # Ungrouped case: the n_included largest values each get 1 / n_included.
        n_samples = 3000
        n_included = 300

        x = np.random.randn(n_samples)

        calc_weights = rank_build(x, n_included)

        expected_weights = np.zeros(len(x))
        expected_weights[(-x).argsort().argsort() < n_included] = 1. / n_included

        np.testing.assert_array_almost_equal(calc_weights, expected_weights)

    def test_rank_build_with_group(self):
        # Grouped case: the top n_include values of every group are selected and
        # all selected entries share the same weight, 1 / (number selected).
        n_samples = 3000
        n_include = 10
        n_groups = 30

        x = np.random.randn(n_samples)
        groups = np.random.randint(n_groups, size=n_samples)

        calc_weights = rank_build(x, n_include, groups)

        grouped_ordering = pd.Series(-x).groupby(groups).rank()
        expected_weights = np.zeros(len(x))
        masks = grouped_ordering <= n_include
        expected_weights[masks] = 1. / np.sum(masks)

        np.testing.assert_array_almost_equal(calc_weights, expected_weights)
# Script entry point: hand control to unittest's command-line runner.
if
__name__
==
'__main__'
:
unittest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment