Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
e9d233d4
Commit
e9d233d4
authored
May 04, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
restructure
parent
8328777b
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
222 additions
and
252 deletions
+222
-252
aggregate.py
alphamind/aggregate.py
+0
-238
neutralize.py
alphamind/data/neutralize.py
+1
-1
standardize.py
alphamind/data/standardize.py
+4
-4
winsorize.py
alphamind/data/winsorize.py
+4
-4
rankbuilder.py
alphamind/portfolio/rankbuilder.py
+1
-1
simplesettle.py
alphamind/settlement/simplesettle.py
+3
-3
utilities.py
alphamind/utilities.py
+209
-0
requirements.txt
requirements.txt
+0
-1
No files found.
alphamind/aggregate.py
deleted
100644 → 0
View file @
8328777b
# -*- coding: utf-8 -*-
"""
Created on 2017-5-3
@author: cheng.li
"""
import
math
import
numpy
as
np
import
numba
as
nb
def groupby(groups):
    """Split row positions into one index array per distinct group label.

    :param groups: 1-D integer array of group labels (passed to ``np.bincount``,
        so presumably non-negative integers).
    :return: list of index arrays, one per label that actually occurs, in
        ascending label order; each array holds the positions carrying that label.
    """
    sorted_positions = groups.argsort()
    occupancy = np.bincount(groups)
    # Only labels that occur at least once contribute a slice.
    sizes = occupancy[occupancy.nonzero()[0]]
    # Cumulative sizes give the exclusive end of each label's run in sorted order.
    stops = np.cumsum(sizes)
    return [sorted_positions[stop - size:stop] for size, stop in zip(sizes, stops)]
@nb.njit(nogil=True, cache=True)
def group_mapping(groups):
    """Relabel arbitrary group values as dense integer codes.

    Distinct values are numbered 0, 1, 2, ... in ascending order of value and
    every position receives the code of the value it holds.

    :param groups: 1-D array of group labels.
    :return: array of the same length as ``groups`` holding the dense codes,
        with the dtype of the ``argsort`` result. Empty input gives an empty array.
    """
    length = groups.shape[0]
    order = groups.argsort()
    res = np.zeros(length, dtype=order.dtype)

    # Nothing to label; indexing order[0] below would be out of bounds.
    if length == 0:
        return res

    start = 0
    res[order[0]] = start
    previous = groups[order[0]]

    for i in range(1, length):
        curr_idx = order[i]
        curr_val = groups[curr_idx]
        # Walking in sorted order, a change of value starts the next code.
        if curr_val != previous:
            start += 1
        res[curr_idx] = start
        previous = curr_val
    return res
@nb.njit(nogil=True, cache=True)
def simple_sum(x, axis=0):
    """Sum a 2-D array along one axis with explicit loops.

    :param x: 2-D array (unpacked as ``rows, cols = x.shape``).
    :param axis: 0 gives one total per column, 1 gives one total per row.
        Only these two values are handled.
    :return: 1-D array created with ``np.zeros`` holding the totals.
    """
    n_rows, n_cols = x.shape

    if axis == 0:
        res = np.zeros(n_cols)
        for row in range(n_rows):
            for col in range(n_cols):
                res[col] += x[row, col]
    elif axis == 1:
        res = np.zeros(n_rows)
        for row in range(n_rows):
            for col in range(n_cols):
                res[row] += x[row, col]
    return res
@nb.njit(nogil=True, cache=True)
def simple_mean(x, axis=0):
    """Arithmetic mean of a 2-D array along one axis with explicit loops.

    :param x: 2-D array (unpacked as ``rows, cols = x.shape``).
    :param axis: 0 gives one mean per column, 1 gives one mean per row.
        Only these two values are handled.
    :return: 1-D array created with ``np.zeros`` holding the means.
    """
    n_rows, n_cols = x.shape

    if axis == 0:
        res = np.zeros(n_cols)
        for col in range(n_cols):
            for row in range(n_rows):
                res[col] += x[row, col]
            # Column total is complete; turn it into a mean.
            res[col] /= n_rows
    elif axis == 1:
        res = np.zeros(n_rows)
        for row in range(n_rows):
            for col in range(n_cols):
                res[row] += x[row, col]
            # Row total is complete; turn it into a mean.
            res[row] /= n_cols
    return res
@nb.njit(nogil=True, cache=True)
def simple_std(x, axis=0, ddof=1):
    """Standard deviation of a 2-D array along one axis.

    Computed in one pass from the running sum and running sum of squares:
    ``sqrt((sum(x^2) - sum(x)^2 / n) / (n - ddof))``.

    :param x: 2-D array (unpacked as ``length, width = x.shape``).
    :param axis: 0 gives one value per column, 1 gives one value per row.
        Only these two values are handled.
    :param ddof: amount subtracted from the sample count in the denominator.
    :return: 1-D array created with ``np.zeros`` holding the standard deviations.
    """
    length, width = x.shape

    if axis == 0:
        res = np.zeros(width)
        sum_mat = np.zeros(width)
        for j in range(width):
            for i in range(length):
                res[j] += x[i, j] * x[i, j]
                sum_mat[j] += x[i, j]
            res[j] = math.sqrt((res[j] - sum_mat[j] * sum_mat[j] / length) / (length - ddof))
    elif axis == 1:
        res = np.zeros(length)
        # One running sum per ROW, so the buffer is sized by length. Sizing it
        # by width indexed past the end whenever length > width.
        sum_mat = np.zeros(length)
        for i in range(length):
            for j in range(width):
                res[i] += x[i, j] * x[i, j]
                sum_mat[i] += x[i, j]
            res[i] = math.sqrt((res[i] - sum_mat[i] * sum_mat[i] / width) / (width - ddof))
    return res
@nb.njit(nogil=True, cache=True)
def agg_sum(groups, x):
    """Sum the rows of ``x`` per group.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``; row ``g``
        is the column-wise sum of the rows of ``x`` labelled ``g``.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += x[row, col]
    return res
@nb.njit(nogil=True, cache=True)
def agg_abssum(groups, x):
    """Sum the absolute values of the rows of ``x`` per group.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``; row ``g``
        is the column-wise sum of ``abs(x)`` over the rows labelled ``g``.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += abs(x[row, col])
    return res
@nb.njit(nogil=True, cache=True)
def agg_mean(groups, x):
    """Average the rows of ``x`` per group.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``; row ``g``
        is the column-wise mean of the rows labelled ``g``. An id in
        ``0..groups.max()`` that never occurs is divided by a zero count.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)
    bin_count = np.zeros(n_groups, dtype=np.int32)

    # Pass 1: per-group column totals and per-group row counts.
    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += x[row, col]
        bin_count[gid] += 1

    # Pass 2: totals -> means.
    for gid in range(n_groups):
        members = bin_count[gid]
        for col in range(n_cols):
            res[gid, col] /= members
    return res
@nb.njit(nogil=True, cache=True)
def agg_std(groups, x, ddof=1):
    """Standard deviation of the rows of ``x`` per group.

    Computed from per-group running sums and sums of squares:
    ``sqrt((sum(x^2) - sum(x)^2 / n) / (n - ddof))``.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :param ddof: amount subtracted from the group size in the denominator.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])`` holding
        the per-group, per-column standard deviations.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)
    sumsq = np.zeros((n_groups, n_cols), dtype=np.float64)
    bin_count = np.zeros(n_groups, dtype=np.int32)

    # Pass 1: res temporarily holds the plain sums, sumsq the sums of squares.
    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += x[row, col]
            sumsq[gid, col] += x[row, col] * x[row, col]
        bin_count[gid] += 1

    # Pass 2: overwrite each sum with the standard deviation derived from it.
    for gid in range(n_groups):
        members = bin_count[gid]
        for col in range(n_cols):
            res[gid, col] = math.sqrt(
                (sumsq[gid, col] - res[gid, col] * res[gid, col] / members) / (members - ddof))
    return res
@nb.njit(nogil=True, cache=True)
def copy_value(groups, source):
    """Broadcast per-group rows back to one row per observation.

    :param groups: 1-D array of integer group ids, used as row indices into
        ``source``.
    :param source: 2-D array with one row per group id.
    :return: array of shape ``(len(groups), source.shape[1])`` created with
        ``np.zeros``; row ``i`` is a copy of ``source[groups[i]]``.
    """
    n_obs = groups.shape[0]
    n_cols = source.shape[1]
    destination = np.zeros((n_obs, n_cols))

    for row in range(n_obs):
        gid = groups[row]
        for col in range(n_cols):
            destination[row, col] = source[gid, col]
    return destination
def transform(groups, x, func, ddof=1):
    """Compute a per-group statistic and expand it back to one row per observation.

    :param groups: 1-D array of integer group ids, one per row of ``x``.
    :param x: 2-D data array.
    :param func: one of ``'mean'``, ``'std'``, ``'sum'``, ``'abssum'``.
    :param ddof: forwarded to ``agg_std``; only used when ``func == 'std'``.
    :return: the result of ``copy_value(groups, <per-group statistic>)``.
    :raises ValueError: if ``func`` is not one of the recognised names.
    """
    if func == 'mean':
        per_group = agg_mean(groups, x)
    elif func == 'std':
        per_group = agg_std(groups, x, ddof=ddof)
    elif func == 'sum':
        per_group = agg_sum(groups, x)
    elif func == 'abssum':
        per_group = agg_abssum(groups, x)
    else:
        raise ValueError('({0}) is not recognized as valid functor'.format(func))

    # Every observation receives the statistic of the group it belongs to.
    return copy_value(groups, per_group)
def aggregate(groups, x, func, ddof=1):
    """Compute a per-group statistic of the rows of ``x``.

    :param groups: 1-D array of integer group ids, one per row of ``x``.
    :param x: 2-D data array.
    :param func: one of ``'mean'``, ``'std'``, ``'sum'``, ``'abssum'``.
    :param ddof: forwarded to ``agg_std``; only used when ``func == 'std'``.
    :return: whatever the selected ``agg_*`` routine returns (one row per group id).
    :raises ValueError: if ``func`` is not one of the recognised names.
    """
    if func == 'mean':
        return agg_mean(groups, x)
    if func == 'std':
        return agg_std(groups, x, ddof=ddof)
    if func == 'sum':
        return agg_sum(groups, x)
    if func == 'abssum':
        return agg_abssum(groups, x)
    raise ValueError('({0}) is not recognized as valid functor'.format(func))
if __name__ == '__main__':
    # Ad-hoc timing script: 1000 calls of aggregate(..., 'mean') on random data,
    # measured twice in a row.
    import datetime as dt

    n_samples = 6000
    n_features = 10
    n_groups = 30

    groups = np.random.randint(n_groups, size=n_samples)
    max_g = n_groups - 1
    x = np.random.randn(n_samples, n_features)

    # Two identical passes. NOTE(review): the first presumably also pays for
    # numba compilation or cache loading - confirm before comparing the numbers.
    for _timing_pass in range(2):
        start = dt.datetime.now()
        for i in range(1000):
            res = aggregate(groups, x, 'mean')
        print(dt.datetime.now() - start)
\ No newline at end of file
alphamind/data/neutralize.py
View file @
e9d233d4
...
...
@@ -12,7 +12,7 @@ from numpy.linalg import solve
from
typing
import
Tuple
from
typing
import
Union
from
typing
import
Dict
from
alphamind.
aggregate
import
groupby
from
alphamind.
utilities
import
groupby
def
neutralize
(
x
:
np
.
ndarray
,
y
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
,
output_explained
=
False
,
output_exposure
=
False
)
\
...
...
alphamind/data/standardize.py
View file @
e9d233d4
...
...
@@ -6,10 +6,10 @@ Created on 2017-4-25
"""
import
numpy
as
np
from
alphamind.
aggregate
import
group_mapping
from
alphamind.
aggregate
import
transform
from
alphamind.
aggregate
import
simple_mean
from
alphamind.
aggregate
import
simple_std
from
alphamind.
utilities
import
group_mapping
from
alphamind.
utilities
import
transform
from
alphamind.
utilities
import
simple_mean
from
alphamind.
utilities
import
simple_std
def
standardize
(
x
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
,
ddof
=
1
)
->
np
.
ndarray
:
...
...
alphamind/data/winsorize.py
View file @
e9d233d4
...
...
@@ -7,10 +7,10 @@ Created on 2017-4-25
import
numpy
as
np
import
numba
as
nb
from
alphamind.
aggregate
import
group_mapping
from
alphamind.
aggregate
import
transform
from
alphamind.
aggregate
import
simple_mean
from
alphamind.
aggregate
import
simple_std
from
alphamind.
utilities
import
group_mapping
from
alphamind.
utilities
import
transform
from
alphamind.
utilities
import
simple_mean
from
alphamind.
utilities
import
simple_std
@
nb
.
njit
(
nogil
=
True
,
cache
=
True
)
...
...
alphamind/portfolio/rankbuilder.py
View file @
e9d233d4
...
...
@@ -9,7 +9,7 @@ import numpy as np
import
numba
as
nb
from
numpy
import
zeros
from
numpy
import
zeros_like
from
alphamind.
aggregate
import
groupby
from
alphamind.
utilities
import
groupby
@
nb
.
njit
(
nogil
=
True
,
cache
=
True
)
...
...
alphamind/settlement/simplesettle.py
View file @
e9d233d4
...
...
@@ -6,9 +6,9 @@ Created on 2017-4-28
"""
import
numpy
as
np
from
alphamind.
aggregate
import
group_mapping
from
alphamind.
aggregate
import
aggregate
from
alphamind.
aggregate
import
simple_sum
from
alphamind.
utilities
import
group_mapping
from
alphamind.
utilities
import
aggregate
from
alphamind.
utilities
import
simple_sum
def
simple_settle
(
weights
:
np
.
ndarray
,
ret_series
:
np
.
ndarray
,
groups
:
np
.
ndarray
=
None
)
->
np
.
ndarray
:
...
...
alphamind/utilities.py
View file @
e9d233d4
...
...
@@ -9,6 +9,10 @@ import os
import
sys
import
logging
import
unittest
import
math
from
typing
import
List
import
numpy
as
np
import
numba
as
nb
alpha_logger
=
logging
.
getLogger
(
'ALPHA_MIND'
)
...
...
@@ -46,3 +50,208 @@ class TestRunner(object):
sys
.
exit
(
-
1
)
else
:
sys
.
exit
(
0
)
def groupby(groups: np.ndarray) -> List[np.ndarray]:
    """Split row positions into one index array per distinct group label.

    :param groups: 1-D integer array of group labels (passed to ``np.bincount``,
        so presumably non-negative integers).
    :return: list of index arrays, one per label that actually occurs, in
        ascending label order; each array holds the positions carrying that label.
    """
    sorted_positions = groups.argsort()
    occupancy = np.bincount(groups)
    # Only labels that occur at least once contribute a slice.
    sizes = occupancy[occupancy.nonzero()[0]]
    # Cumulative sizes give the exclusive end of each label's run in sorted order.
    stops = np.cumsum(sizes)
    return [sorted_positions[stop - size:stop] for size, stop in zip(sizes, stops)]
@nb.njit(nogil=True, cache=True)
def group_mapping(groups: np.ndarray) -> np.ndarray:
    """Relabel arbitrary group values as dense integer codes.

    Distinct values are numbered 0, 1, 2, ... in ascending order of value and
    every position receives the code of the value it holds.

    :param groups: 1-D array of group labels.
    :return: array of the same length as ``groups`` holding the dense codes,
        with the dtype of the ``argsort`` result. Empty input gives an empty array.
    """
    length = groups.shape[0]
    order = groups.argsort()
    res = np.zeros(length, dtype=order.dtype)

    # Nothing to label; indexing order[0] below would be out of bounds.
    if length == 0:
        return res

    start = 0
    res[order[0]] = start
    previous = groups[order[0]]

    for i in range(1, length):
        curr_idx = order[i]
        curr_val = groups[curr_idx]
        # Walking in sorted order, a change of value starts the next code.
        if curr_val != previous:
            start += 1
        res[curr_idx] = start
        previous = curr_val
    return res
@nb.njit(nogil=True, cache=True)
def simple_sum(x, axis=0):
    """Sum a 2-D array along one axis with explicit loops.

    :param x: 2-D array (unpacked as ``rows, cols = x.shape``).
    :param axis: 0 gives one total per column, 1 gives one total per row.
        Only these two values are handled.
    :return: 1-D array created with ``np.zeros`` holding the totals.
    """
    n_rows, n_cols = x.shape

    if axis == 0:
        res = np.zeros(n_cols)
        for row in range(n_rows):
            for col in range(n_cols):
                res[col] += x[row, col]
    elif axis == 1:
        res = np.zeros(n_rows)
        for row in range(n_rows):
            for col in range(n_cols):
                res[row] += x[row, col]
    return res
@nb.njit(nogil=True, cache=True)
def simple_mean(x, axis=0):
    """Arithmetic mean of a 2-D array along one axis with explicit loops.

    :param x: 2-D array (unpacked as ``rows, cols = x.shape``).
    :param axis: 0 gives one mean per column, 1 gives one mean per row.
        Only these two values are handled.
    :return: 1-D array created with ``np.zeros`` holding the means.
    """
    n_rows, n_cols = x.shape

    if axis == 0:
        res = np.zeros(n_cols)
        for col in range(n_cols):
            for row in range(n_rows):
                res[col] += x[row, col]
            # Column total is complete; turn it into a mean.
            res[col] /= n_rows
    elif axis == 1:
        res = np.zeros(n_rows)
        for row in range(n_rows):
            for col in range(n_cols):
                res[row] += x[row, col]
            # Row total is complete; turn it into a mean.
            res[row] /= n_cols
    return res
@nb.njit(nogil=True, cache=True)
def simple_std(x, axis=0, ddof=1):
    """Standard deviation of a 2-D array along one axis.

    Computed in one pass from the running sum and running sum of squares:
    ``sqrt((sum(x^2) - sum(x)^2 / n) / (n - ddof))``.

    :param x: 2-D array (unpacked as ``length, width = x.shape``).
    :param axis: 0 gives one value per column, 1 gives one value per row.
        Only these two values are handled.
    :param ddof: amount subtracted from the sample count in the denominator.
    :return: 1-D array created with ``np.zeros`` holding the standard deviations.
    """
    length, width = x.shape

    if axis == 0:
        res = np.zeros(width)
        sum_mat = np.zeros(width)
        for j in range(width):
            for i in range(length):
                res[j] += x[i, j] * x[i, j]
                sum_mat[j] += x[i, j]
            res[j] = math.sqrt((res[j] - sum_mat[j] * sum_mat[j] / length) / (length - ddof))
    elif axis == 1:
        res = np.zeros(length)
        # One running sum per ROW, so the buffer is sized by length. Sizing it
        # by width indexed past the end whenever length > width.
        sum_mat = np.zeros(length)
        for i in range(length):
            for j in range(width):
                res[i] += x[i, j] * x[i, j]
                sum_mat[i] += x[i, j]
            res[i] = math.sqrt((res[i] - sum_mat[i] * sum_mat[i] / width) / (width - ddof))
    return res
@nb.njit(nogil=True, cache=True)
def agg_sum(groups, x):
    """Sum the rows of ``x`` per group.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``; row ``g``
        is the column-wise sum of the rows of ``x`` labelled ``g``.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += x[row, col]
    return res
@nb.njit(nogil=True, cache=True)
def agg_abssum(groups, x):
    """Sum the absolute values of the rows of ``x`` per group.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``; row ``g``
        is the column-wise sum of ``abs(x)`` over the rows labelled ``g``.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)

    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += abs(x[row, col])
    return res
@nb.njit(nogil=True, cache=True)
def agg_mean(groups, x):
    """Average the rows of ``x`` per group.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])``; row ``g``
        is the column-wise mean of the rows labelled ``g``. An id in
        ``0..groups.max()`` that never occurs is divided by a zero count.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)
    bin_count = np.zeros(n_groups, dtype=np.int32)

    # Pass 1: per-group column totals and per-group row counts.
    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += x[row, col]
        bin_count[gid] += 1

    # Pass 2: totals -> means.
    for gid in range(n_groups):
        members = bin_count[gid]
        for col in range(n_cols):
            res[gid, col] /= members
    return res
@nb.njit(nogil=True, cache=True)
def agg_std(groups, x, ddof=1):
    """Standard deviation of the rows of ``x`` per group.

    Computed from per-group running sums and sums of squares:
    ``sqrt((sum(x^2) - sum(x)^2 / n) / (n - ddof))``.

    :param groups: 1-D array of integer group ids, used directly as row indices
        into the result.
    :param x: 2-D array whose row ``i`` belongs to group ``groups[i]``.
    :param ddof: amount subtracted from the group size in the denominator.
    :return: float64 array of shape ``(groups.max() + 1, x.shape[1])`` holding
        the per-group, per-column standard deviations.
    """
    n_groups = groups.max() + 1
    n_rows, n_cols = x.shape
    res = np.zeros((n_groups, n_cols), dtype=np.float64)
    sumsq = np.zeros((n_groups, n_cols), dtype=np.float64)
    bin_count = np.zeros(n_groups, dtype=np.int32)

    # Pass 1: res temporarily holds the plain sums, sumsq the sums of squares.
    for row in range(n_rows):
        gid = groups[row]
        for col in range(n_cols):
            res[gid, col] += x[row, col]
            sumsq[gid, col] += x[row, col] * x[row, col]
        bin_count[gid] += 1

    # Pass 2: overwrite each sum with the standard deviation derived from it.
    for gid in range(n_groups):
        members = bin_count[gid]
        for col in range(n_cols):
            res[gid, col] = math.sqrt(
                (sumsq[gid, col] - res[gid, col] * res[gid, col] / members) / (members - ddof))
    return res
@nb.njit(nogil=True, cache=True)
def copy_value(groups, source):
    """Broadcast per-group rows back to one row per observation.

    :param groups: 1-D array of integer group ids, used as row indices into
        ``source``.
    :param source: 2-D array with one row per group id.
    :return: array of shape ``(len(groups), source.shape[1])`` created with
        ``np.zeros``; row ``i`` is a copy of ``source[groups[i]]``.
    """
    n_obs = groups.shape[0]
    n_cols = source.shape[1]
    destination = np.zeros((n_obs, n_cols))

    for row in range(n_obs):
        gid = groups[row]
        for col in range(n_cols):
            destination[row, col] = source[gid, col]
    return destination
def transform(groups, x, func, ddof=1):
    """Compute a per-group statistic and expand it back to one row per observation.

    :param groups: 1-D array of integer group ids, one per row of ``x``.
    :param x: 2-D data array.
    :param func: one of ``'mean'``, ``'std'``, ``'sum'``, ``'abssum'``.
    :param ddof: forwarded to ``agg_std``; only used when ``func == 'std'``.
    :return: the result of ``copy_value(groups, <per-group statistic>)``.
    :raises ValueError: if ``func`` is not one of the recognised names.
    """
    if func == 'mean':
        per_group = agg_mean(groups, x)
    elif func == 'std':
        per_group = agg_std(groups, x, ddof=ddof)
    elif func == 'sum':
        per_group = agg_sum(groups, x)
    elif func == 'abssum':
        per_group = agg_abssum(groups, x)
    else:
        raise ValueError('({0}) is not recognized as valid functor'.format(func))

    # Every observation receives the statistic of the group it belongs to.
    return copy_value(groups, per_group)
def aggregate(groups, x, func, ddof=1):
    """Compute a per-group statistic of the rows of ``x``.

    :param groups: 1-D array of integer group ids, one per row of ``x``.
    :param x: 2-D data array.
    :param func: one of ``'mean'``, ``'std'``, ``'sum'``, ``'abssum'``.
    :param ddof: forwarded to ``agg_std``; only used when ``func == 'std'``.
    :return: whatever the selected ``agg_*`` routine returns (one row per group id).
    :raises ValueError: if ``func`` is not one of the recognised names.
    """
    if func == 'mean':
        return agg_mean(groups, x)
    if func == 'std':
        return agg_std(groups, x, ddof=ddof)
    if func == 'sum':
        return agg_sum(groups, x)
    if func == 'abssum':
        return agg_abssum(groups, x)
    raise ValueError('({0}) is not recognized as valid functor'.format(func))
requirements.txt
View file @
e9d233d4
cython
>= 0.25.2
numpy
>= 1.12.1
numba
>= 0.30.0
scikit-learn
>= 0.18.1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment