Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Sign in
Toggle navigation
A
alpha-mind
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Dr.李
alpha-mind
Commits
bd3c56b0
Commit
bd3c56b0
authored
Aug 25, 2017
by
Dr.李
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
replace column names
parent
9e66adbd
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
71 additions
and
69 deletions
+71
-69
calculators.py
alphamind/analysis/calculators.py
+3
-3
sqlengine.py
alphamind/data/engines/sqlengine.py
+44
-44
universe.py
alphamind/data/engines/universe.py
+6
-6
model_training.py
alphamind/examples/model_training.py
+10
-8
data_preparing.py
alphamind/model/data_preparing.py
+8
-8
No files found.
alphamind/analysis/calculators.py
View file @
bd3c56b0
...
@@ -10,13 +10,13 @@ import pandas as pd
...
@@ -10,13 +10,13 @@ import pandas as pd
def
calculate_turn_over
(
pos_table
:
pd
.
DataFrame
)
->
pd
.
DataFrame
:
def
calculate_turn_over
(
pos_table
:
pd
.
DataFrame
)
->
pd
.
DataFrame
:
turn_over_table
=
{}
turn_over_table
=
{}
total_factors
=
pos_table
.
columns
.
difference
([
'
C
ode'
])
total_factors
=
pos_table
.
columns
.
difference
([
'
c
ode'
])
pos_table
.
reset_index
()
pos_table
.
reset_index
()
for
name
in
total_factors
:
for
name
in
total_factors
:
pivot_position
=
pos_table
.
pivot
(
values
=
name
,
columns
=
'
C
ode'
)
.
fillna
(
0.
)
pivot_position
=
pos_table
.
pivot
(
values
=
name
,
columns
=
'
c
ode'
)
.
fillna
(
0.
)
turn_over_series
=
pivot_position
.
diff
()
.
abs
()
.
sum
(
axis
=
1
)
turn_over_series
=
pivot_position
.
diff
()
.
abs
()
.
sum
(
axis
=
1
)
turn_over_table
[
name
]
=
turn_over_series
.
values
turn_over_table
[
name
]
=
turn_over_series
.
values
turn_over_table
=
pd
.
DataFrame
(
turn_over_table
,
index
=
pos_table
.
D
ate
.
unique
())
turn_over_table
=
pd
.
DataFrame
(
turn_over_table
,
index
=
pos_table
.
trade_d
ate
.
unique
())
return
turn_over_table
[
total_factors
]
return
turn_over_table
[
total_factors
]
alphamind/data/engines/sqlengine.py
View file @
bd3c56b0
This diff is collapsed.
Click to expand it.
alphamind/data/engines/universe.py
View file @
bd3c56b0
...
@@ -41,7 +41,7 @@ class Universe(object):
...
@@ -41,7 +41,7 @@ class Universe(object):
all_and_conditions
.
append
(
univ_out
)
all_and_conditions
.
append
(
univ_out
)
if
self
.
exclude_codes
:
if
self
.
exclude_codes
:
codes_out
=
UniverseTable
.
C
ode
.
notin_
(
self
.
exclude_codes
)
codes_out
=
UniverseTable
.
c
ode
.
notin_
(
self
.
exclude_codes
)
all_and_conditions
.
append
(
codes_out
)
all_and_conditions
.
append
(
codes_out
)
all_or_conditions
=
[]
all_or_conditions
=
[]
...
@@ -50,18 +50,18 @@ class Universe(object):
...
@@ -50,18 +50,18 @@ class Universe(object):
all_or_conditions
.
append
(
univ_in
)
all_or_conditions
.
append
(
univ_in
)
if
self
.
include_codes
:
if
self
.
include_codes
:
codes_in
=
UniverseTable
.
C
ode
.
in_
(
self
.
include_codes
)
codes_in
=
UniverseTable
.
c
ode
.
in_
(
self
.
include_codes
)
all_or_conditions
.
append
(
codes_in
)
all_or_conditions
.
append
(
codes_in
)
return
all_and_conditions
,
all_or_conditions
return
all_and_conditions
,
all_or_conditions
def
query
(
self
,
ref_date
):
def
query
(
self
,
ref_date
):
query
=
select
([
UniverseTable
.
Date
,
UniverseTable
.
C
ode
])
.
distinct
()
query
=
select
([
UniverseTable
.
trade_date
,
UniverseTable
.
c
ode
])
.
distinct
()
all_and_conditions
,
all_or_conditions
=
self
.
_create_condition
()
all_and_conditions
,
all_or_conditions
=
self
.
_create_condition
()
query
=
query
.
where
(
query
=
query
.
where
(
and_
(
and_
(
UniverseTable
.
D
ate
==
ref_date
,
UniverseTable
.
trade_d
ate
==
ref_date
,
or_
(
or_
(
and_
(
*
all_and_conditions
),
and_
(
*
all_and_conditions
),
*
all_or_conditions
*
all_or_conditions
...
@@ -72,10 +72,10 @@ class Universe(object):
...
@@ -72,10 +72,10 @@ class Universe(object):
return
query
return
query
def
query_range
(
self
,
start_date
=
None
,
end_date
=
None
,
dates
=
None
):
def
query_range
(
self
,
start_date
=
None
,
end_date
=
None
,
dates
=
None
):
query
=
select
([
UniverseTable
.
Date
,
UniverseTable
.
C
ode
])
.
distinct
()
query
=
select
([
UniverseTable
.
trade_date
,
UniverseTable
.
c
ode
])
.
distinct
()
all_and_conditions
,
all_or_conditions
=
self
.
_create_condition
()
all_and_conditions
,
all_or_conditions
=
self
.
_create_condition
()
dates_cond
=
UniverseTable
.
Date
.
in_
(
dates
)
if
dates
else
UniverseTable
.
D
ate
.
between
(
start_date
,
end_date
)
dates_cond
=
UniverseTable
.
trade_date
.
in_
(
dates
)
if
dates
else
UniverseTable
.
trade_d
ate
.
between
(
start_date
,
end_date
)
query
=
query
.
where
(
query
=
query
.
where
(
and_
(
and_
(
...
...
alphamind/examples/model_training.py
View file @
bd3c56b0
...
@@ -32,13 +32,13 @@ training - every 4 week
...
@@ -32,13 +32,13 @@ training - every 4 week
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
)
engine
=
SqlEngine
(
'postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
)
universe
=
Universe
(
'zz500'
,
[
'zz500'
])
universe
=
Universe
(
'zz500'
,
[
'zz500'
])
neutralize_risk
=
industry_styles
neutralize_risk
=
industry_styles
alpha_factors
=
risk_styles
alpha_factors
=
[
'BDTO'
,
'RVOL'
,
'CHV'
,
'VAL'
,
'CFinc1'
]
#
risk_styles
benchmark
=
905
benchmark
=
905
n_bins
=
5
n_bins
=
5
frequency
=
'1w'
frequency
=
'1w'
batch
=
4
batch
=
4
start_date
=
'2012-01-01'
start_date
=
'2012-01-01'
end_date
=
'2017-08-
0
1'
end_date
=
'2017-08-
3
1'
'''
'''
fetch data from target data base and do the corresponding data processing
fetch data from target data base and do the corresponding data processing
...
@@ -74,7 +74,7 @@ for train_date in dates:
...
@@ -74,7 +74,7 @@ for train_date in dates:
model
.
fit
(
x
,
y
)
model
.
fit
(
x
,
y
)
model_df
.
loc
[
train_date
]
=
copy
.
deepcopy
(
model
)
model_df
.
loc
[
train_date
]
=
copy
.
deepcopy
(
model
)
print
(
'
D
ate: {0} training finished'
.
format
(
train_date
))
print
(
'
trade_d
ate: {0} training finished'
.
format
(
train_date
))
'''
'''
predicting phase: using trained model on the re-balance dates
predicting phase: using trained model on the re-balance dates
...
@@ -89,8 +89,8 @@ final_res = np.zeros((len(dates), n_bins))
...
@@ -89,8 +89,8 @@ final_res = np.zeros((len(dates), n_bins))
for
i
,
predict_date
in
enumerate
(
dates
):
for
i
,
predict_date
in
enumerate
(
dates
):
model
=
model_df
[
predict_date
]
model
=
model_df
[
predict_date
]
x
=
predict_x
[
predict_date
]
x
=
predict_x
[
predict_date
]
benchmark_w
=
settlement
[
settlement
.
D
ate
==
predict_date
][
'weight'
]
.
values
benchmark_w
=
settlement
[
settlement
.
trade_d
ate
==
predict_date
][
'weight'
]
.
values
realized_r
=
settlement
[
settlement
.
D
ate
==
predict_date
][
'dx'
]
.
values
realized_r
=
settlement
[
settlement
.
trade_d
ate
==
predict_date
][
'dx'
]
.
values
predict_y
=
model
.
predict
(
x
)
predict_y
=
model
.
predict
(
x
)
...
@@ -100,12 +100,14 @@ for i, predict_date in enumerate(dates):
...
@@ -100,12 +100,14 @@ for i, predict_date in enumerate(dates):
benchmark
=
benchmark_w
)
benchmark
=
benchmark_w
)
final_res
[
i
]
=
res
/
benchmark_w
.
sum
()
final_res
[
i
]
=
res
/
benchmark_w
.
sum
()
print
(
'
D
ate: {0} predicting finished'
.
format
(
train_date
))
print
(
'
trade_d
ate: {0} predicting finished'
.
format
(
train_date
))
last_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
-
1
],
frequency
)
last_date
=
advanceDateByCalendar
(
'china.sse'
,
dates
[
-
1
],
frequency
)
df
=
pd
.
DataFrame
(
final_res
,
index
=
dates
[
1
:]
+
[
last_date
])
df
=
pd
.
DataFrame
(
final_res
,
index
=
dates
[
1
:]
+
[
last_date
])
df
.
sort_index
(
inplace
=
True
)
df
.
sort_index
(
inplace
=
True
)
df
=
df
.
cumsum
()
.
plot
()
df
.
cumsum
()
.
plot
()
plt
.
title
(
'
Prod
factors model training with Linear Regression from 2012 - 2017'
)
plt
.
title
(
'
Risk style
factors model training with Linear Regression from 2012 - 2017'
)
plt
.
show
()
plt
.
show
()
df
=
df
.
cumsum
()
df
.
to_csv
(
'd:/20120101_20170823_bt.csv'
)
alphamind/model/data_preparing.py
View file @
bd3c56b0
...
@@ -50,15 +50,15 @@ def prepare_data(engine: SqlEngine,
...
@@ -50,15 +50,15 @@ def prepare_data(engine: SqlEngine,
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factor_df
=
engine
.
fetch_factor_range
(
universe
,
factors
=
transformer
,
factors
=
transformer
,
dates
=
dates
,
dates
=
dates
,
warm_start
=
warm_start
)
.
sort_values
([
'
Date'
,
'C
ode'
])
warm_start
=
warm_start
)
.
sort_values
([
'
trade_date'
,
'c
ode'
])
return_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
return_df
=
engine
.
fetch_dx_return_range
(
universe
,
dates
=
dates
,
horizon
=
horizon
)
benchmark_df
=
engine
.
fetch_benchmark_range
(
benchmark
,
dates
=
dates
)
benchmark_df
=
engine
.
fetch_benchmark_range
(
benchmark
,
dates
=
dates
)
df
=
pd
.
merge
(
factor_df
,
return_df
,
on
=
[
'
Date'
,
'C
ode'
])
.
dropna
()
df
=
pd
.
merge
(
factor_df
,
return_df
,
on
=
[
'
trade_date'
,
'c
ode'
])
.
dropna
()
df
=
pd
.
merge
(
df
,
benchmark_df
,
on
=
[
'
Date'
,
'C
ode'
],
how
=
'left'
)
df
=
pd
.
merge
(
df
,
benchmark_df
,
on
=
[
'
trade_date'
,
'c
ode'
],
how
=
'left'
)
df
[
'weight'
]
=
df
[
'weight'
]
.
fillna
(
0.
)
df
[
'weight'
]
=
df
[
'weight'
]
.
fillna
(
0.
)
return
df
[[
'
Date'
,
'Code'
,
'dx'
]],
df
[[
'Date'
,
'C
ode'
,
'weight'
]
+
transformer
.
names
]
return
df
[[
'
trade_date'
,
'code'
,
'dx'
]],
df
[[
'trade_date'
,
'c
ode'
,
'weight'
]
+
transformer
.
names
]
def
batch_processing
(
x_values
,
def
batch_processing
(
x_values
,
...
@@ -137,10 +137,10 @@ def fetch_data_package(engine: SqlEngine,
...
@@ -137,10 +137,10 @@ def fetch_data_package(engine: SqlEngine,
if
neutralized_risk
:
if
neutralized_risk
:
risk_df
=
engine
.
fetch_risk_model_range
(
universe
,
dates
=
dates
,
risk_model
=
risk_model
)[
1
]
risk_df
=
engine
.
fetch_risk_model_range
(
universe
,
dates
=
dates
,
risk_model
=
risk_model
)[
1
]
used_neutralized_risk
=
list
(
set
(
neutralized_risk
)
.
difference
(
transformer
.
names
))
used_neutralized_risk
=
list
(
set
(
neutralized_risk
)
.
difference
(
transformer
.
names
))
risk_df
=
risk_df
[[
'
Date'
,
'C
ode'
]
+
used_neutralized_risk
]
.
dropna
()
risk_df
=
risk_df
[[
'
trade_date'
,
'c
ode'
]
+
used_neutralized_risk
]
.
dropna
()
train_x
=
pd
.
merge
(
factor_df
,
risk_df
,
on
=
[
'
Date'
,
'C
ode'
])
train_x
=
pd
.
merge
(
factor_df
,
risk_df
,
on
=
[
'
trade_date'
,
'c
ode'
])
return_df
=
pd
.
merge
(
return_df
,
risk_df
,
on
=
[
'
Date'
,
'Code'
])[[
'Date'
,
'C
ode'
,
'dx'
]]
return_df
=
pd
.
merge
(
return_df
,
risk_df
,
on
=
[
'
trade_date'
,
'code'
])[[
'trade_date'
,
'c
ode'
,
'dx'
]]
train_y
=
return_df
.
copy
()
train_y
=
return_df
.
copy
()
risk_exp
=
train_x
[
neutralized_risk
]
.
values
.
astype
(
float
)
risk_exp
=
train_x
[
neutralized_risk
]
.
values
.
astype
(
float
)
...
@@ -153,7 +153,7 @@ def fetch_data_package(engine: SqlEngine,
...
@@ -153,7 +153,7 @@ def fetch_data_package(engine: SqlEngine,
x_values
=
train_x
[
transformer
.
names
]
.
values
.
astype
(
float
)
x_values
=
train_x
[
transformer
.
names
]
.
values
.
astype
(
float
)
y_values
=
train_y
[[
'dx'
]]
.
values
y_values
=
train_y
[[
'dx'
]]
.
values
date_label
=
pd
.
DatetimeIndex
(
factor_df
.
D
ate
)
.
to_pydatetime
()
date_label
=
pd
.
DatetimeIndex
(
factor_df
.
trade_d
ate
)
.
to_pydatetime
()
dates
=
np
.
unique
(
date_label
)
dates
=
np
.
unique
(
date_label
)
return_df
[
'weight'
]
=
train_x
[
'weight'
]
return_df
[
'weight'
]
=
train_x
[
'weight'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment