Commit ee112dba authored by Dr.李

FORMAT: reformat codes

parent b5418fb8
......@@ -5,5 +5,4 @@ Created on 2017-4-25
@author: cheng.li
"""
__version__ = "0.2.1"
......@@ -8,10 +8,11 @@ Created on 2018-3-5
import numpy as np
import pandas as pd
import statsmodels.api as sm
from alphamind.utilities import alpha_logger
from alphamind.data.processing import factor_processing
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.utilities import alpha_logger
def cs_impl(ref_date,
......@@ -33,7 +34,8 @@ def cs_impl(ref_date,
total_risk_exp = total_data[constraint_risk]
er = total_data[[factor_name]].values.astype(float)
er = factor_processing(er, [winsorize_normal, standardize], total_risk_exp.values, [standardize]).flatten()
er = factor_processing(er, [winsorize_normal, standardize], total_risk_exp.values,
[standardize]).flatten()
industry = total_data.industry_name.values
codes = total_data.code.tolist()
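Editor's note: the hunk above wraps a `factor_processing` call that chains winsorization and standardization, neutralizes against the risk exposures, then standardizes the residuals. A minimal sketch of that call with illustrative inputs (the keyword names match the definition of `factor_processing` later in this commit):

```python
import numpy as np

# Illustrative stand-ins; in cs_impl these come from total_data.
er_raw = np.random.randn(500, 1)      # raw factor values, one column
risk_exp = np.random.randn(500, 10)   # risk-factor exposures

er = factor_processing(er_raw,
                       pre_process=[winsorize_normal, standardize],
                       risk_factors=risk_exp,
                       post_process=[standardize]).flatten()
```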
......@@ -75,7 +77,8 @@ def cross_section_analysis(ref_date,
industry_matrix = engine.fetch_industry_matrix(ref_date, codes, 'sw_adj', 1)
dx_returns = engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=1)
return cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk, industry_matrix, dx_returns)
return cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk,
industry_matrix, dx_returns)
if __name__ == '__main__':
......
......@@ -8,18 +8,20 @@ Created on 2017-5-25
from typing import Optional
from typing import Tuple
from typing import Union
import numpy as np
import pandas as pd
from alphamind.data.processing import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.longshortbulder import long_short_builder
from alphamind.portfolio.rankbuilder import rank_build
from alphamind.portfolio.linearbuilder import linear_builder
from alphamind.portfolio.longshortbulder import long_short_builder
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder
from alphamind.data.processing import factor_processing
from alphamind.portfolio.rankbuilder import rank_build
from alphamind.settlement.simplesettle import simple_settle
......@@ -106,8 +108,9 @@ def er_portfolio_analysis(er: np.ndarray,
raise ValueError('linear programming optimizer in status: {0}'.format(status))
elif method == 'rank':
weights = rank_build(er, use_rank=kwargs['use_rank'], masks=is_tradable).flatten() * benchmark.sum() / kwargs[
'use_rank']
weights = rank_build(er, use_rank=kwargs['use_rank'],
masks=is_tradable).flatten() * benchmark.sum() / kwargs[
'use_rank']
elif method == 'ls' or method == 'long_short':
weights = long_short_builder(er).flatten()
elif method == 'mv' or method == 'mean_variance':
......
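Editor's note: the 'rank' branch in the hunk above selects the top `use_rank` names by expected return and rescales so the total weight matches the benchmark's. A minimal sketch, assuming `rank_build` returns a 0/1 selection vector per name (an assumption; the builder's output is not shown in this diff):

```python
import numpy as np

er = np.random.randn(500)
use_rank = 100
benchmark_total = 1.0                               # stand-in for benchmark.sum()
raw = rank_build(er, use_rank=use_rank).flatten()   # assumed 1.0 for selected names
weights = raw * benchmark_total / use_rank          # equal weights summing to 1.0
```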
......@@ -6,6 +6,7 @@ Created on 2017-5-12
"""
import pandas as pd
from alphamind.analysis.riskanalysis import risk_analysis
......
......@@ -6,22 +6,23 @@ Created on 2017-8-16
"""
from typing import Optional
import numpy as np
import pandas as pd
from alphamind.utilities import agg_mean
from alphamind.data.processing import factor_processing
from alphamind.data.quantile import quantile
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.processing import factor_processing
from alphamind.utilities import agg_mean
def quantile_analysis(factors: pd.DataFrame,
factor_weights: np.ndarray,
dx_return: np.ndarray,
n_bins: int=5,
risk_exp: Optional[np.ndarray]=None,
n_bins: int = 5,
risk_exp: Optional[np.ndarray] = None,
**kwargs):
if 'pre_process' in kwargs:
pre_process = kwargs['pre_process']
del kwargs['pre_process']
......@@ -42,7 +43,6 @@ def er_quantile_analysis(er: np.ndarray,
n_bins: int,
dx_return: np.ndarray,
de_trend=False) -> np.ndarray:
er = er.flatten()
q_groups = quantile(er, n_bins)
......@@ -78,8 +78,8 @@ if __name__ == '__main__':
r,
risk_exp=None,
n_bins=n_bins,
pre_process=[], #[winsorize_normal, standardize],
post_process=[]) #[standardize])
pre_process=[], # [winsorize_normal, standardize],
post_process=[]) # [standardize])
er = x_w @ f_df.values.T
expected = er_quantile_analysis(er, n_bins, r)
......
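Editor's note: the `if 'pre_process' in kwargs ... del kwargs['pre_process']` pattern in `quantile_analysis` above reads as a plain pop; a one-line equivalent sketch (modulo whatever default the unshown else branch takes):

```python
pre_process = kwargs.pop('pre_process', None)   # same effect as check-then-del
```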
......@@ -6,15 +6,16 @@ Created on 2017-5-6
"""
from typing import Tuple
import numpy as np
import pandas as pd
from alphamind.data.neutralize import neutralize
def risk_analysis(net_weight_series: pd.Series,
next_bar_return_series: pd.Series,
risk_table: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
group_idx = net_weight_series.index.values.astype(int)
net_pos = net_weight_series.values.reshape((-1, 1))
risk_factor_cols = risk_table.columns
......@@ -31,6 +32,8 @@ def risk_analysis(net_weight_series: pd.Series,
cols = ['idiosyncratic']
cols.extend(risk_factor_cols)
explained_table = pd.DataFrame(explained_table * net_pos, columns=cols, index=net_weight_series.index)
exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, index=net_weight_series.index)
explained_table = pd.DataFrame(explained_table * net_pos, columns=cols,
index=net_weight_series.index)
exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols,
index=net_weight_series.index)
return explained_table, exposure_table.groupby(level=0).first()
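Editor's note: for readers of the hunk above, `explained_table` holds each position's return attributed to the idiosyncratic bucket plus each risk factor, and `exposure_table` the position-weighted factor exposures. A minimal consumption sketch (inputs are illustrative, not from this diff):

```python
explained, exposure = risk_analysis(net_weight_series,
                                    next_bar_return_series,
                                    risk_table)
total_by_factor = explained.sum(axis=0)   # portfolio return explained per factor
```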
......@@ -5,63 +5,57 @@ Created on 2017-8-16
@author: cheng.li
"""
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.analysis.factoranalysis import er_portfolio_analysis
from alphamind.analysis.quantileanalysis import quantile_analysis
from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.analysis.quantileanalysis import er_quantile_analysis
from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.evolver import evolve_positions
from alphamind.data.engines.sqlengine import risk_styles
from alphamind.analysis.quantileanalysis import quantile_analysis
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.sqlengine import factor_tables
from alphamind.data.engines.sqlengine import industry_styles
from alphamind.data.engines.sqlengine import macro_styles
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.winsorize import NormalWinsorizer
from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.standardize import Standardizer
from alphamind.data.engines.sqlengine import risk_styles
from alphamind.data.engines.universe import Universe
from alphamind.data.engines.utilities import industry_list
from alphamind.data.neutralize import neutralize
from alphamind.data.rank import rank
from alphamind.data.processing import factor_processing
from alphamind.data.rank import percentile
from alphamind.data.engines.sqlengine import factor_tables
from alphamind.data.engines.utilities import industry_list
from alphamind.model import LinearRegression
from alphamind.model import LassoRegression
from alphamind.data.rank import rank
from alphamind.data.standardize import Standardizer
from alphamind.data.standardize import projection
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import NormalWinsorizer
from alphamind.data.winsorize import winsorize_normal
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.execution.pipeline import ExecutionPipeline
from alphamind.execution.targetvolexecutor import TargetVolExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor
from alphamind.model import ConstLinearModel
from alphamind.model import LassoRegression
from alphamind.model import LinearRegression
from alphamind.model import LogisticRegression
from alphamind.model import RandomForestRegressor
from alphamind.model import NvSVRModel
from alphamind.model import RandomForestClassifier
from alphamind.model import XGBRegressor
from alphamind.model import RandomForestRegressor
from alphamind.model import XGBClassifier
from alphamind.model import XGBRegressor
from alphamind.model import XGBTrainer
from alphamind.model import NvSVRModel
from alphamind.model import load_model
from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.model.composer import Composer
from alphamind.model.composer import DataMeta
from alphamind.model.composer import train_model
from alphamind.model.composer import predict_by_model
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor
from alphamind.execution.targetvolexecutor import TargetVolExecutor
from alphamind.execution.pipeline import ExecutionPipeline
from alphamind.model.composer import train_model
from alphamind.model.data_preparing import fetch_data_package
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.evolver import evolve_positions
from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
__all__ = [
'SqlEngine',
'factor_analysis',
......@@ -113,4 +107,4 @@ __all__ = [
'ExecutionPipeline',
'alpha_logger',
'map_freq'
]
\ No newline at end of file
]
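Editor's note: the import reordering throughout this commit is plain alphabetical sorting, one import per line. A sketch of reproducing it with isort (the tool is an assumption; the commit message does not name one):

```python
import isort

with open("alphamind/__init__.py") as f:
    source = f.read()

# force_single_line keeps the one-import-per-line style used in this package
print(isort.code(source, force_single_line=True))
```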
......@@ -11,17 +11,15 @@ from alphamind.benchmarks.data.standardize import benchmark_standardize
from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
from alphamind.benchmarks.portfolio.linearbuild import benchmark_build_linear
from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent
from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent_with_group
from alphamind.benchmarks.portfolio.linearbuild import benchmark_build_linear
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group
if __name__ == '__main__':
benchmark_neutralize(3000, 10, 1000)
benchmark_neutralize_with_groups(3000, 10, 1000, 30)
benchmark_neutralize(30, 3, 50000)
......
......@@ -16,7 +16,8 @@ from alphamind.data.neutralize import neutralize
def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
print("-" * 60)
print("Starting least square fitting benchmarking")
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
n_loops))
y = np.random.randn(n_samples, 5)
x = np.random.randn(n_samples, n_features)
......@@ -40,13 +41,15 @@ def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
np.testing.assert_array_almost_equal(calc_res, exp_res)
def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int,
n_groups: int) -> None:
print("-" * 60)
print("Starting least square fitting with group benchmarking")
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
n_features,
n_loops,
n_groups))
print(
"Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
n_features,
n_loops,
n_groups))
y = np.random.randn(n_samples, 5)
x = np.random.randn(n_samples, n_features)
groups = np.random.randint(n_groups, size=n_samples)
......@@ -71,7 +74,7 @@ def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: i
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
if __name__ == '__main__':
benchmark_neutralize(3000, 10, 1000)
benchmark_neutralize_with_groups(3000, 10, 1000, 30)
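Editor's note: every benchmark function in this commit shares the same wall-clock timing harness; a condensed sketch:

```python
import datetime as dt

n_loops = 1000
start = dt.datetime.now()
for _ in range(n_loops):
    ...                                   # the operation under test
elapsed = dt.datetime.now() - start
print('{0:20s}: {1}'.format('Implemented model', elapsed))
```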
......@@ -6,16 +6,19 @@ Created on 2017-4-25
"""
import datetime as dt
import numpy as np
import pandas as pd
from scipy.stats import zscore
from alphamind.data.standardize import standardize
def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
print("-" * 60)
print("Starting standardizing benchmarking")
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
n_loops))
x = np.random.randn(n_samples, n_features)
......@@ -34,10 +37,15 @@ def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int,
n_groups: int) -> None:
print("-" * 60)
print("Starting standardizing with group-by values benchmarking")
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
print(
"Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
n_features,
n_loops,
n_groups))
x = np.random.randn(n_samples, n_features)
groups = np.random.randint(n_groups, size=n_samples)
......@@ -51,7 +59,8 @@ def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: i
start = dt.datetime.now()
for _ in range(n_loops):
_ = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
_ = pd.DataFrame(x).groupby(groups).transform(
lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
benchmark_model_time = dt.datetime.now() - start
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
......
......@@ -6,15 +6,18 @@ Created on 2017-4-25
"""
import datetime as dt
import numpy as np
import pandas as pd
from alphamind.data.winsorize import winsorize_normal
def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None:
print("-" * 60)
print("Starting winsorize normal benchmarking")
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
n_loops))
num_stds = 2
......@@ -46,10 +49,15 @@ def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) ->
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int,
n_groups: int) -> None:
print("-" * 60)
print("Starting winsorize normal with group-by values benchmarking")
print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
print(
"Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
n_features,
n_loops,
n_groups))
num_stds = 2
......
......@@ -3,4 +3,4 @@
Created on 2017-4-27
@author: cheng.li
"""
\ No newline at end of file
"""
......@@ -6,10 +6,12 @@ Created on 2017-5-5
"""
import datetime as dt
import numpy as np
from scipy.optimize import linprog
from cvxopt import matrix
from cvxopt import solvers
from scipy.optimize import linprog
from alphamind.portfolio.linearbuilder import linear_builder
solvers.options['show_progress'] = False
......@@ -18,7 +20,8 @@ solvers.options['show_progress'] = False
def benchmark_build_linear(n_samples: int, n_risks: int, n_loop: int) -> None:
print("-" * 60)
print("Starting portfolio construction by linear programming")
print("Parameters(n_samples: {0}, n_risks: {1}, n_loop: {2})".format(n_samples, n_risks, n_loop))
print(
"Parameters(n_samples: {0}, n_risks: {1}, n_loop: {2})".format(n_samples, n_risks, n_loop))
er = np.random.randn(n_samples)
risk_exp = np.random.randn(n_samples, n_risks)
......@@ -38,7 +41,7 @@ def benchmark_build_linear(n_samples: int, n_risks: int, n_loop: int) -> None:
ubound,
risk_exp,
risk_target=(risk_lbound,
risk_ubound))
risk_ubound))
impl_model_time = dt.datetime.now() - start
print('{0:20s}: {1}'.format('Implemented model (ECOS)', impl_model_time))
......@@ -70,4 +73,4 @@ def benchmark_build_linear(n_samples: int, n_risks: int, n_loop: int) -> None:
if __name__ == '__main__':
benchmark_build_linear(2000, 30, 10)
\ No newline at end of file
benchmark_build_linear(2000, 30, 10)
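Editor's note: a minimal sketch of the `linear_builder` call pattern visible in the hunk above, with illustrative bounds and risk targets (the return value's shape is not shown in this diff, so it is left unpacked):

```python
import numpy as np

n, n_risks = 2000, 30
er = np.random.randn(n)
risk_exp = np.random.randn(n, n_risks)
lbound, ubound = np.zeros(n), 0.01 * np.ones(n)
bench = np.ones(n) / n                         # illustrative benchmark weights
risk_lbound = risk_exp.T @ bench - 0.01
risk_ubound = risk_exp.T @ bench + 0.01
result = linear_builder(er, lbound, ubound, risk_exp,
                        risk_target=(risk_lbound, risk_ubound))
```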
......@@ -4,4 +4,3 @@ Created on 2017-5-9
@author: cheng.li
"""
......@@ -6,15 +6,18 @@ Created on 2017-5-4
"""
import datetime as dt
import numpy as np
import pandas as pd
from alphamind.portfolio.percentbuilder import percent_build
def benchmark_build_percent(n_samples: int, n_loops: int, p_included: float) -> None:
print("-" * 60)
print("Starting portfolio construction by percent benchmarking")
print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2})".format(n_samples, p_included, n_loops))
print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2})".format(n_samples, p_included,
n_loops))
n_portfolio = 10
......@@ -41,10 +44,15 @@ def benchmark_build_percent(n_samples: int, n_loops: int, p_included: float) ->
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_build_percent_with_group(n_samples: int, n_loops: int, p_included: float, n_groups: int) -> None:
def benchmark_build_percent_with_group(n_samples: int, n_loops: int, p_included: float,
n_groups: int) -> None:
print("-" * 60)
print("Starting portfolio construction by percent with group-by values benchmarking")
print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, p_included, n_loops, n_groups))
print(
"Parameters(n_samples: {0}, p_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
p_included,
n_loops,
n_groups))
n_portfolio = 10
......
......@@ -6,15 +6,18 @@ Created on 2017-4-27
"""
import datetime as dt
import numpy as np
import pandas as pd
from alphamind.portfolio.rankbuilder import rank_build
def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
print("-" * 60)
print("Starting portfolio construction by rank benchmarking")
print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))
print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included,
n_loops))
n_portfolio = 10
......@@ -40,10 +43,15 @@ def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, n_groups: int) -> None:
def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int,
n_groups: int) -> None:
print("-" * 60)
print("Starting portfolio construction by rank with group-by values benchmarking")
print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))
print(
"Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
n_included,
n_loops,
n_groups))
n_portfolio = 10
......
......@@ -3,4 +3,4 @@
Created on 2017-4-28
@author: cheng.li
"""
\ No newline at end of file
"""
......@@ -6,15 +6,19 @@ Created on 2017-4-28
"""
import datetime as dt
import numpy as np
import pandas as pd
from alphamind.settlement.simplesettle import simple_settle
def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None:
print("-" * 60)
print("Starting simple settle benchmarking")
print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples, n_portfolios, n_loops))
print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples,
n_portfolios,
n_loops))
weights = np.random.randn(n_samples, n_portfolios)
ret_series = np.random.randn(n_samples)
......@@ -37,10 +41,12 @@ def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) ->
print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int, n_groups: int) -> None:
def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int,
n_groups: int) -> None:
print("-" * 60)
print("Starting simple settle with group-by values benchmarking")
print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_portfolios, n_loops, n_groups))
print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(
n_samples, n_portfolios, n_loops, n_groups))
weights = np.random.randn(n_samples, n_portfolios)
ret_series = np.random.randn(n_samples)
......
......@@ -7,7 +7,9 @@ Created on 2017-6-29
import argparse
from collections import namedtuple
from sqlalchemy import create_engine
from alphamind.data.dbmodel import models
from alphamind.utilities import alpha_logger
......@@ -64,4 +66,4 @@ class CLIFactory(object):
def get_parser():
return CLIFactory.get_parser()
\ No newline at end of file
return CLIFactory.get_parser()
......@@ -10,7 +10,16 @@ cimport numpy as cnp
from libcpp.string cimport string
from libcpp.vector cimport vector
import numpy as np
from PyFin.api import pyFinAssert
cimport numpy as cnp
import numpy as np
from libcpp.string cimport string
from libcpp.vector cimport vector
cdef extern from "lpoptimizer.hpp" namespace "pfopt":
......
......@@ -5,11 +5,10 @@ Created on 2017-4-25
@author: cheng.li
"""
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal as winsorize
from alphamind.data.neutralize import neutralize
from alphamind.data.rank import rank
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal as winsorize
__all__ = ['standardize',
'winsorize',
......
......@@ -4,4 +4,3 @@ Created on 2017-6-29
@author: cheng.li
"""
......@@ -5,7 +5,7 @@ Created on 2017-6-29
@author: cheng.li
"""
from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text, Boolean, text, JSON
from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
......@@ -72,7 +72,6 @@ class FundMaster(Base):
class Categories(Base):
__tablename__ = 'categories'
__table_args__ = (
Index('categories_pk', 'trade_date', 'code', unique=True),
......@@ -100,7 +99,8 @@ class FactorMaster(Base):
class HaltList(Base):
__tablename__ = 'halt_list'
__table_args__ = (
Index('halt_list_Date_Code_haltBeginTime_uindex', 'trade_date', 'code', 'haltBeginTime', unique=True),
Index('halt_list_Date_Code_haltBeginTime_uindex', 'trade_date', 'code', 'haltBeginTime',
unique=True),
)
trade_date = Column(DateTime, primary_key=True, nullable=False)
......
......@@ -7,68 +7,115 @@ Created on 2018-1-24
INDUSTRY_MAPPING = {
'sw': {
1: ["采掘", "传媒", "电气设备", "电子", "房地产", "纺织服装", "非银金融", "钢铁", "公用事业", "国防军工", "化工", "机械设备", "计算机", "家用电器", "建筑材料",
"建筑装饰", "交通运输", "农林牧渔", "汽车", "轻工制造", "商业贸易", "食品饮料", "通信", "休闲服务", "医药生物", "银行", "有色金属", "综合"],
2: ["白色家电", "半导体", "包装印刷", "保险", "玻璃制造", "采掘服务", "餐饮", "畜禽养殖", "船舶制造", "地面兵装", "电机", "电力", "电气自动化设备", "电源设备",
"电子制造", "动物保健", "多元金融", "房地产开发", "房屋建设", "纺织制造", "服装家纺", "钢铁", "港口", "高低压设备", "高速公路", "工业金属", "公交", "光学光电子",
"航空运输", "航空装备", "航天装备", "航运", "互联网传媒", "化学纤维", "化学原料", "化学制品", "化学制药", "环保工程及服务", "黄金", "机场", "基础建设",
"计算机设备", "计算机应用", "家用轻工", "金属非金属新材料", "金属制品", "景点", "酒店", "林业", "旅游综合", "贸易", "煤炭开采", "农产品加工", "农业综合",
"其他采掘", "其他电子", "其他建材", "其他交运设备", "其他轻工制造", "其他休闲服务", "汽车服务", "汽车零部件", "汽车整车", "燃气", "商业物业经营", "生物制品",
"石油化工", "石油开采", "食品加工", "视听器材", "水泥制造", "水务", "饲料", "塑料", "铁路运输", "通信设备", "通信运营", "通用机械", "文化传媒", "物流",
"稀有金属", "橡胶", "一般零售", "医疗服务", "医疗器械", "医药商业", "仪器仪表", "银行", "饮料制造", "营销传播", "渔业", "元件", "园林工程", "园区开发",
1: ["采掘", "传媒", "电气设备", "电子", "房地产", "纺织服装", "非银金融", "钢铁", "公用事业", "国防军工", "化工", "机械设备",
"计算机", "家用电器", "建筑材料",
"建筑装饰", "交通运输", "农林牧渔", "汽车", "轻工制造", "商业贸易", "食品饮料", "通信", "休闲服务", "医药生物", "银行",
"有色金属", "综合"],
2: ["白色家电", "半导体", "包装印刷", "保险", "玻璃制造", "采掘服务", "餐饮", "畜禽养殖", "船舶制造", "地面兵装", "电机", "电力",
"电气自动化设备", "电源设备",
"电子制造", "动物保健", "多元金融", "房地产开发", "房屋建设", "纺织制造", "服装家纺", "钢铁", "港口", "高低压设备", "高速公路",
"工业金属", "公交", "光学光电子",
"航空运输", "航空装备", "航天装备", "航运", "互联网传媒", "化学纤维", "化学原料", "化学制品", "化学制药", "环保工程及服务", "黄金",
"机场", "基础建设",
"计算机设备", "计算机应用", "家用轻工", "金属非金属新材料", "金属制品", "景点", "酒店", "林业", "旅游综合", "贸易", "煤炭开采",
"农产品加工", "农业综合",
"其他采掘", "其他电子", "其他建材", "其他交运设备", "其他轻工制造", "其他休闲服务", "汽车服务", "汽车零部件", "汽车整车", "燃气",
"商业物业经营", "生物制品",
"石油化工", "石油开采", "食品加工", "视听器材", "水泥制造", "水务", "饲料", "塑料", "铁路运输", "通信设备", "通信运营",
"通用机械", "文化传媒", "物流",
"稀有金属", "橡胶", "一般零售", "医疗服务", "医疗器械", "医药商业", "仪器仪表", "银行", "饮料制造", "营销传播", "渔业", "元件",
"园林工程", "园区开发",
"运输设备", "造纸", "证券", "中药", "种植业", "专业工程", "专业零售", "专用设备", "装修装饰", "综合"],
3: ["IT服务", "LED", "氨纶", "白酒", "百货", "半导体材料", "包装印刷", "保险", "被动元件", "冰箱", "玻璃制造", "玻纤", "彩电", "餐饮", "超市",
"城轨建设", "乘用车", "储能设备", "畜禽养殖", "船舶制造", "纯碱", "磁性材料", "氮肥", "低压设备", "涤纶", "地面兵装", "电机", "电网自动化", "电子零部件制造",
"电子系统组装", "动物保健", "多业态零售", "多元金融", "房地产开发", "房屋建设", "纺织服装设备", "纺织化学用品", "非金属新材料", "分立器件", "风电设备", "氟化工及制冷剂",
"辅料", "复合肥", "改性塑料", "钢结构", "港口", "高速公路", "高压设备", "工程机械", "工控自动化", "公交", "管材", "光伏设备", "光学元件", "国际工程承包",
"果蔬加工", "海洋捕捞", "航空运输", "航空装备", "航天装备", "航运", "合成革", "互联网信息服务", "化学工程", "化学原料药", "化学制剂", "环保工程及服务", "环保设备",
"黄金", "黄酒", "火电", "火电设备", "机场", "机床工具", "机械基础件", "集成电路", "计量仪表", "计算机设备", "家电零部件", "家纺", "家具", "钾肥", "焦炭加工",
"金属新材料", "金属制品", "酒店", "聚氨酯", "空调", "锂", "粮食种植", "粮油加工", "林业", "磷肥", "磷化工及磷酸盐", "楼宇设备", "路桥施工", "轮胎",
"旅游综合", "铝", "氯碱", "毛纺", "贸易", "煤炭开采", "棉纺", "民爆用品", "磨具磨料", "耐火材料", "男装", "内燃机", "农药", "农业综合", "农用机械",
"女装", "啤酒", "平面媒体", "葡萄酒", "普钢", "其他采掘", "其他采掘服务", "其他电子", "其他纺织", "其他服装", "其他互联网服务", "其他化学原料", "其他化学制品",
"其他基础建设", "其他家用轻工", "其他建材", "其他交运设备", "其他酒类", "其他农产品加工", "其他轻工制造", "其他塑料制品", "其他文化传媒", "其他稀有小金属", "其他纤维",
"其他橡胶制品", "其他休闲服务", "其他种植业", "其他专业工程", "其它电源设备", "其它视听器材", "其它通用机械", "其它专用机械", "汽车服务", "汽车零部件", "铅锌",
"燃机发电", "燃气", "热电", "人工景点", "日用化学产品", "肉制品", "乳品", "软件开发", "软饮料", "商用载货车", "商用载客车", "生物制品", "石油加工", "石油开采",
"石油贸易", "食品综合", "水产养殖", "水电", "水利工程", "水泥制造", "水务", "丝绸", "饲料", "炭黑", "特钢", "调味发酵品", "铁路建设", "铁路设备", "铁路运输",
"通信传输设备", "通信配套服务", "通信运营", "铜", "涂料油漆油墨制造", "维纶", "文娱用品", "钨", "无机盐", "物流", "稀土", "洗衣机", "显示器件", "线缆部件及其他",
"小家电", "鞋帽", "新能源发电", "休闲服装", "冶金矿采化工设备", "一般物业经营", "医疗服务", "医疗器械", "医药商业", "仪器仪表", "移动互联网服务", "银行", "印染",
"印刷包装机械", "印制电路板", "营销服务", "影视动漫", "油气钻采服务", "有线电视网络", "园林工程", "园区开发", "造纸", "粘胶", "证券", "制冷空调设备", "中压设备",
3: ["IT服务", "LED", "氨纶", "白酒", "百货", "半导体材料", "包装印刷", "保险", "被动元件", "冰箱", "玻璃制造", "玻纤",
"彩电", "餐饮", "超市",
"城轨建设", "乘用车", "储能设备", "畜禽养殖", "船舶制造", "纯碱", "磁性材料", "氮肥", "低压设备", "涤纶", "地面兵装", "电机",
"电网自动化", "电子零部件制造",
"电子系统组装", "动物保健", "多业态零售", "多元金融", "房地产开发", "房屋建设", "纺织服装设备", "纺织化学用品", "非金属新材料",
"分立器件", "风电设备", "氟化工及制冷剂",
"辅料", "复合肥", "改性塑料", "钢结构", "港口", "高速公路", "高压设备", "工程机械", "工控自动化", "公交", "管材", "光伏设备",
"光学元件", "国际工程承包",
"果蔬加工", "海洋捕捞", "航空运输", "航空装备", "航天装备", "航运", "合成革", "互联网信息服务", "化学工程", "化学原料药", "化学制剂",
"环保工程及服务", "环保设备",
"黄金", "黄酒", "火电", "火电设备", "机场", "机床工具", "机械基础件", "集成电路", "计量仪表", "计算机设备", "家电零部件", "家纺",
"家具", "钾肥", "焦炭加工",
"金属新材料", "金属制品", "酒店", "聚氨酯", "空调", "锂", "粮食种植", "粮油加工", "林业", "磷肥", "磷化工及磷酸盐", "楼宇设备",
"路桥施工", "轮胎",
"旅游综合", "铝", "氯碱", "毛纺", "贸易", "煤炭开采", "棉纺", "民爆用品", "磨具磨料", "耐火材料", "男装", "内燃机", "农药",
"农业综合", "农用机械",
"女装", "啤酒", "平面媒体", "葡萄酒", "普钢", "其他采掘", "其他采掘服务", "其他电子", "其他纺织", "其他服装", "其他互联网服务",
"其他化学原料", "其他化学制品",
"其他基础建设", "其他家用轻工", "其他建材", "其他交运设备", "其他酒类", "其他农产品加工", "其他轻工制造", "其他塑料制品", "其他文化传媒",
"其他稀有小金属", "其他纤维",
"其他橡胶制品", "其他休闲服务", "其他种植业", "其他专业工程", "其它电源设备", "其它视听器材", "其它通用机械", "其它专用机械", "汽车服务",
"汽车零部件", "铅锌",
"燃机发电", "燃气", "热电", "人工景点", "日用化学产品", "肉制品", "乳品", "软件开发", "软饮料", "商用载货车", "商用载客车",
"生物制品", "石油加工", "石油开采",
"石油贸易", "食品综合", "水产养殖", "水电", "水利工程", "水泥制造", "水务", "丝绸", "饲料", "炭黑", "特钢", "调味发酵品",
"铁路建设", "铁路设备", "铁路运输",
"通信传输设备", "通信配套服务", "通信运营", "铜", "涂料油漆油墨制造", "维纶", "文娱用品", "钨", "无机盐", "物流", "稀土",
"洗衣机", "显示器件", "线缆部件及其他",
"小家电", "鞋帽", "新能源发电", "休闲服装", "冶金矿采化工设备", "一般物业经营", "医疗服务", "医疗器械", "医药商业", "仪器仪表",
"移动互联网服务", "银行", "印染",
"印刷包装机械", "印制电路板", "营销服务", "影视动漫", "油气钻采服务", "有线电视网络", "园林工程", "园区开发", "造纸", "粘胶", "证券",
"制冷空调设备", "中压设备",
"中药", "终端设备", "种子生产", "重型机械", "珠宝首饰", "专业连锁", "专业市场", "装修装饰", "自然景点", "综合", "综合电力设备商"]
},
'sw_adj': {
1: ["建筑材料", "机械设备", "家用电器", "交通运输", "化工", "纺织服装", "电气设备", "多元金融", "通信", "传媒", "信息服务", "银行", "农林牧渔", "建筑装饰",
"计算机", "轻工制造", "交运设备", "信息设备", "钢铁", "采掘", "建筑建材", "商业贸易", "房地产", "有色金属", "国防军工", "医药生物", "汽车", "公用事业",
1: ["建筑材料", "机械设备", "家用电器", "交通运输", "化工", "纺织服装", "电气设备", "多元金融", "通信", "传媒", "信息服务", "银行",
"农林牧渔", "建筑装饰",
"计算机", "轻工制造", "交运设备", "信息设备", "钢铁", "采掘", "建筑建材", "商业贸易", "房地产", "有色金属", "国防军工",
"医药生物", "汽车", "公用事业",
"保险", "休闲服务", "证券", "电子", "综合", "食品饮料"]
},
'zz': {
1: ["电信业务", "工业", "公用事业", "金融地产", "可选消费", "能源", "信息技术", "医药卫生", "原材料", "主要消费"],
2: ["半导体", "保险", "传媒", "电信服务", "房地产", "公用事业", "计算机及电子设备", "计算机运用", "家庭与个人用品", "交通运输", "零售业", "耐用消费品与服装", "能源",
"其他金融", "汽车与汽车零部件", "商业服务与用品", "食品、饮料与烟草", "食品与主要用品零售", "通信设备", "消费者服务", "医疗器械与服务", "医药生物", "银行", "原材料",
2: ["半导体", "保险", "传媒", "电信服务", "房地产", "公用事业", "计算机及电子设备", "计算机运用", "家庭与个人用品", "交通运输", "零售业",
"耐用消费品与服装", "能源",
"其他金融", "汽车与汽车零部件", "商业服务与用品", "食品、饮料与烟草", "食品与主要用品零售", "通信设备", "消费者服务", "医疗器械与服务",
"医药生物", "银行", "原材料",
"资本品", "资本市场"],
3: ["半导体", "包装食品与肉类", "保险", "传媒", "道路运输", "电力", "电脑与外围设备", "电气设备", "电网", "电信运营服务", "电信增值服务", "电子设备", "多元化零售",
"房地产管理与服务", "房地产开发与园区", "纺织服装", "非金属采矿及制品", "钢铁", "个人用品", "工业集团企业", "供热或其他公用事业", "航空公司", "航空航天与国防",
"航空货运与物流", "航运", "互联网服务", "互联网零售", "化学原料", "化学制品", "环保设备、工程与服务", "机械制造", "家常用品", "家庭耐用消费品", "建筑材料", "建筑产品",
"建筑与工程", "交通基本设施", "酒店、餐馆与休闲", "煤炭", "能源开采设备与服务", "农牧渔产品", "其他金融服务", "其他零售", "汽车零配件与轮胎", "汽车与摩托车", "燃气",
"日用品经销商", "容器与包装", "软件开发", "商业服务与用品", "商业银行", "生物科技", "石油与天然气", "食品与主要用品零售", "水务", "通信设备", "消费信贷", "信息技术服务",
"休闲设备与用品", "医疗器械", "医疗用品与服务提供商", "饮料", "有色金属", "纸类与林业产品", "制药", "制药与生物科技服务", "珠宝与奢侈品", "资本市场", "综合消费者服务"]
3: ["半导体", "包装食品与肉类", "保险", "传媒", "道路运输", "电力", "电脑与外围设备", "电气设备", "电网", "电信运营服务", "电信增值服务",
"电子设备", "多元化零售",
"房地产管理与服务", "房地产开发与园区", "纺织服装", "非金属采矿及制品", "钢铁", "个人用品", "工业集团企业", "供热或其他公用事业", "航空公司",
"航空航天与国防",
"航空货运与物流", "航运", "互联网服务", "互联网零售", "化学原料", "化学制品", "环保设备、工程与服务", "机械制造", "家常用品",
"家庭耐用消费品", "建筑材料", "建筑产品",
"建筑与工程", "交通基本设施", "酒店、餐馆与休闲", "煤炭", "能源开采设备与服务", "农牧渔产品", "其他金融服务", "其他零售", "汽车零配件与轮胎",
"汽车与摩托车", "燃气",
"日用品经销商", "容器与包装", "软件开发", "商业服务与用品", "商业银行", "生物科技", "石油与天然气", "食品与主要用品零售", "水务",
"通信设备", "消费信贷", "信息技术服务",
"休闲设备与用品", "医疗器械", "医疗用品与服务提供商", "饮料", "有色金属", "纸类与林业产品", "制药", "制药与生物科技服务", "珠宝与奢侈品",
"资本市场", "综合消费者服务"]
},
'zjh': {
1: ["采矿业", "电力、热力、燃气及水生产和供应业", "房地产业", "建筑业", "交通运输、仓储和邮政业", "教育", "金融业", "居民服务、修理和其他服务业", "科学研究和技术服务业",
"农、林、牧、渔业", "批发和零售业", "水利、环境和公共设施管理业", "卫生和社会工作", "文化、体育和娱乐业", "信息传输、软件和信息技术服务业", "制造业", "住宿和餐饮业", "综合",
1: ["采矿业", "电力、热力、燃气及水生产和供应业", "房地产业", "建筑业", "交通运输、仓储和邮政业", "教育", "金融业", "居民服务、修理和其他服务业",
"科学研究和技术服务业",
"农、林、牧、渔业", "批发和零售业", "水利、环境和公共设施管理业", "卫生和社会工作", "文化、体育和娱乐业", "信息传输、软件和信息技术服务业", "制造业",
"住宿和餐饮业", "综合",
"租赁和商务服务业"],
2: ["保险业", "餐饮业", "仓储业", "畜牧业", "道路运输业", "电力、热力生产和供应业", "电气机械和器材制造业", "电信、广播电视和卫星传输服务", "房地产业", "房屋建筑业",
"纺织服装、服饰业", "纺织业", "非金属矿采选业", "非金属矿物制品业", "废弃资源综合利用业", "公共设施管理业", "广播、电视、电影和影视录音制作业", "航空运输业", "黑色金属矿采选业",
"黑色金属冶炼和压延加工业", "互联网和相关服务", "化学纤维制造业", "化学原料和化学制品制造业", "货币金融服务", "机动车、电子产品和日用产品修理业", "计算机、通信和其他电子设备制造业",
"家具制造业", "建筑安装业", "建筑装饰和其他建筑业", "教育", "金属制品业", "酒、饮料和精制茶制造业", "开采辅助活动", "林业", "零售业", "煤炭开采和洗选业",
"木材加工和木、竹、藤、棕、草制品业", "农、林、牧、渔服务业", "农副食品加工业", "农业", "批发业", "皮革、毛皮、羽毛及其制品和制鞋业", "其他金融业", "其他制造业", "汽车制造业",
"燃气生产和供应业", "软件和信息技术服务业", "商务服务业", "生态保护和环境治理业", "石油和天然气开采业", "石油加工、炼焦和核燃料加工业", "食品制造业", "水的生产和供应业",
"水利管理业", "水上运输业", "体育", "铁路、船舶、航空航天和其它运输设备制造业", "铁路运输业", "通用设备制造业", "土木工程建筑业", "卫生", "文化艺术业",
"文教、工美、体育和娱乐用品制造业", "橡胶和塑料制品业", "新闻和出版业", "研究和试验发展", "医药制造业", "仪器仪表制造业", "印刷和记录媒介复制业", "邮政业", "有色金属矿采选业",
"有色金属冶炼和压延加工业", "渔业", "造纸和纸制品业", "住宿业", "专业技术服务业", "专用设备制造业", "装卸搬运和运输代理业", "资本市场服务", "综合", "租赁业"],
2: ["保险业", "餐饮业", "仓储业", "畜牧业", "道路运输业", "电力、热力生产和供应业", "电气机械和器材制造业", "电信、广播电视和卫星传输服务",
"房地产业", "房屋建筑业",
"纺织服装、服饰业", "纺织业", "非金属矿采选业", "非金属矿物制品业", "废弃资源综合利用业", "公共设施管理业", "广播、电视、电影和影视录音制作业",
"航空运输业", "黑色金属矿采选业",
"黑色金属冶炼和压延加工业", "互联网和相关服务", "化学纤维制造业", "化学原料和化学制品制造业", "货币金融服务", "机动车、电子产品和日用产品修理业",
"计算机、通信和其他电子设备制造业",
"家具制造业", "建筑安装业", "建筑装饰和其他建筑业", "教育", "金属制品业", "酒、饮料和精制茶制造业", "开采辅助活动", "林业", "零售业",
"煤炭开采和洗选业",
"木材加工和木、竹、藤、棕、草制品业", "农、林、牧、渔服务业", "农副食品加工业", "农业", "批发业", "皮革、毛皮、羽毛及其制品和制鞋业", "其他金融业",
"其他制造业", "汽车制造业",
"燃气生产和供应业", "软件和信息技术服务业", "商务服务业", "生态保护和环境治理业", "石油和天然气开采业", "石油加工、炼焦和核燃料加工业", "食品制造业",
"水的生产和供应业",
"水利管理业", "水上运输业", "体育", "铁路、船舶、航空航天和其它运输设备制造业", "铁路运输业", "通用设备制造业", "土木工程建筑业", "卫生",
"文化艺术业",
"文教、工美、体育和娱乐用品制造业", "橡胶和塑料制品业", "新闻和出版业", "研究和试验发展", "医药制造业", "仪器仪表制造业", "印刷和记录媒介复制业",
"邮政业", "有色金属矿采选业",
"有色金属冶炼和压延加工业", "渔业", "造纸和纸制品业", "住宿业", "专业技术服务业", "专用设备制造业", "装卸搬运和运输代理业", "资本市场服务",
"综合", "租赁业"],
},
'dx': {
1: ["Cyclical", "Defensive", "Sensitive"],
2: ["ConsumerDiscretionary", "ConsumerStaples", "Financials", "HealthCare", "Industrials", "IT", "Materials",
2: ["ConsumerDiscretionary", "ConsumerStaples", "Financials", "HealthCare", "Industrials",
"IT", "Materials",
"RealEstate", "Utilities"]
}
}
......@@ -5,37 +5,35 @@ Created on 2017-7-7
@author: cheng.li
"""
from typing import Dict
from typing import Iterable
from typing import List
from typing import Dict
from typing import Tuple
from typing import Union
import numpy as np
import pandas as pd
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy import select, and_, outerjoin, join, column
from sqlalchemy.sql import func
from alphamind.data.engines.universe import Universe
from alphamind.data.dbmodel.models import FactorMaster
from alphamind.data.dbmodel.models import FundHolding
from alphamind.data.dbmodel.models import FundMaster
from alphamind.data.dbmodel.models import IndexComponent
from alphamind.data.dbmodel.models import IndexMarket
from alphamind.data.dbmodel.models import Industry
from alphamind.data.dbmodel.models import RiskMaster
from alphamind.data.dbmodel.models import Market
from alphamind.data.dbmodel.models import IndexMarket
from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import FundMaster
from alphamind.data.dbmodel.models import FundHolding
from alphamind.data.transformer import Transformer
from alphamind.data.dbmodel.models import RiskMaster
from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.engines.universe import Universe
from alphamind.data.engines.utilities import _map_factors
from alphamind.data.engines.utilities import _map_industry_category
from alphamind.data.engines.utilities import _map_risk_model_table
from alphamind.data.engines.utilities import factor_tables
from alphamind.data.processing import factor_processing
from alphamind.data.transformer import Transformer
from alphamind.portfolio.riskmodel import FactorRiskModel
from PyFin.api import advanceDateByCalendar
risk_styles = ['BETA',
'MOMENTUM',
......@@ -119,14 +117,15 @@ class SqlEngine(object):
def fetch_fund_holding(self,
fund_codes,
start_date: str=None,
end_date: str=None,
dates: Iterable[str]=None) -> pd.DataFrame:
start_date: str = None,
end_date: str = None,
dates: Iterable[str] = None) -> pd.DataFrame:
query = select([FundHolding]).where(
and_(
FundHolding.fund_code.in_(fund_codes),
FundHolding.reportDate.in_(dates) if dates else FundHolding.reportDate.between(start_date, end_date)
FundHolding.reportDate.in_(dates) if dates else FundHolding.reportDate.between(
start_date, end_date)
)
)
return pd.read_sql(query, self.session.bind)
......@@ -154,7 +153,9 @@ class SqlEngine(object):
stats = func.sum(self.ln_func(1. + table.chgPct)).over(
partition_by=getattr(table, code_attr),
order_by=table.trade_date,
rows=(1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
rows=(
1 + DAILY_RETURN_OFFSET + offset, 1 + horizon + DAILY_RETURN_OFFSET + offset)).label(
'dx')
return stats
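Editor's note: the window above accumulates log returns over forward bars: per the code, `dx` at date t sums ln(1 + chgPct) from bar t + 1 + offset through t + 1 + horizon + offset. A pandas sketch of the same statistic for a single code's series, assuming DAILY_RETURN_OFFSET is 0 (for a full panel this would be applied per code via groupby):

```python
import numpy as np
import pandas as pd

def forward_dx(chg_pct: pd.Series, horizon: int, offset: int = 0) -> pd.Series:
    log_ret = np.log1p(chg_pct)
    # rolling sum of horizon + 1 bars, aligned so the window covers
    # [t + 1 + offset, t + 1 + offset + horizon] relative to each date t
    return log_ret.shift(-(1 + offset + horizon)).rolling(horizon + 1).sum()
```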
def fetch_dx_return(self,
......@@ -166,12 +167,14 @@ class SqlEngine(object):
neutralized_risks: list = None,
pre_process=None,
post_process=None,
benchmark: int=None) -> pd.DataFrame:
benchmark: int = None) -> pd.DataFrame:
start_date = ref_date
if not expiry_date:
end_date = advanceDateByCalendar('china.sse', ref_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
str(
1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime(
'%Y%m%d')
else:
end_date = expiry_date
......@@ -216,14 +219,16 @@ class SqlEngine(object):
dates: Iterable[str] = None,
horizon: int = 0,
offset: int = 0,
benchmark: int=None) -> pd.DataFrame:
benchmark: int = None) -> pd.DataFrame:
if dates:
start_date = dates[0]
end_date = dates[-1]
end_date = advanceDateByCalendar('china.sse', end_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
str(
1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime(
'%Y-%m-%d')
stats = self._create_stats(Market, horizon, offset)
......@@ -268,7 +273,9 @@ class SqlEngine(object):
if not expiry_date:
end_date = advanceDateByCalendar('china.sse', ref_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
str(
1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime(
'%Y%m%d')
else:
end_date = expiry_date
......@@ -297,7 +304,9 @@ class SqlEngine(object):
end_date = dates[-1]
end_date = advanceDateByCalendar('china.sse', end_date,
str(1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
str(
1 + horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime(
'%Y-%m-%d')
stats = self._create_stats(IndexMarket, horizon, offset, code_attr='indexCode')
query = select([IndexMarket.trade_date, IndexMarket.indexCode.label('code'), stats]) \
......@@ -333,7 +342,8 @@ class SqlEngine(object):
else:
factor_cols = _map_factors(dependency, factor_tables)
start_date = advanceDateByCalendar('china.sse', ref_date, str(-warm_start) + 'b').strftime('%Y-%m-%d')
start_date = advanceDateByCalendar('china.sse', ref_date, str(-warm_start) + 'b').strftime(
'%Y-%m-%d')
end_date = ref_date
big_table = Market
......@@ -348,7 +358,8 @@ class SqlEngine(object):
joined_tables.add(t.__table__.name)
query = select(
[Market.trade_date, Market.code, Market.chgPct, Market.secShortName] + list(factor_cols.keys())) \
[Market.trade_date, Market.code, Market.chgPct, Market.secShortName] + list(
factor_cols.keys())) \
.select_from(big_table).where(and_(Market.trade_date.between(start_date, end_date),
Market.code.in_(codes)))
......@@ -398,18 +409,21 @@ class SqlEngine(object):
else:
big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date,
Market.code == t.code,
Market.trade_date.between(start_date, end_date)))
Market.trade_date.between(start_date,
end_date)))
joined_tables.add(t.__table__.name)
universe_df = universe.query(self, start_date, end_date, dates)
query = select(
[Market.trade_date, Market.code, Market.chgPct, Market.secShortName] + list(factor_cols.keys())) \
[Market.trade_date, Market.code, Market.chgPct, Market.secShortName] + list(
factor_cols.keys())) \
.select_from(big_table).where(
and_(
Market.code.in_(universe_df.code.unique().tolist()),
Market.trade_date.in_(dates) if dates is not None else Market.trade_date.between(start_date, end_date)
)
and_(
Market.code.in_(universe_df.code.unique().tolist()),
Market.trade_date.in_(dates) if dates is not None else Market.trade_date.between(
start_date, end_date)
)
).distinct()
df = pd.read_sql(query, self.engine).replace([-np.inf, np.inf], np.nan)
......@@ -424,7 +438,8 @@ class SqlEngine(object):
res['chgPct'] = df.chgPct
res['secShortName'] = df['secShortName']
res = res.reset_index()
return pd.merge(res, universe_df[['trade_date', 'code']], how='inner').drop_duplicates(['trade_date', 'code'])
return pd.merge(res, universe_df[['trade_date', 'code']], how='inner').drop_duplicates(
['trade_date', 'code'])
def fetch_factor_range_forward(self,
universe: Universe,
......@@ -457,14 +472,16 @@ class SqlEngine(object):
else:
big_table = outerjoin(big_table, t, and_(Market.trade_date == t.trade_date,
Market.code == t.code,
Market.trade_date.between(start_date, end_date)))
Market.trade_date.between(start_date,
end_date)))
joined_tables.add(t.__table__.name)
stats = func.lag(list(factor_cols.keys())[0], -1).over(
partition_by=Market.code,
order_by=Market.trade_date).label('dx')
query = select([Market.trade_date, Market.code, Market.chgPct, stats]).select_from(big_table).where(
query = select([Market.trade_date, Market.code, Market.chgPct, stats]).select_from(
big_table).where(
and_(
Market.trade_date.in_(total_dates),
Market.code.in_(total_codes)
......@@ -474,12 +491,13 @@ class SqlEngine(object):
df = pd.read_sql(query, self.engine) \
.replace([-np.inf, np.inf], np.nan) \
.sort_values(['trade_date', 'code'])
return pd.merge(df, codes[['trade_date', 'code']], how='inner').drop_duplicates(['trade_date', 'code'])
return pd.merge(df, codes[['trade_date', 'code']], how='inner').drop_duplicates(
['trade_date', 'code'])
def fetch_benchmark(self,
ref_date: str,
benchmark: int,
codes: Iterable[int]=None) -> pd.DataFrame:
codes: Iterable[int] = None) -> pd.DataFrame:
query = select([IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
and_(
IndexComponent.trade_date == ref_date,
......@@ -501,11 +519,13 @@ class SqlEngine(object):
end_date: str = None,
dates: Iterable[str] = None) -> pd.DataFrame:
cond = IndexComponent.trade_date.in_(dates) if dates else IndexComponent.trade_date.between(start_date,
end_date)
cond = IndexComponent.trade_date.in_(dates) if dates else IndexComponent.trade_date.between(
start_date,
end_date)
query = select(
[IndexComponent.trade_date, IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
[IndexComponent.trade_date, IndexComponent.code,
(IndexComponent.weight / 100.).label('weight')]).where(
and_(
cond,
IndexComponent.indexCode == benchmark
......@@ -518,7 +538,8 @@ class SqlEngine(object):
codes: Iterable[int],
risk_model: str = 'short',
excluded: Iterable[str] = None,
model_type: str = None) -> Union[FactorRiskModel, Tuple[pd.DataFrame, pd.DataFrame]]:
model_type: str = None) -> Union[
FactorRiskModel, Tuple[pd.DataFrame, pd.DataFrame]]:
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
......@@ -530,7 +551,8 @@ class SqlEngine(object):
risk_cov = pd.read_sql(query, self.engine).sort_values('FactorID')
if excluded:
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if
f not in set(excluded)]
else:
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors]
......@@ -541,7 +563,8 @@ class SqlEngine(object):
RiskExposure.trade_date == special_risk_table.trade_date
))
query = select([RiskExposure.code, special_risk_table.SRISK.label('srisk')] + risk_exposure_cols) \
query = select(
[RiskExposure.code, special_risk_table.SRISK.label('srisk')] + risk_exposure_cols) \
.select_from(big_table).where(
and_(RiskExposure.trade_date == ref_date,
RiskExposure.code.in_(codes)
......@@ -571,8 +594,9 @@ class SqlEngine(object):
risk_cov_table, special_risk_table = _map_risk_model_table(risk_model)
cov_risk_cols = [risk_cov_table.__table__.columns[f] for f in total_risk_factors]
cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(start_date,
end_date)
cond = risk_cov_table.trade_date.in_(dates) if dates else risk_cov_table.trade_date.between(
start_date,
end_date)
query = select([risk_cov_table.trade_date,
risk_cov_table.FactorID,
risk_cov_table.Factor]
......@@ -584,7 +608,8 @@ class SqlEngine(object):
if not excluded:
excluded = []
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if f not in set(excluded)]
risk_exposure_cols = [RiskExposure.__table__.columns[f] for f in total_risk_factors if
f not in set(excluded)]
cond = universe._query_statements(start_date, end_date, dates)
big_table = join(RiskExposure, UniverseTable,
and_(
......@@ -639,13 +664,13 @@ class SqlEngine(object):
category_name = 'industryName' + str(level)
cond = and_(
Industry.trade_date == ref_date,
Industry.code.in_(codes),
Industry.industry == industry_category_name
) if codes else and_(
Industry.trade_date == ref_date,
Industry.industry == industry_category_name
)
Industry.trade_date == ref_date,
Industry.code.in_(codes),
Industry.industry == industry_category_name
) if codes else and_(
Industry.trade_date == ref_date,
Industry.industry == industry_category_name
)
query = select([Industry.code,
getattr(Industry, code_name).label('industry_code'),
......@@ -689,7 +714,7 @@ class SqlEngine(object):
query = select([Industry.trade_date,
Industry.code,
getattr(Industry, code_name).label('industry_code'),
getattr(Industry, category_name).label('industry')]).select_from(big_table)\
getattr(Industry, category_name).label('industry')]).select_from(big_table) \
.order_by(Industry.trade_date, Industry.code)
return pd.read_sql(query, self.engine).dropna()
......@@ -756,7 +781,8 @@ class SqlEngine(object):
)
).cte('cte')
query = select([cte]).select_from(cte).order_by(cte.columns['trade_date'], cte.columns['code'])
query = select([cte]).select_from(cte).order_by(cte.columns['trade_date'],
cte.columns['code'])
df = pd.read_sql(query, self.engine)
return pd.merge(df, codes[['trade_date', 'code']], on=['trade_date', 'code'])
......@@ -821,12 +847,14 @@ class SqlEngine(object):
if benchmark:
benchmark_data = self.fetch_benchmark_range(benchmark, start_date, end_date, dates)
total_data['benchmark'] = benchmark_data
factor_data = pd.merge(factor_data, benchmark_data, how='left', on=['trade_date', 'code'])
factor_data = pd.merge(factor_data, benchmark_data, how='left',
on=['trade_date', 'code'])
factor_data['weight'] = factor_data['weight'].fillna(0.)
if risk_model:
excluded = list(set(total_risk_factors).intersection(transformer.dependency))
risk_cov, risk_exp = self.fetch_risk_model_range(universe, start_date, end_date, dates, risk_model,
risk_cov, risk_exp = self.fetch_risk_model_range(universe, start_date, end_date, dates,
risk_model,
excluded)
factor_data = pd.merge(factor_data, risk_exp, how='left', on=['trade_date', 'code'])
total_data['risk_cov'] = risk_cov
......@@ -849,7 +877,8 @@ if __name__ == '__main__':
freq = "1m"
universe = Universe('zz800')
engine = SqlEngine('postgresql+psycopg2://alpha:alpha@180.166.26.82:8889')
rebalance_dates = makeSchedule('2015-01-31', '2019-05-30', freq, 'china.sse', BizDayConventions.Preceding)
rebalance_dates = makeSchedule('2015-01-31', '2019-05-30', freq, 'china.sse',
BizDayConventions.Preceding)
formula = CSTopN(LAST('EP'), 5, groups='sw1')
factors = engine.fetch_factor_range(universe, {'alpha': formula}, dates=rebalance_dates)
\ No newline at end of file
factors = engine.fetch_factor_range(universe, {'alpha': formula}, dates=rebalance_dates)
......@@ -5,13 +5,15 @@ Created on 2017-7-7
@author: cheng.li
"""
import sys
import abc
import sys
import pandas as pd
from sqlalchemy import and_
from sqlalchemy import or_
from sqlalchemy import not_
from sqlalchemy import or_
from sqlalchemy import select
from alphamind.data.dbmodel.models import Universe as UniverseTable
......@@ -53,7 +55,8 @@ class BaseUniverse(metaclass=abc.ABCMeta):
def _query_statements(self, start_date: str = None, end_date: str = None, dates=None):
return and_(
self.condition(),
UniverseTable.trade_date.in_(dates) if dates else UniverseTable.trade_date.between(start_date, end_date)
UniverseTable.trade_date.in_(dates) if dates else UniverseTable.trade_date.between(
start_date, end_date)
)
......@@ -177,7 +180,6 @@ def load_universe(u_desc: dict):
if __name__ == '__main__':
from PyFin.api import *
from alphamind.data.engines.sqlengine import SqlEngine
engine = SqlEngine()
......
......@@ -5,21 +5,21 @@ Created on 2017-12-25
@author: cheng.li
"""
from typing import Iterable
from typing import Dict
from typing import Iterable
from alphamind.data.dbmodel.models import Categories
from alphamind.data.dbmodel.models import Market
from alphamind.data.dbmodel.models import RiskCovDay
from alphamind.data.dbmodel.models import RiskCovShort
from alphamind.data.dbmodel.models import RiskCovLong
from alphamind.data.dbmodel.models import RiskCovShort
from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import SpecificRiskDay
from alphamind.data.dbmodel.models import SpecificRiskShort
from alphamind.data.dbmodel.models import SpecificRiskLong
from alphamind.data.dbmodel.models import SpecificRiskShort
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import Categories
from alphamind.data.engines.industries import INDUSTRY_MAPPING
factor_tables = [Market, RiskExposure, Uqer, Categories]
......@@ -66,5 +66,5 @@ def _map_industry_category(category: str) -> str:
raise ValueError("No other industry is supported at the current time")
def industry_list(category: str, level: int=1) -> list:
return INDUSTRY_MAPPING[category][level]
\ No newline at end of file
def industry_list(category: str, level: int = 1) -> list:
return INDUSTRY_MAPPING[category][level]
......@@ -5,21 +5,22 @@ Created on 2017-4-25
@author: cheng.li
"""
import numpy as np
import numba as nb
from typing import Dict
from typing import Tuple
from typing import Union
from typing import Dict
import numba as nb
import numpy as np
import alphamind.utilities as utils
def neutralize(x: np.ndarray,
y: np.ndarray,
groups: np.ndarray=None,
detail: bool=False,
groups: np.ndarray = None,
detail: bool = False,
weights: np.ndarray = None) \
-> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
if y.ndim == 1:
y = y.reshape((-1, 1))
......
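Editor's note: for reference, the ungrouped core of `neutralize` is an ordinary least-squares residual; a minimal sketch (not the library's grouped/weighted path):

```python
import numpy as np

def neutralize_sketch(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    if y.ndim == 1:
        y = y.reshape((-1, 1))
    beta = np.linalg.lstsq(x, y, rcond=None)[0]   # regress y on x
    return y - x @ beta                           # keep the residuals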
......@@ -5,19 +5,20 @@ Created on 2017-8-21
@author: cheng.li
"""
from typing import Optional
from typing import List
from typing import Optional
import numpy as np
from alphamind.data.neutralize import neutralize
from alphamind.utilities import alpha_logger
def factor_processing(raw_factors: np.ndarray,
pre_process: Optional[List]=None,
risk_factors: Optional[np.ndarray]=None,
post_process: Optional[List]=None,
pre_process: Optional[List] = None,
risk_factors: Optional[np.ndarray] = None,
post_process: Optional[List] = None,
groups=None) -> np.ndarray:
new_factors = raw_factors
if pre_process:
......@@ -31,7 +32,8 @@ def factor_processing(raw_factors: np.ndarray,
if post_process:
for p in post_process:
if p.__name__ == 'winsorize_normal':
alpha_logger.warning("winsorize_normal normally should not be done after neutralize")
alpha_logger.warning("winsorize_normal "
"normally should not be done after neutralize")
new_factors = p(new_factors, groups=groups)
return new_factors
......@@ -9,7 +9,6 @@ import numpy as np
def quantile(x: np.ndarray, n_bins: int) -> np.ndarray:
n = x.size
sorter = x.argsort()
inv = np.empty(n, dtype=int)
......@@ -17,7 +16,7 @@ def quantile(x: np.ndarray, n_bins: int) -> np.ndarray:
bin_size = float(n) / n_bins
pillars = [int(i * bin_size) for i in range(1, n_bins+1)]
pillars = [int(i * bin_size) for i in range(1, n_bins + 1)]
q_groups = np.empty(n, dtype=int)
......
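Editor's note: a worked instance of the pillar arithmetic in the hunk above: with n = 10 samples and n_bins = 5, the bin edges in rank space land every two ranks.

```python
n, n_bins = 10, 5
bin_size = float(n) / n_bins                                 # 2.0
pillars = [int(i * bin_size) for i in range(1, n_bins + 1)]
assert pillars == [2, 4, 6, 8, 10]                           # two ranked samples per bin
```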
......@@ -6,13 +6,14 @@ Created on 2017-8-8
"""
from typing import Optional
import numpy as np
from scipy.stats import rankdata
import alphamind.utilities as utils
import alphamind.utilities as utils
def rank(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
if x.ndim == 1:
x = x.reshape((-1, 1))
......@@ -30,8 +31,7 @@ def rank(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
return (rankdata(x).astype(float) - 1.).reshape((-1, 1))
def percentile(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
def percentile(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
if x.ndim == 1:
x = x.reshape((-1, 1))
......
......@@ -6,17 +6,17 @@ Created on 2017-4-25
"""
import numpy as np
from alphamind.utilities import group_mapping
from alphamind.utilities import transform
from alphamind.utilities import aggregate
from alphamind.utilities import array_index
from alphamind.utilities import group_mapping
from alphamind.utilities import simple_mean
from alphamind.utilities import simple_std
from alphamind.utilities import simple_sqrsum
from alphamind.utilities import simple_std
from alphamind.utilities import transform
def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
def standardize(x: np.ndarray, groups: np.ndarray = None, ddof=1) -> np.ndarray:
if groups is not None:
groups = group_mapping(groups)
mean_values = transform(groups, x, 'mean')
......@@ -27,7 +27,7 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)
def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
def projection(x: np.ndarray, groups: np.ndarray = None, axis=1) -> np.ndarray:
if groups is not None and axis == 0:
groups = group_mapping(groups)
projected = transform(groups, x, 'project')
......@@ -38,13 +38,13 @@ def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
class Standardizer(object):
def __init__(self, ddof: int=1):
def __init__(self, ddof: int = 1):
self.ddof = ddof
self.mean = None
self.std = None
self.labels = None
def fit(self, x: np.ndarray, groups: np.ndarray=None):
def fit(self, x: np.ndarray, groups: np.ndarray = None):
if groups is not None:
group_index = group_mapping(groups)
self.mean = aggregate(group_index, x, 'mean')
......@@ -54,12 +54,12 @@ class Standardizer(object):
self.mean = simple_mean(x, axis=0)
self.std = simple_std(x, axis=0, ddof=self.ddof)
def transform(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
if groups is not None:
index = array_index(self.labels, groups)
return (x - self.mean[index]) / np.maximum(self.std[index], 1e-8)
else:
return (x - self.mean) / np.maximum(self.std, 1e-8)
def __call__(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
return standardize(x, groups, self.ddof)
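Editor's note: the fit/transform split on `Standardizer` above lets you estimate the mean/std on one panel and apply the same parameters to another; a minimal sketch with illustrative data:

```python
import numpy as np

s = Standardizer(ddof=1)
train_x = np.random.randn(500, 3)
s.fit(train_x)                  # stores column means and stds
new_x = np.random.randn(500, 3)
z = s.transform(new_x)          # (new_x - train mean) / max(train std, 1e-8)
```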
......@@ -6,14 +6,14 @@ Created on 2017-8-23
"""
import copy
import pandas as pd
from PyFin.api import pyFinAssert
from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
from PyFin.api import pyFinAssert
from PyFin.api import transform as transform_impl
def factor_translator(factor_pool):
if not factor_pool:
return None, None
......@@ -24,7 +24,8 @@ def factor_translator(factor_pool):
elif isinstance(factor_pool, dict):
dependency = set()
for k, v in factor_pool.items():
pyFinAssert(isinstance(k, str), ValueError, 'factor_name {0} should be string.'.format(k))
pyFinAssert(isinstance(k, str), ValueError,
'factor_name {0} should be string.'.format(k))
pyFinAssert(isinstance(v, SecurityValueHolder) or isinstance(v, str),
ValueError,
'expression {0} should be a value holder or a string.'.format(v))
......@@ -80,5 +81,4 @@ class Transformer(object):
if __name__ == '__main__':
transformer = Transformer(['c', 'a'])
......@@ -5,14 +5,15 @@ Created on 2017-4-25
@author: cheng.li
"""
import numpy as np
import numba as nb
from alphamind.utilities import group_mapping
import numpy as np
from alphamind.utilities import aggregate
from alphamind.utilities import transform
from alphamind.utilities import array_index
from alphamind.utilities import group_mapping
from alphamind.utilities import simple_mean
from alphamind.utilities import simple_std
from alphamind.utilities import transform
@nb.njit(nogil=True, cache=True)
......@@ -31,7 +32,6 @@ def mask_values_2d(x: np.ndarray,
res[i, j] = ubound
elif x[i, j] < lbound:
res[i, j] = lbound
return res
......@@ -54,7 +54,10 @@ def mask_values_1d(x: np.ndarray,
return res
def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1, groups: np.ndarray = None) -> np.ndarray:
def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1,
groups: np.ndarray = None,
fill_method: str = 'flat',
fill_interval: float = 0.5) -> np.ndarray:
if groups is not None:
groups = group_mapping(groups)
mean_values = transform(groups, x, 'mean')
......@@ -69,14 +72,14 @@ def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1, groups: np.ndarra
class NormalWinsorizer(object):
def __init__(self, num_stds: int=3, ddof=1):
def __init__(self, num_stds: int = 3, ddof=1):
self.num_stds = num_stds
self.ddof = ddof
self.mean = None
self.std = None
self.labels = None
def fit(self, x: np.ndarray, groups: np.ndarray=None):
def fit(self, x: np.ndarray, groups: np.ndarray = None):
if groups is not None:
group_index = group_mapping(groups)
self.mean = aggregate(group_index, x, 'mean')
......@@ -86,12 +89,12 @@ class NormalWinsorizer(object):
self.mean = simple_mean(x, axis=0)
self.std = simple_std(x, axis=0, ddof=self.ddof)
def transform(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
if groups is not None:
index = array_index(self.labels, groups)
return mask_values_2d(x, self.mean[index], self.std[index], self.num_stds)
else:
return mask_values_1d(x, self.mean, self.std, self.num_stds)
def __call__(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
return winsorize_normal(x, self.num_stds, self.ddof, groups)
\ No newline at end of file
def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
return winsorize_normal(x, self.num_stds, self.ddof, groups)
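A hedged usage sketch for the winsorizer above (toy data, module import assumed): values farther than num_stds standard deviations from the mean are clipped back to the boundary, and the class variant reuses fitted statistics:

import numpy as np
from alphamind.data.winsorize import NormalWinsorizer, winsorize_normal

x = np.random.randn(500, 2)
x[0, 0] = 10.                               # plant an outlier

clipped = winsorize_normal(x, num_stds=3)   # stateless clip

w = NormalWinsorizer(num_stds=3)
w.fit(x)                                    # store mean/std of this sample
same = w.transform(x)                       # clip using the stored statistics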
......@@ -12,4 +12,4 @@ class PortfolioBuilderException(Exception):
self.msg = msg
def __str__(self):
return str(self.msg)
\ No newline at end of file
return str(self.msg)
......@@ -5,9 +5,8 @@ Created on 2017-9-22
@author: cheng.li
"""
import abc
from typing import Tuple
import numpy as np
import pandas as pd
......
......@@ -6,7 +6,9 @@ Created on 2017-9-22
"""
from typing import Tuple
import pandas as pd
from alphamind.execution.baseexecutor import ExecutorBase
......
......@@ -7,7 +7,9 @@ Created on 2017-9-25
from typing import List
from typing import Tuple
import pandas as pd
from alphamind.execution.baseexecutor import ExecutorBase
......
......@@ -6,9 +6,11 @@ Created on 2017-9-22
"""
from typing import Tuple
import pandas as pd
from PyFin.Math.Accumulators import MovingStandardDeviation
from PyFin.Math.Accumulators import MovingAverage
from PyFin.Math.Accumulators import MovingStandardDeviation
from alphamind.execution.baseexecutor import ExecutorBase
......
......@@ -6,7 +6,9 @@ Created on 2017-9-22
"""
from typing import Tuple
import pandas as pd
from alphamind.execution.baseexecutor import ExecutorBase
......
......@@ -5,8 +5,8 @@ Created on 2017-11-27
@author: cheng.li
"""
from alphamind.utilities import encode
from alphamind.utilities import decode
from alphamind.utilities import encode
def encode_formula(formula):
......@@ -30,4 +30,4 @@ if __name__ == '__main__':
str_repr = encode_formula(eps_q_res)
decoded_formula = decode_formula(str_repr)
print(decoded_formula)
\ No newline at end of file
print(decoded_formula)
......@@ -5,22 +5,18 @@ Created on 2017-5-2
@author: cheng.li
"""
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.loader import load_model
from alphamind.model.svm import NvSVRModel
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBTrainer
from alphamind.model.svm import NvSVRModel
from alphamind.model.loader import load_model
__all__ = ['LinearRegression',
'LassoRegression',
'ConstLinearModel',
......@@ -31,4 +27,4 @@ __all__ = ['LinearRegression',
'XGBClassifier',
'XGBTrainer',
'NvSVRModel',
'load_model']
\ No newline at end of file
'load_model']
......@@ -5,24 +5,26 @@ Created on 2017-9-27
@author: cheng.li
"""
import copy
import bisect
import copy
from typing import Iterable
from typing import Tuple
import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq
from alphamind.model.modelbase import ModelBase
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe
from alphamind.data.engines.universe import load_universe
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.rank import rank
from alphamind.data.standardize import standardize
from alphamind.model.loader import load_model
from alphamind.data.winsorize import winsorize_normal
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.loader import load_model
from alphamind.model.modelbase import ModelBase
PROCESS_MAPPING = {
'winsorize_normal': winsorize_normal,
......@@ -87,7 +89,7 @@ class DataMeta(object):
@classmethod
def load(cls, data_desc: dict):
freq = data_desc['freq']
universe =load_universe(data_desc['universe'])
universe = load_universe(data_desc['universe'])
batch = data_desc['batch']
neutralized_risk = data_desc['neutralized_risk']
risk_model = data_desc['risk_model']
......@@ -193,7 +195,8 @@ class Composer:
codes = x.index
return pd.DataFrame(model.predict(x).flatten(), index=codes), x
def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None, d_type: str = 'test') \
def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None,
d_type: str = 'test') \
-> Tuple[float, pd.DataFrame, pd.DataFrame]:
model = self._fetch_latest_model(ref_date)
if x is None or y is None:
......@@ -244,8 +247,7 @@ class Composer:
if __name__ == '__main__':
from alphamind.api import (risk_styles,
industry_styles,
from alphamind.api import (industry_styles,
standardize,
winsorize_normal,
DataMeta,
......@@ -287,13 +289,15 @@ if __name__ == '__main__':
warm_start=warm_start,
data_source=data_source)
alpha_model = LinearRegression(features=regress_features, fit_intercept=True, fit_target=fit_target)
alpha_model = LinearRegression(features=regress_features, fit_intercept=True,
fit_target=fit_target)
composer = Composer(alpha_model=alpha_model, data_meta=data_meta)
start_date = '2014-01-01'
end_date = '2016-01-01'
regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept, fit_target=fit_target)
regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept,
fit_target=fit_target)
regression_composer = Composer(alpha_model=regression_model, data_meta=data_meta)
data_package1 = fetch_data_package(engine,
......
......@@ -7,21 +7,23 @@ Created on 2017-8-24
import bisect
import datetime as dt
import numpy as np
import pandas as pd
from typing import Iterable
from typing import Union
from PyFin.api import makeSchedule
import numpy as np
import pandas as pd
from PyFin.DateUtilities import Period
from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar
from PyFin.api import makeSchedule
from PyFin.api import pyFinAssert
from PyFin.DateUtilities import Period
from alphamind.data.transformer import Transformer
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.sqlengine import total_risk_factors
from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.data.engines.sqlengine import total_risk_factors
from alphamind.data.transformer import Transformer
from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
......@@ -60,7 +62,7 @@ def prepare_data(engine: SqlEngine,
universe: Universe,
benchmark: int,
warm_start: int = 0,
fit_target: Union[Transformer, object]=None):
fit_target: Union[Transformer, object] = None):
if warm_start > 0:
p = Period(frequency)
p = Period(length=-warm_start * p.length(), units=p.units())
......@@ -91,7 +93,8 @@ def prepare_data(engine: SqlEngine,
target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
else:
one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target,
dates=dates + [one_more_date])
target_df = target_df[target_df.trade_date.isin(dates)]
target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
alpha_logger.info("fit target data loading finished")
......@@ -175,7 +178,8 @@ def batch_processing(names,
inner_left_index = bisect.bisect_left(sub_dates, end)
inner_right_index = bisect.bisect_right(sub_dates, end)
predict_x_buckets[end] = pd.DataFrame(ne_x[inner_left_index:inner_right_index], columns=names)
predict_x_buckets[end] = pd.DataFrame(ne_x[inner_left_index:inner_right_index],
columns=names)
if risk_exp is not None:
predict_risk_buckets[end] = this_risk_exp[inner_left_index:inner_right_index]
else:
......@@ -227,7 +231,8 @@ def fetch_data_package(engine: SqlEngine,
fit_target=fit_target)
target_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
_merge_df(engine, names, factor_df, target_df, universe, dates, risk_model, neutralized_risk)
_merge_df(engine, names, factor_df, target_df, universe, dates, risk_model,
neutralized_risk)
alpha_logger.info("data merging finished")
......@@ -259,17 +264,24 @@ def fetch_data_package(engine: SqlEngine,
ret['x_names'] = names
ret['settlement'] = target_df[target_df.trade_date >= start_date]
train_x_buckets = {k: train_x_buckets[k] for k in train_x_buckets if k.strftime('%Y-%m-%d') >= start_date}
train_y_buckets = {k: train_y_buckets[k] for k in train_y_buckets if k.strftime('%Y-%m-%d') >= start_date}
train_risk_buckets = {k: train_risk_buckets[k] for k in train_risk_buckets if k.strftime('%Y-%m-%d') >= start_date}
predict_x_buckets = {k: predict_x_buckets[k] for k in predict_x_buckets if k.strftime('%Y-%m-%d') >= start_date}
predict_y_buckets = {k: predict_y_buckets[k] for k in predict_y_buckets if k.strftime('%Y-%m-%d') >= start_date}
train_x_buckets = {k: train_x_buckets[k] for k in train_x_buckets if
k.strftime('%Y-%m-%d') >= start_date}
train_y_buckets = {k: train_y_buckets[k] for k in train_y_buckets if
k.strftime('%Y-%m-%d') >= start_date}
train_risk_buckets = {k: train_risk_buckets[k] for k in train_risk_buckets if
k.strftime('%Y-%m-%d') >= start_date}
predict_x_buckets = {k: predict_x_buckets[k] for k in predict_x_buckets if
k.strftime('%Y-%m-%d') >= start_date}
predict_y_buckets = {k: predict_y_buckets[k] for k in predict_y_buckets if
k.strftime('%Y-%m-%d') >= start_date}
if neutralized_risk:
predict_risk_buckets = {k: predict_risk_buckets[k] for k in predict_risk_buckets if k.strftime('%Y-%m-%d') >= start_date}
predict_risk_buckets = {k: predict_risk_buckets[k] for k in predict_risk_buckets if
k.strftime('%Y-%m-%d') >= start_date}
else:
predict_risk_buckets = None
predict_codes_bucket = {k: predict_codes_bucket[k] for k in predict_codes_bucket if k.strftime('%Y-%m-%d') >= start_date}
predict_codes_bucket = {k: predict_codes_bucket[k] for k in predict_codes_bucket if
k.strftime('%Y-%m-%d') >= start_date}
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets,
......@@ -312,7 +324,8 @@ def fetch_train_phase(engine,
target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
else:
one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target,
dates=dates + [one_more_date])
target_df = target_df[target_df.trade_date.isin(dates)]
target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
......@@ -322,10 +335,12 @@ def fetch_train_phase(engine,
['trade_date', 'code'] + transformer.names]
target_df, dates, date_label, risk_exp, x_values, y_values, _, _, codes = \
_merge_df(engine, transformer.names, factor_df, target_df, universe, dates, risk_model, neutralized_risk)
_merge_df(engine, transformer.names, factor_df, target_df, universe, dates, risk_model,
neutralized_risk)
if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
pyFinAssert(len(dates) >= 2, ValueError, "No previous data for training for the date {0}".format(ref_date))
pyFinAssert(len(dates) >= 2, ValueError,
"No previous data for training for the date {0}".format(ref_date))
end = dates[-2]
start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
else:
......@@ -353,7 +368,8 @@ def fetch_train_phase(engine,
ret = dict()
ret['x_names'] = transformer.names
ret['train'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'y': ne_y, 'code': this_code}
ret['train'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'y': ne_y,
'code': this_code}
return ret
......@@ -392,7 +408,8 @@ def fetch_predict_phase(engine,
factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)
if fillna:
factor_df = factor_df.groupby('trade_date').apply(lambda x: x.fillna(x.median())).reset_index(
factor_df = factor_df.groupby('trade_date').apply(
lambda x: x.fillna(x.median())).reset_index(
drop=True).dropna()
else:
factor_df = factor_df.dropna()
......@@ -401,7 +418,8 @@ def fetch_predict_phase(engine,
target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
else:
one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target, dates=dates + [one_more_date])
target_df = engine.fetch_factor_range_forward(universe, factors=fit_target,
dates=dates + [one_more_date])
target_df = target_df[target_df.trade_date.isin(dates)]
target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
......@@ -467,7 +485,7 @@ def fetch_predict_phase(engine,
ret = dict()
ret['x_names'] = transformer.names
ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes, 'y': ne_y.flatten()}
ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes,
'y': ne_y.flatten()}
return ret
......@@ -6,10 +6,11 @@ Created on 2017-5-10
"""
import numpy as np
from sklearn.linear_model import LinearRegression as LinearRegressionImpl
from PyFin.api import pyFinAssert
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression as LinearRegressionImpl
from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl
from PyFin.api import pyFinAssert
from alphamind.model.modelbase import create_model_base
......@@ -78,7 +79,8 @@ class LinearRegression(create_model_base('sklearn')):
class LassoRegression(create_model_base('sklearn')):
def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None, **kwargs):
def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None,
**kwargs):
super().__init__(features=features, fit_target=fit_target)
self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
......
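A hedged round-trip sketch for the linear models touched here, with invented data and assuming the usual fit/predict/save/load_model cycle exercised by the tests in this commit:

import numpy as np
import pandas as pd
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.loader import load_model

x = pd.DataFrame(np.random.randn(200, 3), columns=['a', 'b', 'c'])
y = np.random.randn(200)

model = LassoRegression(alpha=0.01, features=['a', 'b', 'c'])
model.fit(x, y)

desc = model.save()        # serializable description dict
clone = load_model(desc)   # reconstruct through the loader
np.testing.assert_array_almost_equal(model.predict(x), clone.predict(x))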
......@@ -5,21 +5,20 @@ Created on 2017-9-5
@author: cheng.li
"""
from alphamind.model.modelbase import ModelBase
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.modelbase import ModelBase
from alphamind.model.svm import NvSVRModel
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBTrainer
from alphamind.model.svm import NvSVRModel
def load_model(model_desc: dict) -> ModelBase:
model_name = model_desc['model_name']
model_name_parts = set(model_name.split('.'))
......
......@@ -7,16 +7,18 @@ Created on 2017-9-4
import abc
from distutils.version import LooseVersion
import arrow
import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq
from sklearn import __version__ as sklearn_version
from xgboost import __version__ as xgbboot_version
from alphamind.data.transformer import Transformer
from alphamind.utilities import alpha_logger
from alphamind.utilities import encode
from alphamind.utilities import decode
from alphamind.data.transformer import Transformer
from alphamind.utilities import encode
class ModelBase(metaclass=abc.ABCMeta):
......@@ -67,7 +69,8 @@ class ModelBase(metaclass=abc.ABCMeta):
def save(self) -> dict:
if self.__class__.__module__ == '__main__':
alpha_logger.warning("model is defined in a main module. The model_name may not be correct.")
alpha_logger.warning(
"model is defined in a main module. The model_name may not be correct.")
model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__,
language='python',
......@@ -109,7 +112,8 @@ def create_model_base(party_name=None):
elif self._lib_name == 'xgboost':
model_desc[self._lib_name + "_version"] = xgbboot_version
else:
raise ValueError("3rd party lib name ({0}) is not recognized".format(self._lib_name))
raise ValueError(
"3rd party lib name ({0}) is not recognized".format(self._lib_name))
return model_desc
@classmethod
......@@ -121,12 +125,16 @@ def create_model_base(party_name=None):
elif cls._lib_name == 'xgboost':
current_version = xgbboot_version
else:
raise ValueError("3rd party lib name ({0}) is not recognized".format(cls._lib_name))
if LooseVersion(current_version) < LooseVersion(model_desc[cls._lib_name + "_version"]):
alpha_logger.warning('Current {2} version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(sklearn_version,
model_desc[cls._lib_name],
cls._lib_name))
raise ValueError(
"3rd party lib name ({0}) is not recognized".format(cls._lib_name))
if LooseVersion(current_version) < LooseVersion(
model_desc[cls._lib_name + "_version"]):
alpha_logger.warning(
'Current {2} version {0} is lower than the model version {1}. '
'Loaded model may work incorrectly.'.format(current_version,
model_desc[cls._lib_name + "_version"],
cls._lib_name))
return obj_layout
return ExternalLibBase
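The version guard above boils down to a LooseVersion comparison; a self-contained sketch with invented version strings:

from distutils.version import LooseVersion

saved_with = '0.90'   # version recorded in the model description
installed = '0.82'    # version available at load time

if LooseVersion(installed) < LooseVersion(saved_with):
    print('Current xgboost version {0} is lower than the model version {1}. '
          'Loaded model may work incorrectly.'.format(installed, saved_with))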
......@@ -6,6 +6,7 @@ Created on 2018-7-9
"""
from sklearn.svm import NuSVR
from alphamind.model.modelbase import create_model_base
......
......@@ -8,20 +8,21 @@ Created on 2017-12-4
import arrow
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.model_selection import train_test_split
import xgboost as xgb
from xgboost import XGBRegressor as XGBRegressorImpl
from xgboost import XGBClassifier as XGBClassifierImpl
from xgboost import XGBRegressor as XGBRegressorImpl
from alphamind.model.modelbase import create_model_base
class RandomForestRegressor(create_model_base('sklearn')):
def __init__(self,
n_estimators: int=100,
max_features: str='auto',
n_estimators: int = 100,
max_features: str = 'auto',
features=None,
fit_target=None,
**kwargs):
......@@ -38,8 +39,8 @@ class RandomForestRegressor(create_model_base('sklearn')):
class RandomForestClassifier(create_model_base('sklearn')):
def __init__(self,
n_estimators: int=100,
max_features: str='auto',
n_estimators: int = 100,
max_features: str = 'auto',
features=None,
fit_target=None,
**kwargs):
......@@ -56,13 +57,13 @@ class RandomForestClassifier(create_model_base('sklearn')):
class XGBRegressor(create_model_base('xgboost')):
def __init__(self,
n_estimators: int=100,
learning_rate: float=0.1,
max_depth: int=3,
n_estimators: int = 100,
learning_rate: float = 0.1,
max_depth: int = 3,
features=None,
fit_target=None,
n_jobs: int=1,
missing: float=np.nan,
n_jobs: int = 1,
missing: float = np.nan,
**kwargs):
super().__init__(features=features, fit_target=fit_target)
self.impl = XGBRegressorImpl(n_estimators=n_estimators,
......@@ -80,13 +81,13 @@ class XGBRegressor(create_model_base('xgboost')):
class XGBClassifier(create_model_base('xgboost')):
def __init__(self,
n_estimators: int=100,
learning_rate: float=0.1,
max_depth: int=3,
n_estimators: int = 100,
learning_rate: float = 0.1,
max_depth: int = 3,
features=None,
fit_target=None,
n_jobs: int=1,
missing: float=np.nan,
n_jobs: int = 1,
missing: float = np.nan,
**kwargs):
super().__init__(features=features, fit_target=fit_target)
self.impl = XGBClassifierImpl(n_estimators=n_estimators,
......@@ -108,8 +109,8 @@ class XGBTrainer(create_model_base('xgboost')):
objective='binary:logistic',
booster='gbtree',
tree_method='hist',
n_estimators: int=100,
learning_rate: float=0.1,
n_estimators: int = 100,
learning_rate: float = 0.1,
max_depth=3,
eval_sample=None,
early_stopping_rounds=None,
......@@ -117,8 +118,8 @@ class XGBTrainer(create_model_base('xgboost')):
colsample_bytree=1.,
features=None,
fit_target=None,
random_state: int=0,
n_jobs: int=1,
random_state: int = 0,
n_jobs: int = 1,
**kwargs):
super().__init__(features=features, fit_target=fit_target)
self.params = {
......@@ -173,8 +174,3 @@ class XGBTrainer(create_model_base('xgboost')):
imps = self.impl.get_fscore().items()
imps = sorted(imps, key=lambda x: x[0])
return list(zip(*imps))[1]
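The three importance lines above reduce to a sort-and-unzip over the booster's fscore dict; a toy illustration:

imps = {'f1': 12, 'f0': 30, 'f2': 7}.items()   # stand-in for impl.get_fscore().items()
imps = sorted(imps, key=lambda x: x[0])        # [('f0', 30), ('f1', 12), ('f2', 7)]
print(list(zip(*imps))[1])                     # (30, 12, 7)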
......@@ -3,4 +3,4 @@
Created on 2017-4-26
@author: cheng.li
"""
\ No newline at end of file
"""
......@@ -5,18 +5,19 @@ Created on 2017-7-21
@author: cheng.li
"""
from deprecated import deprecated
from math import inf
import numpy as np
import pandas as pd
from enum import IntEnum
from typing import Tuple
from typing import Optional
from math import inf
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union
from typing import Iterable
import numpy as np
import pandas as pd
from PyFin.api import pyFinAssert
from deprecated import deprecated
class BoundaryDirection(IntEnum):
......@@ -43,13 +44,16 @@ class BoundaryImpl(object):
self._validation()
def _validation(self):
pyFinAssert(self.b_type in [BoundaryType.ABSOLUTE, BoundaryType.RELATIVE, BoundaryType.MAXABSREL, BoundaryType.MINABSREL],
ValueError,
"Boundary Type {0} is not recognized".format(self.b_type))
pyFinAssert(
self.b_type in [BoundaryType.ABSOLUTE, BoundaryType.RELATIVE, BoundaryType.MAXABSREL,
BoundaryType.MINABSREL],
ValueError,
"Boundary Type {0} is not recognized".format(self.b_type))
pyFinAssert(self.direction == BoundaryDirection.LOWER or self.direction == BoundaryDirection.UPPER,
ValueError,
"Boundary direction {0} is not recognized".format(self.direction))
pyFinAssert(
self.direction == BoundaryDirection.LOWER or self.direction == BoundaryDirection.UPPER,
ValueError,
"Boundary direction {0} is not recognized".format(self.direction))
def __call__(self, center: float):
if self.b_type == BoundaryType.ABSOLUTE:
......@@ -77,7 +81,8 @@ class BoundaryImpl(object):
abs_bound = center + abs_threshold
return min(rel_bound, abs_bound)
else:
pyFinAssert(center >= 0., ValueError, "relative bounds only support positive back bone value")
pyFinAssert(center >= 0., ValueError,
"relative bounds only support positive back bone value")
return self.val * center
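Plain-number sketch of the boundary arithmetic in this class, inferred from the branches shown: an absolute bound shifts the center, a relative bound scales it, and the MAXABSREL/MINABSREL types take the looser or tighter of the two:

center = 0.05           # e.g. a benchmark weight
abs_threshold = 0.01
rel_scale = 1.5

absolute = center + abs_threshold     # 0.06
relative = rel_scale * center         # 0.075
maxabsrel = max(relative, absolute)   # looser upper bound: 0.075
minabsrel = min(relative, absolute)   # tighter upper bound: 0.06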
......@@ -129,7 +134,7 @@ class LinearConstraints(object):
def __init__(self,
bounds: Dict[str, BoxBoundary],
cons_mat: pd.DataFrame,
backbone: np.ndarray=None):
backbone: np.ndarray = None):
self.names = list(set(bounds.keys()).intersection(set(cons_mat.columns)))
self.bounds = bounds
self.cons_mat = cons_mat
......@@ -159,7 +164,8 @@ class LinearConstraints(object):
return self.cons_mat[self.names].values
@deprecated(reason="Constraints is deprecated in alpha-mind 0.1.1. Please use LinearConstraints instead.")
@deprecated(
reason="Constraints is deprecated in alpha-mind 0.1.1. Please use LinearConstraints instead.")
class Constraints(object):
def __init__(self,
......@@ -185,8 +191,9 @@ class Constraints(object):
def add_exposure(self, tags: np.ndarray, new_exp: np.ndarray):
if len(tags) != new_exp.shape[1]:
raise ValueError('new dags length is not compatible with exposure shape {1}'.format(len(tags),
new_exp.shape))
raise ValueError(
'new tags length {0} is not compatible with exposure shape {1}'.format(len(tags),
new_exp.shape))
for tag in tags:
if tag in self.risk_maps:
......
......@@ -9,7 +9,6 @@ import numpy as np
def evolve_positions(positions: np.ndarray, dx_ret: np.ndarray) -> np.ndarray:
# assume return is log return
simple_return = np.exp(dx_ret)
......
......@@ -5,12 +5,14 @@ Created on 2017-5-5
@author: cheng.li
"""
import numpy as np
from typing import Tuple
from typing import Union
from alphamind.exceptions.exceptions import PortfolioBuilderException
import numpy as np
from alphamind.cython.optimizers import LPOptimizer
from alphamind.exceptions.exceptions import PortfolioBuilderException
def linear_builder(er: np.ndarray,
lbound: Union[np.ndarray, float],
......@@ -19,7 +21,7 @@ def linear_builder(er: np.ndarray,
risk_target: Tuple[np.ndarray, np.ndarray],
turn_over_target: float = None,
current_position: np.ndarray = None,
method: str='ecos') -> Tuple[str, np.ndarray, np.ndarray]:
method: str = 'ecos') -> Tuple[str, np.ndarray, np.ndarray]:
er = er.flatten()
n, m = risk_constraints.shape
......
......@@ -6,6 +6,7 @@ Created on 2017-5-9
"""
import numpy as np
from alphamind.utilities import group_mapping
from alphamind.utilities import simple_abssum
from alphamind.utilities import transform
......
......@@ -5,14 +5,16 @@ Created on 2017-6-27
@author: cheng.li
"""
import numpy as np
from typing import Union
from typing import Tuple
from typing import Optional
from typing import Dict
from typing import Optional
from typing import Tuple
from typing import Union
import cvxpy
from alphamind.cython.optimizers import QPOptimizer
import numpy as np
from alphamind.cython.optimizers import CVOptimizer
from alphamind.cython.optimizers import QPOptimizer
from alphamind.exceptions.exceptions import PortfolioBuilderException
......@@ -52,9 +54,10 @@ def mean_variance_builder(er: np.ndarray,
ubound: Union[np.ndarray, float],
risk_exposure: Optional[np.ndarray],
risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
lam: float=1.,
linear_solver: str='ma27') -> Tuple[str, float, np.ndarray]:
lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, risk_target)
lam: float = 1.,
linear_solver: str = 'ma27') -> Tuple[str, float, np.ndarray]:
lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure,
risk_target)
if np.all(lbound == -np.inf) and np.all(ubound == np.inf) and cons_mat is None:
# using fast path cvxpy
......@@ -102,8 +105,9 @@ def target_vol_builder(er: np.ndarray,
risk_exposure: Optional[np.ndarray],
risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
vol_target: float = 1.,
linear_solver: str = 'ma27')-> Tuple[str, float, np.ndarray]:
lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure, risk_target)
linear_solver: str = 'ma27') -> Tuple[str, float, np.ndarray]:
lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure,
risk_target)
optimizer = CVOptimizer(er,
risk_model['cov'],
......@@ -119,6 +123,3 @@ def target_vol_builder(er: np.ndarray,
linear_solver=linear_solver)
return _create_result(optimizer, bm)
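For orientation, the quantity these builders trade off is the textbook quadratic utility er'w - (lam / 2) * w' Sigma w; the real work happens inside QPOptimizer/CVOptimizer, so this numpy evaluation is purely illustrative:

import numpy as np

er = np.array([0.02, 0.01, 0.03])
sigma = np.diag([0.04, 0.02, 0.05])   # toy covariance
w = np.array([0.3, 0.3, 0.4])
lam = 1.

utility = er @ w - 0.5 * lam * (w @ sigma @ w)
print(utility)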
......@@ -8,12 +8,13 @@ Created on 2017-5-4
import numpy as np
from numpy import zeros
from numpy import zeros_like
from alphamind.utilities import groupby
from alphamind.utilities import set_value
def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None, masks: np.ndarray=None) -> np.ndarray:
def percent_build(er: np.ndarray, percent: float, groups: np.ndarray = None,
masks: np.ndarray = None) -> np.ndarray:
er = er.copy()
if masks is not None:
......@@ -28,7 +29,7 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None, masks
index_diff, order = groupby(groups)
start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc+1]
current_index = order[start:diff_loc + 1]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
use_rank = int(percent * len(current_index))
......
......@@ -8,12 +8,13 @@ Created on 2017-4-26
import numpy as np
from numpy import zeros
from numpy import zeros_like
from alphamind.utilities import groupby
from alphamind.utilities import set_value
def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None, masks: np.ndarray=None) -> np.ndarray:
def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray = None,
masks: np.ndarray = None) -> np.ndarray:
er = er.copy()
if masks is not None:
......@@ -28,7 +29,7 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None, masks: np
index_diff, order = groupby(groups)
start = 0
for diff_loc in index_diff:
current_index = order[start:diff_loc+1]
current_index = order[start:diff_loc + 1]
current_ordering = neg_er[current_index].argsort()
current_ordering.shape = -1, 1
set_value(weights, current_index[current_ordering[:use_rank]], 1.)
......
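A minimal sketch of rank_build's raw output (import assumed, er invented): the top use_rank names by expected return get weight 1 and the rest 0; with groups the selection repeats inside every group:

import numpy as np
from alphamind.portfolio.rankbuilder import rank_build

er = np.array([0.3, -0.1, 0.8, 0.05]).reshape((-1, 1))
weights = rank_build(er, use_rank=2)
print(weights.ravel())   # [1. 0. 1. 0.] - the two highest-er names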
......@@ -7,6 +7,7 @@ Created on 2018-5-29
import abc
from typing import List
import pandas as pd
......@@ -22,13 +23,13 @@ class FullRiskModel(RiskModel):
self.codes = sec_cov.index.tolist()
self.sec_cov = sec_cov.loc[self.codes, self.codes]
def get_cov(self, codes: List[int]=None):
def get_cov(self, codes: List[int] = None):
if codes:
return self.sec_cov.loc[codes, codes].values
else:
return self.sec_cov.values
def get_risk_profile(self, codes: List[int]=None):
def get_risk_profile(self, codes: List[int] = None):
return dict(
cov=self.get_cov(codes),
factor_cov=None,
......@@ -51,7 +52,7 @@ class FactorRiskModel(RiskModel):
self.factor_cov = self.factor_cov.loc[self.factor_names, self.factor_names]
self.idsync = self.idsync[self.codes]
def get_risk_exp(self, codes: List[int]=None):
def get_risk_exp(self, codes: List[int] = None):
if codes:
return self.risk_exp.loc[codes, :].values
else:
......@@ -60,16 +61,16 @@ class FactorRiskModel(RiskModel):
def get_factor_cov(self):
return self.factor_cov.values
def get_idsync(self, codes: List[int]=None):
def get_idsync(self, codes: List[int] = None):
if codes:
return self.idsync[codes].values
else:
return self.idsync.values
def get_risk_profile(self, codes: List[int]=None):
def get_risk_profile(self, codes: List[int] = None):
return dict(
cov=None,
factor_cov=self.get_factor_cov(),
factor_loading=self.get_risk_exp(codes),
idsync=self.get_idsync(codes)
)
\ No newline at end of file
)
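A hedged sketch of the full-covariance variant above; the module path and toy covariance are assumptions:

import numpy as np
import pandas as pd
from alphamind.portfolio.riskmodel import FullRiskModel   # path assumed

codes = [1, 2, 3]
sec_cov = pd.DataFrame(np.diag([0.04, 0.02, 0.05]), index=codes, columns=codes)

model = FullRiskModel(sec_cov)
sub = model.get_cov(codes=[1, 3])          # 2 x 2 covariance block
profile = model.get_risk_profile([1, 3])   # 'cov' filled, factor parts None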
......@@ -3,4 +3,4 @@
Created on 2017-4-28
@author: cheng.li
"""
\ No newline at end of file
"""
......@@ -11,9 +11,8 @@ import pandas as pd
def simple_settle(weights: np.ndarray,
dx_return: np.ndarray,
groups: np.ndarray=None,
benchmark: np.ndarray=None) -> pd.DataFrame:
groups: np.ndarray = None,
benchmark: np.ndarray = None) -> pd.DataFrame:
weights = weights.flatten()
dx_return = dx_return.flatten()
......@@ -45,5 +44,3 @@ def simple_settle(weights: np.ndarray,
return pd.DataFrame({'er': ret_agg.values,
'ic': ic_table.values},
index=ret_agg.index)
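A toy call against the signature above (import assumed): per-group aggregated return of the weights against realized returns, plus an 'ic' column, indexed by group:

import numpy as np
from alphamind.settlement.simplesettle import simple_settle

weights = np.array([0.2, 0.3, 0.5])
dx_return = np.array([0.01, -0.02, 0.03])
groups = np.array([1, 1, 2])

res = simple_settle(weights, dx_return, groups=groups)
print(res)   # DataFrame with 'er' and 'ic' columns, one row per group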
{
"strategy_name": "sample_strategy",
"data_process":
{
"pre_process": ["winsorize", "standardize"],
"neutralize_risk": ["SIZE", "industry_styles"],
"post_process": ["winsorize", "standardize"]
"data_process": {
"pre_process": [
"winsorize",
"standardize"
],
"neutralize_risk": [
"SIZE",
"industry_styles"
],
"post_process": [
"winsorize",
"standardize"
]
},
"risk_model":
{
"risk_model": {
"type": "short",
"neutralize_risk": ["SIZE", "industry_styles"]
"neutralize_risk": [
"SIZE",
"industry_styles"
]
},
"alpha_model":
{
"alpha_model": {
"model_type": "LinearRegression",
"features": ["EPS", "ROEDiluted"],
"parameters":
{
"features": [
"EPS",
"ROEDiluted"
],
"parameters": {
"fit_intercept": false
}
},
"freq": "1d",
"batch": 4,
"warm_start": 0,
"universe": ["zz500", ["zz500"]],
"universe": [
"zz500",
[
"zz500"
]
],
"benchmark": 905,
"optimizer":
{
"optimizer": {
"type": "risk_neutral",
"neutralize_risk": ["SIZE", "industry_styles"]
"neutralize_risk": [
"SIZE",
"industry_styles"
]
},
"executor":
{
"executor": {
"type": "naive"
}
}
\ No newline at end of file
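The reformatted block above is only a cosmetic change to the sample strategy description; reading it back is plain json (file name invented for illustration):

import json

with open('sample_strategy.json') as f:
    cfg = json.load(f)

print(cfg['strategy_name'])             # 'sample_strategy'
print(cfg['alpha_model']['features'])   # ['EPS', 'ROEDiluted']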
......@@ -6,24 +6,26 @@ Created on 2018-5-3
"""
import copy
import numpy as np
import pandas as pd
from PyFin.api import makeSchedule
from PyFin.api import advanceDateByCalendar
from alphamind.utilities import map_freq
from alphamind.utilities import alpha_logger
from alphamind.model.composer import train_model
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.execution.naiveexecutor import NaiveExecutor
from PyFin.api import makeSchedule
from alphamind.analysis.factoranalysis import er_portfolio_analysis
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.sqlengine import risk_styles
from alphamind.data.engines.sqlengine import industry_styles
from alphamind.data.engines.sqlengine import macro_styles
from alphamind.data.engines.sqlengine import risk_styles
from alphamind.data.processing import factor_processing
from alphamind.analysis.factoranalysis import er_portfolio_analysis
from alphamind.exceptions.exceptions import PortfolioBuilderException
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.model.composer import train_model
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
all_styles = risk_styles + industry_styles + macro_styles
......@@ -119,7 +121,8 @@ class Strategy(object):
self.index_return = self.engine.fetch_dx_return_index_range(self.benchmark,
dates=self.dates,
horizon=self.horizon,
offset=1).set_index('trade_date')
offset=1).set_index(
'trade_date')
self.total_data = total_data
def prepare_backtest_models(self):
......@@ -129,13 +132,16 @@ class Strategy(object):
if self.dask_client is None:
models = {}
for ref_date, _ in total_data_groups:
models[ref_date], _, _ = train_model(ref_date.strftime('%Y-%m-%d'), self.alpha_model, self.data_meta)
models[ref_date], _, _ = train_model(ref_date.strftime('%Y-%m-%d'),
self.alpha_model, self.data_meta)
else:
def worker(parameters):
new_model, _, _ = train_model(parameters[0].strftime('%Y-%m-%d'), parameters[1], parameters[2])
new_model, _, _ = train_model(parameters[0].strftime('%Y-%m-%d'), parameters[1],
parameters[2])
return parameters[0], new_model
l = self.dask_client.map(worker, [(d[0], self.alpha_model, self.data_meta) for d in total_data_groups])
l = self.dask_client.map(worker, [(d[0], self.alpha_model, self.data_meta) for d in
total_data_groups])
results = self.dask_client.gather(l)
models = dict(results)
self.alpha_models = models
......@@ -252,16 +258,19 @@ class Strategy(object):
positions['dx'] = self.total_data.dx.values
trade_dates = positions.trade_date.unique()
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags}, index=trade_dates)
ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags},
index=trade_dates)
ret_df['benchmark_returns'] = self.index_return['dx']
ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1], self.freq)] = 0.
ret_df = ret_df.shift(1)
ret_df.iloc[0] = 0.
ret_df['excess_return'] = ret_df['returns'] - ret_df['benchmark_returns'] * ret_df['leverage']
ret_df['excess_return'] = ret_df['returns'] - ret_df['benchmark_returns'] * ret_df[
'leverage']
return ret_df, positions
def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w, lbound, ubound, risk_model,
def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w, lbound, ubound,
risk_model,
current_position):
more_opts = running_setting.more_opts
try:
......@@ -277,7 +286,8 @@ class Strategy(object):
current_position=current_position,
target_vol=more_opts.get('target_vol'),
risk_model=risk_model,
turn_over_target=more_opts.get('turn_over_target'))
turn_over_target=more_opts.get(
'turn_over_target'))
except PortfolioBuilderException:
alpha_logger.warning("Not able to fit the constraints. Using full re-balance.")
target_pos, _ = er_portfolio_analysis(er,
......@@ -297,16 +307,12 @@ class Strategy(object):
if __name__ == '__main__':
import os
from matplotlib import pyplot as plt
from dask.distributed import Client
from PyFin.api import CSQuantiles
from PyFin.api import LAST
from alphamind.api import Universe
from alphamind.api import ConstLinearModel
from alphamind.api import XGBTrainer
from alphamind.api import DataMeta
from alphamind.api import industry_list
from alphamind.api import winsorize_normal
from alphamind.api import standardize
from matplotlib import pyplot as plt
from matplotlib.pylab import mpl
......@@ -408,4 +414,4 @@ if __name__ == '__main__':
return ret_df
create_scenario(0.01, target_vol=0.01, method='tv')
\ No newline at end of file
create_scenario(0.01, target_vol=0.01, method='tv')
......@@ -6,14 +6,16 @@ Created on 2017-5-25
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.standardize import standardize
from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.data.neutralize import neutralize
from alphamind.portfolio.constraints import Constraints
from alphamind.data.processing import factor_processing
from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.constraints import Constraints
class TestFactorAnalysis(unittest.TestCase):
......@@ -29,14 +31,16 @@ class TestFactorAnalysis(unittest.TestCase):
new_factor = factor_processing(self.raw_factor,
pre_process=[standardize, winsorize_normal])
np.testing.assert_array_almost_equal(new_factor, winsorize_normal(standardize(self.raw_factor)))
np.testing.assert_array_almost_equal(new_factor,
winsorize_normal(standardize(self.raw_factor)))
new_factor = factor_processing(self.raw_factor,
pre_process=[standardize, winsorize_normal],
risk_factors=self.risk_factor)
np.testing.assert_array_almost_equal(new_factor, neutralize(self.risk_factor,
winsorize_normal(standardize(self.raw_factor))))
winsorize_normal(standardize(
self.raw_factor))))
def test_factor_analysis(self):
benchmark = np.random.randint(50, size=1000)
......@@ -64,7 +68,8 @@ class TestFactorAnalysis(unittest.TestCase):
weight = weight_table.weight
self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
np.testing.assert_array_almost_equal(weight @ self.risk_factor, benchmark @ self.risk_factor)
np.testing.assert_array_almost_equal(weight @ self.risk_factor,
benchmark @ self.risk_factor)
self.assertTrue(weight @ factor_df.values > benchmark @ factor_df.values)
def test_factor_analysis_with_several_factors(self):
......@@ -92,7 +97,8 @@ class TestFactorAnalysis(unittest.TestCase):
weight = weight_table.weight
self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
np.testing.assert_array_almost_equal(weight @ self.risk_factor, benchmark @ self.risk_factor)
np.testing.assert_array_almost_equal(weight @ self.risk_factor,
benchmark @ self.risk_factor)
if __name__ == '__main__':
......
......@@ -6,8 +6,10 @@ Created on 2017-5-12
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.analysis.perfanalysis import perf_attribution_by_pos
......@@ -28,14 +30,15 @@ class TestPerformanceAnalysis(unittest.TestCase):
index=dates)
explained_table = perf_attribution_by_pos(weights_series - bm_series,
next_bar_return_series,
risk_table)
next_bar_return_series,
risk_table)
to_explain = (weights_series - bm_series).multiply(next_bar_return_series, axis=0)
aggregated_to_explain = pd.Series(to_explain).groupby(dates).sum()
aggregated_explained = explained_table.sum(axis=1)
np.testing.assert_array_almost_equal(aggregated_to_explain.values, aggregated_explained.values)
np.testing.assert_array_almost_equal(aggregated_to_explain.values,
aggregated_explained.values)
if __name__ == '__main__':
......
......@@ -6,14 +6,16 @@ Created on 2017-8-16
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.analysis.quantileanalysis import er_quantile_analysis
from alphamind.analysis.quantileanalysis import quantile_analysis
from alphamind.data.processing import factor_processing
from alphamind.data.quantile import quantile
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.quantile import quantile
class TestQuantileAnalysis(unittest.TestCase):
......
......@@ -6,8 +6,10 @@ Created on 2017-5-8
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.analysis.riskanalysis import risk_analysis
......
......@@ -6,10 +6,11 @@ Created on 2017-11-1
"""
import unittest
import numpy as np
from alphamind.cython.optimizers import CVOptimizer
from alphamind.cython.optimizers import LPOptimizer
from alphamind.cython.optimizers import QPOptimizer
from alphamind.cython.optimizers import CVOptimizer
class TestOptimizers(unittest.TestCase):
......
......@@ -7,27 +7,29 @@ Created on 2018-4-17
import random
import unittest
import numpy as np
import pandas as pd
from scipy.stats import rankdata
from sqlalchemy import select, and_, or_
from PyFin.api import makeSchedule
from PyFin.api import CSQuantiles
from PyFin.api import CSRank
from PyFin.api import advanceDateByCalendar
from PyFin.api import bizDatesList
from PyFin.api import CSRank
from PyFin.api import CSQuantiles
from alphamind.tests.test_suite import SKIP_ENGINE_TESTS
from alphamind.tests.test_suite import DATA_ENGINE_URI
from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import Market
from alphamind.data.dbmodel.models import IndexMarket
from PyFin.api import makeSchedule
from scipy.stats import rankdata
from sqlalchemy import select, and_, or_
from alphamind.data.dbmodel.models import IndexComponent
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.dbmodel.models import IndexMarket
from alphamind.data.dbmodel.models import Industry
from alphamind.data.dbmodel.models import Market
from alphamind.data.dbmodel.models import RiskCovShort
from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import Industry
from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe
from alphamind.tests.test_suite import DATA_ENGINE_URI
from alphamind.tests.test_suite import SKIP_ENGINE_TESTS
from alphamind.utilities import alpha_logger
......@@ -149,7 +151,8 @@ class TestSqlEngine(unittest.TestCase):
universe = Universe('zz500') + Universe('zz1000')
codes = self.engine.fetch_codes(ref_date, universe)
dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset, benchmark=benchmark)
dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset,
benchmark=benchmark)
start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
end_date = advanceDateByCalendar('china.sse', ref_date, '6b')
......@@ -172,7 +175,8 @@ class TestSqlEngine(unittest.TestCase):
df = pd.read_sql(query, con=self.engine.engine)
b_res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
np.testing.assert_array_almost_equal(dx_return.dx.values, res.chgPct.values - b_res.chgPct.values)
np.testing.assert_array_almost_equal(dx_return.dx.values,
res.chgPct.values - b_res.chgPct.values)
horizon = 4
offset = 0
......@@ -180,7 +184,8 @@ class TestSqlEngine(unittest.TestCase):
universe = Universe('zz500') + Universe('zz1000')
codes = self.engine.fetch_codes(ref_date, universe)
dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset, benchmark=benchmark)
dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset,
benchmark=benchmark)
start_date = advanceDateByCalendar('china.sse', ref_date, '1b')
end_date = advanceDateByCalendar('china.sse', ref_date, '5b')
......@@ -203,7 +208,8 @@ class TestSqlEngine(unittest.TestCase):
df = pd.read_sql(query, con=self.engine.engine)
b_res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
np.testing.assert_array_almost_equal(dx_return.dx.values, res.chgPct.values - b_res.chgPct.values)
np.testing.assert_array_almost_equal(dx_return.dx.values,
res.chgPct.values - b_res.chgPct.values)
def test_sql_engine_fetch_dx_return_range(self):
ref_dates = makeSchedule(advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
......@@ -276,7 +282,8 @@ class TestSqlEngine(unittest.TestCase):
b_res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
calculated_return = dx_return[dx_return.trade_date == ref_date]
np.testing.assert_array_almost_equal(calculated_return.dx.values, res.chgPct.values - b_res.chgPct.values)
np.testing.assert_array_almost_equal(calculated_return.dx.values,
res.chgPct.values - b_res.chgPct.values)
def test_sql_engine_fetch_dx_return_with_universe_adjustment(self):
ref_dates = makeSchedule(advanceDateByCalendar('china.sse', '2017-01-26', '-6m'),
......@@ -404,7 +411,8 @@ class TestSqlEngine(unittest.TestCase):
ref_dates = makeSchedule(advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
self.ref_date,
'60b', 'china.sse')
ref_dates = ref_dates + [advanceDateByCalendar('china.sse', ref_dates[-1], '60b').strftime('%Y-%m-%d')]
ref_dates = ref_dates + [
advanceDateByCalendar('china.sse', ref_dates[-1], '60b').strftime('%Y-%m-%d')]
universe = Universe('zz500') + Universe('zz1000')
factor = 'ROE'
......@@ -414,7 +422,8 @@ class TestSqlEngine(unittest.TestCase):
groups = codes.groupby('trade_date')
for ref_date, g in groups:
forward_ref_date = advanceDateByCalendar('china.sse', ref_date, '60b').strftime('%Y-%m-%d')
forward_ref_date = advanceDateByCalendar('china.sse', ref_date, '60b').strftime(
'%Y-%m-%d')
query = select([Uqer.code, Uqer.ROE]).where(
and_(
Uqer.trade_date == forward_ref_date,
......@@ -451,7 +460,8 @@ class TestSqlEngine(unittest.TestCase):
benchmark = 906
index_data = self.engine.fetch_benchmark_range(benchmark, dates=ref_dates)
query = select([IndexComponent.trade_date, IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
query = select([IndexComponent.trade_date, IndexComponent.code,
(IndexComponent.weight / 100.).label('weight')]).where(
and_(
IndexComponent.trade_date.in_(ref_dates),
IndexComponent.indexCode == benchmark
......@@ -462,7 +472,8 @@ class TestSqlEngine(unittest.TestCase):
for ref_date in ref_dates:
calculated_data = index_data[index_data.trade_date == ref_date]
expected_data = df[df.trade_date == ref_date]
np.testing.assert_array_almost_equal(calculated_data.weight.values, expected_data.weight.values)
np.testing.assert_array_almost_equal(calculated_data.weight.values,
expected_data.weight.values)
def test_sql_engine_fetch_risk_model(self):
ref_date = self.ref_date
......@@ -533,7 +544,7 @@ class TestSqlEngine(unittest.TestCase):
ind_matrix = self.engine.fetch_industry_matrix(ref_date, codes, 'sw', 1)
cols = sorted(ind_matrix.columns[2:].tolist())
series = (ind_matrix[cols] * np.array(range(1, len(cols)+1))).sum(axis=1)
series = (ind_matrix[cols] * np.array(range(1, len(cols) + 1))).sum(axis=1)
df3['cat'] = series.values
expected_rank = df3[['ROE', 'cat']].groupby('cat').transform(lambda x: rankdata(x.values))
......@@ -542,7 +553,8 @@ class TestSqlEngine(unittest.TestCase):
np.testing.assert_array_almost_equal(df3['rank'].values,
df1['f'].values)
expected_quantile = df3[['ROE', 'cat']].groupby('cat').transform(lambda x: rankdata(x.values) / (len(x) + 1))
expected_quantile = df3[['ROE', 'cat']].groupby('cat').transform(
lambda x: rankdata(x.values) / (len(x) + 1))
expected_quantile[np.isnan(df3.ROE)] = np.nan
df3['quantile'] = expected_quantile['ROE'].values
np.testing.assert_array_almost_equal(df3['quantile'].values,
......
......@@ -6,6 +6,7 @@ Created on 2018-2-9
"""
import unittest
from alphamind.data.engines.universe import Universe
from alphamind.data.engines.universe import load_universe
......@@ -41,4 +42,4 @@ class TestUniverse(unittest.TestCase):
universe = Universe('zz500') & Universe('hs300')
univ_desc = universe.save()
loaded_universe = load_universe(univ_desc)
self.assertEqual(universe, loaded_universe)
\ No newline at end of file
self.assertEqual(universe, loaded_universe)
......@@ -6,8 +6,10 @@ Created on 2017-4-25
"""
import unittest
import numpy as np
from sklearn.linear_model import LinearRegression
from alphamind.data.neutralize import neutralize
......@@ -79,7 +81,8 @@ class TestNeutralize(unittest.TestCase):
exp_res = curr_y - curr_x @ model.coef_.T
exp_explained = curr_x * model.coef_.T
np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1))
np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0], exp_explained)
np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0],
exp_explained)
calc_res, other_stats = neutralize(self.x, self.y, self.groups, detail=True)
......@@ -93,7 +96,8 @@ class TestNeutralize(unittest.TestCase):
for j in range(self.y.shape[1]):
exp_explained = curr_x * model.coef_.T[:, j]
np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, j], exp_explained)
np.testing.assert_array_almost_equal(
other_stats['explained'][self.groups == i, :, j], exp_explained)
if __name__ == '__main__':
......
......@@ -6,7 +6,9 @@ Created on 2017-8-16
"""
import unittest
import numpy as np
from alphamind.data.quantile import quantile
......
......@@ -6,8 +6,10 @@ Created on 2017-8-8
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.data.rank import rank
......@@ -37,6 +39,7 @@ class TestRank(unittest.TestCase):
ret.append(groups[index].values)
ret = np.concatenate(ret).reshape(-1, 1)
expected_rank = data['raw'].groupby(level=0).apply(lambda x: x.values.argsort(axis=0).argsort(axis=0))
expected_rank = data['raw'].groupby(level=0).apply(
lambda x: x.values.argsort(axis=0).argsort(axis=0))
expected_rank = np.concatenate(expected_rank).reshape(-1, 1)
np.testing.assert_array_equal(ret, expected_rank)
......@@ -6,12 +6,14 @@ Created on 2017-4-25
"""
import unittest
import numpy as np
import pandas as pd
from scipy.stats import zscore
from alphamind.data.standardize import standardize
from alphamind.data.standardize import projection
from alphamind.data.standardize import Standardizer
from alphamind.data.standardize import projection
from alphamind.data.standardize import standardize
class TestStandardize(unittest.TestCase):
......@@ -42,8 +44,8 @@ class TestStandardize(unittest.TestCase):
def test_standardize_with_group(self):
calc_zscore = standardize(self.x, self.groups)
exp_zscore = pd.DataFrame(self.x).\
groupby(self.groups).\
exp_zscore = pd.DataFrame(self.x). \
groupby(self.groups). \
transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1))
np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
......@@ -87,4 +89,4 @@ if __name__ == '__main__':
x2 = s2.transform(y)
print(dt.datetime.now() - start)
np.testing.assert_array_almost_equal(x1, x2)
\ No newline at end of file
np.testing.assert_array_almost_equal(x1, x2)
......@@ -6,10 +6,12 @@ Created on 2017-4-25
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.data.winsorize import winsorize_normal
from alphamind.data.winsorize import NormalWinsorizer
from alphamind.data.winsorize import winsorize_normal
class TestWinsorize(unittest.TestCase):
......
......@@ -6,7 +6,9 @@ Created on 2017-9-22
"""
import unittest
import pandas as pd
from alphamind.execution.naiveexecutor import NaiveExecutor
......@@ -42,4 +44,4 @@ class TestNaiveExecutor(unittest.TestCase):
if __name__ == '__main__':
unittest.main()
\ No newline at end of file
unittest.main()
......@@ -7,11 +7,13 @@ Created on 2017-9-25
import unittest
from collections import deque
import numpy as np
import pandas as pd
from alphamind.execution.pipeline import ExecutionPipeline
from alphamind.execution.thresholdexecutor import ThresholdExecutor
from alphamind.execution.targetvolexecutor import TargetVolExecutor
from alphamind.execution.thresholdexecutor import ThresholdExecutor
class TestExecutionPipeline(unittest.TestCase):
......
......@@ -7,8 +7,10 @@ Created on 2017-9-22
import unittest
from collections import deque
import numpy as np
import pandas as pd
from alphamind.execution.targetvolexecutor import TargetVolExecutor
......@@ -48,4 +50,4 @@ class TestTargetVolExecutor(unittest.TestCase):
if __name__ == '__main__':
unittest.main()
\ No newline at end of file
unittest.main()
......@@ -6,7 +6,9 @@ Created on 2017-9-22
"""
import unittest
import pandas as pd
from alphamind.execution.thresholdexecutor import ThresholdExecutor
......
......@@ -6,9 +6,10 @@ Created on 2018-2-9
"""
import unittest
from alphamind.data.engines.universe import Universe
from alphamind.model.composer import DataMeta
from alphamind.model.composer import Composer
from alphamind.model.composer import DataMeta
from alphamind.model.treemodel import XGBClassifier
......@@ -19,7 +20,6 @@ class TestComposer(unittest.TestCase):
self.assertEqual(lhs.data_meta, rhs.data_meta)
def test_data_meta_persistence(self):
freq = '5b'
universe = Universe('zz800')
batch = 4
......@@ -83,7 +83,3 @@ class TestComposer(unittest.TestCase):
comp_desc = composer.save()
loaded_comp = Composer.load(comp_desc)
self._assert_composer_equal(composer, loaded_comp)
......@@ -6,14 +6,16 @@ Created on 2017-9-4
"""
import unittest
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression as LinearRegression2
from alphamind.model.loader import load_model
from sklearn.linear_model import LogisticRegression as LogisticRegression2
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LinearRegression
from sklearn.linear_model import LogisticRegression as LogisticRegression2
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.loader import load_model
class TestLinearModel(unittest.TestCase):
......@@ -27,7 +29,6 @@ class TestLinearModel(unittest.TestCase):
self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])
def test_const_linear_model(self):
features = ['c', 'b', 'a']
weights = dict(c=3., b=2., a=1.)
model = ConstLinearModel(features=features,
......@@ -111,4 +112,3 @@ class TestLinearModel(unittest.TestCase):
np.testing.assert_array_almost_equal(calculated_y, expected_y)
np.testing.assert_array_almost_equal(new_model.weights, model.weights)
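A ConstLinearModel is just a fixed-weight score: X[features] @ weights. A minimal sketch of the behavior these tests assert, including the sorted feature ordering pinned down in the model-base test below (the sorting rule is inferred from that assertion):

import numpy as np
import pandas as pd

class ConstLinearSketch:
    def __init__(self, features, weights):
        self.features = sorted(features)    # ['c', 'b', 'a'] -> ['a', 'b', 'c']
        self.weights = np.array([weights[f] for f in self.features])

    def predict(self, x: pd.DataFrame) -> np.ndarray:
        # fixed linear score over the named feature columns
        return x[self.features].values @ self.weights

model = ConstLinearSketch(features=['c', 'b', 'a'], weights=dict(c=3., b=2., a=1.))
x = pd.DataFrame(np.random.randn(10, 3), columns=['a', 'b', 'c'])
print(model.predict(x))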
......@@ -6,8 +6,10 @@ Created on 2017-9-5
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.loader import load_model
......
......@@ -6,6 +6,7 @@ Created on 2018-2-8
"""
import unittest
from alphamind.model.linearmodel import ConstLinearModel
......@@ -13,4 +14,4 @@ class TestModelBase(unittest.TestCase):
def test_simple_model_features(self):
model = ConstLinearModel(features=['c', 'b', 'a'])
self.assertListEqual(['a', 'b', 'c'], model.features)
\ No newline at end of file
self.assertListEqual(['a', 'b', 'c'], model.features)
......@@ -6,13 +6,15 @@ Created on 2018-1-5
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.model.loader import load_model
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBTrainer
......@@ -32,7 +34,8 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x),
new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_random_forest_classify_persistence(self):
......@@ -44,7 +47,8 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x),
new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_regress_persistence(self):
......@@ -55,7 +59,8 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x),
new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_classify_persistence(self):
......@@ -67,11 +72,11 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x),
new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
def test_xgb_trainer_equal_classifier(self):
model1 = XGBClassifier(n_estimators=100,
learning_rate=0.1,
max_depth=3,
......@@ -109,5 +114,6 @@ class TestTreeModel(unittest.TestCase):
new_model = load_model(desc)
self.assertEqual(model.features, new_model.features)
np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.predict(self.sample_x),
new_model.predict(self.sample_x))
np.testing.assert_array_almost_equal(model.importances, new_model.importances)
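Every tree-model persistence test above round-trips the fitted model and compares predictions plus feature importances. The same check works with scikit-learn directly; a minimal sketch (alpha-mind wraps these estimators behind save()/load_model, whose description format is not reproduced here):

import pickle

import numpy as np
from sklearn.ensemble import RandomForestRegressor

x = np.random.randn(200, 3)
y = np.random.randn(200)

model = RandomForestRegressor(n_estimators=10, random_state=42).fit(x, y)
loaded = pickle.loads(pickle.dumps(model))

np.testing.assert_array_almost_equal(model.predict(x), loaded.predict(x))
np.testing.assert_array_almost_equal(model.feature_importances_,
                                     loaded.feature_importances_)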
......@@ -3,4 +3,4 @@
Created on 2017-4-27
@author: cheng.li
"""
\ No newline at end of file
"""
......@@ -6,15 +6,17 @@ Created on 2017-7-20
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import BoxBoundary
from alphamind.portfolio.constraints import BoundaryImpl
from alphamind.portfolio.constraints import BoundaryDirection
from alphamind.portfolio.constraints import BoundaryImpl
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.portfolio.constraints import BoxBoundary
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import create_box_bounds
class TestConstraints(unittest.TestCase):
......
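Box bounds constrain each aggregate factor exposure of the portfolio to an interval. A minimal numpy sketch of the check such bounds encode (the matrices and limits are made up; this is not the Constraints/LinearConstraints API):

import numpy as np

risk_exp = np.array([[1., 0.], [1., 0.], [0., 1.]])   # assets x factors
lower = np.array([0.0, 0.1])                          # per-factor lower bounds
upper = np.array([0.6, 0.9])                          # per-factor upper bounds

w = np.array([0.3, 0.3, 0.4])                         # candidate weights
exposure = w @ risk_exp                               # aggregate factor exposure
assert np.all(exposure >= lower) and np.all(exposure <= upper)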
......@@ -6,7 +6,9 @@ Created on 2017-11-23
"""
import unittest
import numpy as np
from alphamind.portfolio.evolver import evolve_positions
......@@ -34,4 +36,4 @@ class TestEvolver(unittest.TestCase):
calculated_pos = evolve_positions(positions, dx_returns)
np.testing.assert_array_almost_equal(expected_pos, calculated_pos)
\ No newline at end of file
np.testing.assert_array_almost_equal(expected_pos, calculated_pos)
......@@ -6,7 +6,9 @@ Created on 2017-5-5
"""
import unittest
import numpy as np
from alphamind.portfolio.linearbuilder import linear_builder
......
......@@ -6,8 +6,10 @@ Created on 2017-5-9
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.longshortbulder import long_short_builder
......@@ -37,7 +39,8 @@ class TestLongShortBuild(unittest.TestCase):
np.testing.assert_array_almost_equal(calc_weights, expected_weights)
calc_weights = long_short_builder(self.x, groups=self.groups)
expected_weights = pd.DataFrame(self.x).groupby(self.groups).apply(lambda s: s / np.abs(s).sum(axis=0))
expected_weights = pd.DataFrame(self.x).groupby(self.groups).apply(
lambda s: s / np.abs(s).sum(axis=0))
np.testing.assert_array_almost_equal(calc_weights, expected_weights)
def test_long_short_build_with_masks(self):
......
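The expected value in this hunk is the builder's whole contract: within each group, divide raw scores by the group's gross (absolute) sum, so every group ends up with unit gross exposure. A standalone sketch using transform (equivalent here to the apply in the test):

import numpy as np
import pandas as pd

x = np.random.randn(8, 1)
groups = np.array([0, 0, 0, 0, 1, 1, 1, 1])

weights = pd.DataFrame(x).groupby(groups).transform(
    lambda s: s / np.abs(s).sum(axis=0))
print(weights.abs().groupby(groups).sum())    # 1.0 per group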
......@@ -6,8 +6,9 @@ Created on 2017-6-27
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder
......@@ -31,7 +32,8 @@ class TestMeanVarianceBuild(unittest.TestCase):
risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7]))
model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target)
status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure,
risk_target)
self.assertTrue(status == 'optimal')
self.assertAlmostEqual(x.sum(), bm.sum())
......@@ -77,7 +79,8 @@ class TestMeanVarianceBuild(unittest.TestCase):
risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7]))
model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target, lam=100)
status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure,
risk_target, lam=100)
self.assertTrue(status == 'optimal')
self.assertAlmostEqual(x.sum(), bm.sum())
......@@ -101,10 +104,11 @@ class TestMeanVarianceBuild(unittest.TestCase):
risk_exposure = np.array([[1., 1., 1.]]).T
risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))
model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
status, _, x = target_vol_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target, 0.1)
status, _, x = target_vol_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target,
0.1)
self.assertTrue(status == 'optimal')
self.assertTrue(np.all(x <= ubound + 1.e-6))
self.assertTrue(np.all(x >= lbound - 1.e-6))
self.assertTrue(np.all(x @ risk_exposure <= risk_target[1] + 1.e-6))
self.assertTrue(np.all(x @ risk_exposure >= risk_target[0] - 1.e-6))
np.testing.assert_array_almost_equal(x, [-0.3, -0.10919033, 0.40919033] + bm)
\ No newline at end of file
np.testing.assert_array_almost_equal(x, [-0.3, -0.10919033, 0.40919033] + bm)
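For intuition about what mean_variance_builder solves: with utility er'w - (lam/2)·w'Σw and no constraints, the optimum has the closed form w* = Σ⁻¹er / lam. A minimal sketch under that convention (alpha-mind's exact objective and scaling of lam may differ; the numbers are made up):

import numpy as np

er = np.array([0.01, 0.02, 0.015])
cov = np.array([[0.020, 0.010, 0.005],
                [0.010, 0.030, 0.010],
                [0.005, 0.010, 0.025]])
lam = 10.0

w_star = np.linalg.solve(cov, er) / lam   # unconstrained optimum
print(w_star)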
......@@ -6,8 +6,10 @@ Created on 2017-5-4
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.percentbuilder import percent_build
......@@ -82,4 +84,4 @@ class TestPercentBuild(unittest.TestCase):
if __name__ == '__main__':
unittest.main()
\ No newline at end of file
unittest.main()
......@@ -6,8 +6,10 @@ Created on 2017-4-27
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.rankbuilder import rank_build
......
......@@ -6,17 +6,21 @@ Created on 2018-5-29
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.portfolio.riskmodel import FullRiskModel
from alphamind.portfolio.riskmodel import FactorRiskModel
from alphamind.portfolio.riskmodel import FullRiskModel
class TestRiskModel(unittest.TestCase):
def setUp(self):
self.factor_cov = pd.DataFrame([[0.5, -0.3], [-0.3, 0.7]], columns=['a', 'b'], index=['a', 'b'])
self.risk_exp = pd.DataFrame([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]], columns=['a', 'b'], index=[1, 2, 3])
self.factor_cov = pd.DataFrame([[0.5, -0.3], [-0.3, 0.7]], columns=['a', 'b'],
index=['a', 'b'])
self.risk_exp = pd.DataFrame([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]], columns=['a', 'b'],
index=[1, 2, 3])
self.idsync = pd.Series([0.1, 0.3, 0.2], index=[1, 2, 3])
self.sec_cov = self.risk_exp.values @ self.factor_cov.values @ self.risk_exp.values.T \
+ np.diag(self.idsync.values)
......@@ -55,5 +59,3 @@ class TestRiskModel(unittest.TestCase):
np.testing.assert_array_almost_equal(res, self.risk_exp)
res = model.get_idsync()
np.testing.assert_array_almost_equal(res, self.idsync)
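setUp assembles the security covariance from the classic factor-model identity Σ = B·F·Bᵀ + D (exposures times factor covariance plus specific variance). A standalone check with the same toy numbers as the test:

import numpy as np

factor_cov = np.array([[0.5, -0.3], [-0.3, 0.7]])           # F, factor x factor
risk_exp = np.array([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]])   # B, asset x factor
idsync = np.array([0.1, 0.3, 0.2])                          # D, specific variances

sec_cov = risk_exp @ factor_cov @ risk_exp.T + np.diag(idsync)
assert np.allclose(sec_cov, sec_cov.T)                      # covariance is symmetric
print(sec_cov)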
......@@ -3,4 +3,4 @@
Created on 2017-4-28
@author: cheng.li
"""
\ No newline at end of file
"""
......@@ -5,10 +5,11 @@ Created on 2017-4-28
@author: cheng.li
"""
import unittest
import numpy as np
import pandas as pd
from alphamind.settlement.simplesettle import simple_settle
......
......@@ -14,7 +14,6 @@ if not SKIP_ENGINE_TESTS:
else:
DATA_ENGINE_URI = None
if __name__ == '__main__':
from simpleutils import add_parent_path
......
......@@ -6,18 +6,17 @@ Created on 2017-4-25
"""
import base64
import pickle
import math
from simpleutils import CustomLogger
import numpy as np
import numba as nb
import pickle
import numba as nb
import numpy as np
from simpleutils import CustomLogger
alpha_logger = CustomLogger('ALPHA_MIND', 'info')
def map_freq(freq):
if freq == '1m':
horizon = 21
elif freq == '1w':
......@@ -97,7 +96,6 @@ def simple_sum(x, axis=0):
@nb.njit(nogil=True, cache=True)
def simple_abssum(x, axis=0):
length, width = x.shape
if axis == 0:
......@@ -189,7 +187,7 @@ def simple_std(x, axis=0, ddof=1):
def agg_sum(groups, x):
max_g = groups.max()
length, width = x.shape
res = np.zeros((max_g+1, width), dtype=np.float64)
res = np.zeros((max_g + 1, width), dtype=np.float64)
for i in range(length):
for j in range(width):
......@@ -215,7 +213,7 @@ def agg_sqrsum(groups, x):
def agg_abssum(groups, x):
max_g = groups.max()
length, width = x.shape
res = np.zeros((max_g+1, width), dtype=np.float64)
res = np.zeros((max_g + 1, width), dtype=np.float64)
for i in range(length):
for j in range(width):
......@@ -227,15 +225,15 @@ def agg_abssum(groups, x):
def agg_mean(groups, x):
max_g = groups.max()
length, width = x.shape
res = np.zeros((max_g+1, width), dtype=np.float64)
bin_count = np.zeros(max_g+1, dtype=np.int32)
res = np.zeros((max_g + 1, width), dtype=np.float64)
bin_count = np.zeros(max_g + 1, dtype=np.int32)
for i in range(length):
for j in range(width):
res[groups[i], j] += x[i, j]
bin_count[groups[i]] += 1
for i in range(max_g+1):
for i in range(max_g + 1):
curr = bin_count[i]
for j in range(width):
res[i, j] /= curr
......@@ -246,9 +244,9 @@ def agg_mean(groups, x):
def agg_std(groups, x, ddof=1):
max_g = groups.max()
length, width = x.shape
res = np.zeros((max_g+1, width), dtype=np.float64)
res = np.zeros((max_g + 1, width), dtype=np.float64)
sumsq = np.zeros((max_g + 1, width), dtype=np.float64)
bin_count = np.zeros(max_g+1, dtype=np.int32)
bin_count = np.zeros(max_g + 1, dtype=np.int32)
for i in range(length):
for j in range(width):
......@@ -256,7 +254,7 @@ def agg_std(groups, x, ddof=1):
sumsq[groups[i], j] += x[i, j] * x[i, j]
bin_count[groups[i]] += 1
for i in range(max_g+1):
for i in range(max_g + 1):
curr = bin_count[i]
for j in range(width):
res[i, j] = math.sqrt((sumsq[i, j] - res[i, j] * res[i, j] / curr) / (curr - ddof))
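agg_std avoids a second pass over the data by using the running-sums identity var = (Σx² - (Σx)²/n) / (n - ddof). A quick numpy check of that identity:

import numpy as np

x = np.random.randn(100)
n = len(x)
s, sq = x.sum(), (x * x).sum()

var_running = (sq - s * s / n) / (n - 1)    # the identity agg_std relies on
assert np.isclose(np.sqrt(var_running), x.std(ddof=1))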
......@@ -304,9 +302,8 @@ def array_index(array, items):
def transform(groups: np.ndarray,
x: np.ndarray,
func: str,
ddof: int=1,
scale: float=1.) -> np.ndarray:
ddof: int = 1,
scale: float = 1.) -> np.ndarray:
if func == 'mean':
value_data = agg_mean(groups, x)
elif func == 'std':
......@@ -350,4 +347,4 @@ def encode(obj: object) -> str:
def decode(str_repr: str):
encoded = str_repr.encode('ascii')
return pickle.loads(base64.decodebytes(encoded))
\ No newline at end of file
return pickle.loads(base64.decodebytes(encoded))
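decode reverses a pickle-then-base64 pipeline, so the matching encode (elided in this hunk) presumably pickles and base64-encodes. A minimal sketch of the round trip; encode_sketch is a reconstruction under that assumption, not the library's verbatim source:

import base64
import pickle

def encode_sketch(obj: object) -> str:
    # pickle -> base64 bytes -> ascii string (assumed mirror of decode)
    return base64.encodebytes(pickle.dumps(obj)).decode('ascii')

def decode_sketch(str_repr: str):
    encoded = str_repr.encode('ascii')
    return pickle.loads(base64.decodebytes(encoded))

assert decode_sketch(encode_sketch({'a': 1})) == {'a': 1}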