Commit ee112dba authored by Dr.李

FORMAT: reformat codes
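The diff touches most modules in the package but applies only three kinds of mechanical style changes, sketched below with fragments lifted from the hunks that follow (illustrative only; numpy is the one assumed dependency):

# Before, as on the left-hand side of the hunks below:
#     import sys
#     import abc
#     def rank(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray: ...
# After: (1) imports sorted alphabetically by module path, (2) PEP 8
# spacing around annotated keyword defaults, (3) lines beyond roughly
# 100 characters wrapped under the opening parenthesis.
import abc
import sys
from typing import Optional

import numpy as np


def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    ...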

parent b5418fb8
@@ -5,5 +5,4 @@ Created on 2017-4-25
@author: cheng.li
"""
__version__ = "0.2.1"
@@ -8,10 +8,11 @@ Created on 2018-3-5
import numpy as np
import pandas as pd
import statsmodels.api as sm
-from alphamind.utilities import alpha_logger
from alphamind.data.processing import factor_processing
-from alphamind.data.winsorize import winsorize_normal
from alphamind.data.standardize import standardize
+from alphamind.data.winsorize import winsorize_normal
+from alphamind.utilities import alpha_logger

def cs_impl(ref_date,
@@ -33,7 +34,8 @@ def cs_impl(ref_date,
    total_risk_exp = total_data[constraint_risk]

    er = total_data[[factor_name]].values.astype(float)
-    er = factor_processing(er, [winsorize_normal, standardize], total_risk_exp.values, [standardize]).flatten()
+    er = factor_processing(er, [winsorize_normal, standardize], total_risk_exp.values,
+                           [standardize]).flatten()
    industry = total_data.industry_name.values
    codes = total_data.code.tolist()
@@ -75,7 +77,8 @@ def cross_section_analysis(ref_date,
    industry_matrix = engine.fetch_industry_matrix(ref_date, codes, 'sw_adj', 1)
    dx_returns = engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=1)

-    return cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk, industry_matrix, dx_returns)
+    return cs_impl(ref_date, factor_data, factor_name, risk_exposure, constraint_risk,
+                   industry_matrix, dx_returns)

if __name__ == '__main__':
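For orientation, the factor_processing call above applies the usual chain: winsorize and z-score the raw factor, neutralize it against the risk exposures, then re-standardize the residual. A toy sketch with random inputs, assuming the alphamind package is importable (the keyword names match the factor_processing signature shown in a later hunk):

import numpy as np

from alphamind.data.processing import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal

er = np.random.randn(500, 1)           # raw factor values, one column
risk_exp = np.random.randn(500, 10)    # style/industry exposures
er = factor_processing(er,
                       pre_process=[winsorize_normal, standardize],
                       risk_factors=risk_exp,
                       post_process=[standardize]).flatten()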
@@ -8,18 +8,20 @@ Created on 2017-5-25
from typing import Optional
from typing import Tuple
from typing import Union

import numpy as np
import pandas as pd

+from alphamind.data.processing import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.constraints import Constraints
from alphamind.portfolio.constraints import LinearConstraints
-from alphamind.portfolio.longshortbulder import long_short_builder
-from alphamind.portfolio.rankbuilder import rank_build
from alphamind.portfolio.linearbuilder import linear_builder
+from alphamind.portfolio.longshortbulder import long_short_builder
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder
-from alphamind.data.processing import factor_processing
+from alphamind.portfolio.rankbuilder import rank_build
from alphamind.settlement.simplesettle import simple_settle
@@ -106,7 +108,8 @@ def er_portfolio_analysis(er: np.ndarray,
            raise ValueError('linear programming optimizer in status: {0}'.format(status))
    elif method == 'rank':
-        weights = rank_build(er, use_rank=kwargs['use_rank'], masks=is_tradable).flatten() * benchmark.sum() / kwargs[
+        weights = rank_build(er, use_rank=kwargs['use_rank'],
+                             masks=is_tradable).flatten() * benchmark.sum() / kwargs[
            'use_rank']
    elif method == 'ls' or method == 'long_short':
        weights = long_short_builder(er).flatten()
@@ -6,6 +6,7 @@ Created on 2017-5-12
"""
import pandas as pd

from alphamind.analysis.riskanalysis import risk_analysis
@@ -6,22 +6,23 @@ Created on 2017-8-16
"""
from typing import Optional

import numpy as np
import pandas as pd

-from alphamind.utilities import agg_mean
+from alphamind.data.processing import factor_processing
from alphamind.data.quantile import quantile
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
-from alphamind.data.processing import factor_processing
+from alphamind.utilities import agg_mean

def quantile_analysis(factors: pd.DataFrame,
                      factor_weights: np.ndarray,
                      dx_return: np.ndarray,
-                      n_bins: int=5,
-                      risk_exp: Optional[np.ndarray]=None,
+                      n_bins: int = 5,
+                      risk_exp: Optional[np.ndarray] = None,
                      **kwargs):
    if 'pre_process' in kwargs:
        pre_process = kwargs['pre_process']
        del kwargs['pre_process']
@@ -42,7 +43,6 @@ def er_quantile_analysis(er: np.ndarray,
                         n_bins: int,
                         dx_return: np.ndarray,
                         de_trend=False) -> np.ndarray:
    er = er.flatten()
    q_groups = quantile(er, n_bins)
@@ -78,8 +78,8 @@ if __name__ == '__main__':
                           r,
                           risk_exp=None,
                           n_bins=n_bins,
-                           pre_process=[], #[winsorize_normal, standardize],
-                           post_process=[]) #[standardize])
+                           pre_process=[],  # [winsorize_normal, standardize],
+                           post_process=[])  # [standardize])
    er = x_w @ f_df.values.T
    expected = er_quantile_analysis(er, n_bins, r)
@@ -6,15 +6,16 @@ Created on 2017-5-6
"""
from typing import Tuple

import numpy as np
import pandas as pd

from alphamind.data.neutralize import neutralize

def risk_analysis(net_weight_series: pd.Series,
                  next_bar_return_series: pd.Series,
                  risk_table: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    group_idx = net_weight_series.index.values.astype(int)
    net_pos = net_weight_series.values.reshape((-1, 1))
    risk_factor_cols = risk_table.columns
@@ -31,6 +32,8 @@ def risk_analysis(net_weight_series: pd.Series,
    cols = ['idiosyncratic']
    cols.extend(risk_factor_cols)
-    explained_table = pd.DataFrame(explained_table * net_pos, columns=cols, index=net_weight_series.index)
-    exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols, index=net_weight_series.index)
+    explained_table = pd.DataFrame(explained_table * net_pos, columns=cols,
+                                   index=net_weight_series.index)
+    exposure_table = pd.DataFrame(exposure[:, :, 0] * net_pos, columns=risk_factor_cols,
+                                  index=net_weight_series.index)
    return explained_table, exposure_table.groupby(level=0).first()
@@ -5,63 +5,57 @@ Created on 2017-8-16
@author: cheng.li
"""
-from alphamind.data.engines.sqlengine import SqlEngine
-from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.analysis.factoranalysis import er_portfolio_analysis
-from alphamind.analysis.quantileanalysis import quantile_analysis
+from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.analysis.quantileanalysis import er_quantile_analysis
-from alphamind.data.engines.universe import Universe
-from alphamind.data.processing import factor_processing
+from alphamind.analysis.quantileanalysis import quantile_analysis
+from alphamind.data.engines.sqlengine import SqlEngine
+from alphamind.data.engines.sqlengine import factor_tables
-from alphamind.portfolio.constraints import Constraints
-from alphamind.portfolio.constraints import LinearConstraints
-from alphamind.portfolio.constraints import BoundaryType
-from alphamind.portfolio.constraints import BoundaryDirection
-from alphamind.portfolio.constraints import create_box_bounds
-from alphamind.portfolio.evolver import evolve_positions
-from alphamind.data.engines.sqlengine import risk_styles
from alphamind.data.engines.sqlengine import industry_styles
from alphamind.data.engines.sqlengine import macro_styles
-from alphamind.data.winsorize import winsorize_normal
-from alphamind.data.winsorize import NormalWinsorizer
-from alphamind.data.standardize import standardize
+from alphamind.data.engines.sqlengine import risk_styles
+from alphamind.data.engines.universe import Universe
+from alphamind.data.engines.utilities import industry_list
-from alphamind.data.standardize import projection
-from alphamind.data.standardize import Standardizer
from alphamind.data.neutralize import neutralize
-from alphamind.data.rank import rank
+from alphamind.data.processing import factor_processing
from alphamind.data.rank import percentile
-from alphamind.data.engines.sqlengine import factor_tables
-from alphamind.data.engines.utilities import industry_list
+from alphamind.data.rank import rank
+from alphamind.data.standardize import Standardizer
+from alphamind.data.standardize import projection
-from alphamind.model import LinearRegression
-from alphamind.model import LassoRegression
+from alphamind.data.standardize import standardize
+from alphamind.data.winsorize import NormalWinsorizer
+from alphamind.data.winsorize import winsorize_normal
+from alphamind.execution.naiveexecutor import NaiveExecutor
+from alphamind.execution.pipeline import ExecutionPipeline
+from alphamind.execution.targetvolexecutor import TargetVolExecutor
+from alphamind.execution.thresholdexecutor import ThresholdExecutor
from alphamind.model import ConstLinearModel
+from alphamind.model import LassoRegression
+from alphamind.model import LinearRegression
from alphamind.model import LogisticRegression
-from alphamind.model import RandomForestRegressor
+from alphamind.model import NvSVRModel
from alphamind.model import RandomForestClassifier
-from alphamind.model import XGBRegressor
+from alphamind.model import RandomForestRegressor
from alphamind.model import XGBClassifier
+from alphamind.model import XGBRegressor
from alphamind.model import XGBTrainer
-from alphamind.model import NvSVRModel
from alphamind.model import load_model
-from alphamind.model.data_preparing import fetch_data_package
-from alphamind.model.data_preparing import fetch_train_phase
-from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.model.composer import Composer
from alphamind.model.composer import DataMeta
-from alphamind.model.composer import train_model
from alphamind.model.composer import predict_by_model
+from alphamind.model.composer import train_model
+from alphamind.model.data_preparing import fetch_data_package
+from alphamind.model.data_preparing import fetch_predict_phase
+from alphamind.model.data_preparing import fetch_train_phase
-from alphamind.execution.naiveexecutor import NaiveExecutor
-from alphamind.execution.thresholdexecutor import ThresholdExecutor
-from alphamind.execution.targetvolexecutor import TargetVolExecutor
-from alphamind.execution.pipeline import ExecutionPipeline
+from alphamind.portfolio.constraints import BoundaryDirection
+from alphamind.portfolio.constraints import BoundaryType
+from alphamind.portfolio.constraints import Constraints
+from alphamind.portfolio.constraints import LinearConstraints
+from alphamind.portfolio.constraints import create_box_bounds
+from alphamind.portfolio.evolver import evolve_positions
from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq

__all__ = [
    'SqlEngine',
    'factor_analysis',
@@ -11,17 +11,15 @@ from alphamind.benchmarks.data.standardize import benchmark_standardize
from alphamind.benchmarks.data.standardize import benchmark_standardize_with_group
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal
from alphamind.benchmarks.data.winsorize import benchmark_winsorize_normal_with_group
-from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
-from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
+from alphamind.benchmarks.portfolio.linearbuild import benchmark_build_linear
from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent
from alphamind.benchmarks.portfolio.percentbuild import benchmark_build_percent_with_group
-from alphamind.benchmarks.portfolio.linearbuild import benchmark_build_linear
+from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank
+from alphamind.benchmarks.portfolio.rankbuild import benchmark_build_rank_with_group
from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle
from alphamind.benchmarks.settlement.simplesettle import benchmark_simple_settle_with_group

if __name__ == '__main__':
    benchmark_neutralize(3000, 10, 1000)
    benchmark_neutralize_with_groups(3000, 10, 1000, 30)
    benchmark_neutralize(30, 3, 50000)
@@ -16,7 +16,8 @@ from alphamind.data.neutralize import neutralize
def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
    print("-" * 60)
    print("Starting least square fitting benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
+                                                                             n_loops))
    y = np.random.randn(n_samples, 5)
    x = np.random.randn(n_samples, n_features)
@@ -40,10 +41,12 @@ def benchmark_neutralize(n_samples: int, n_features: int, n_loops: int) -> None:
    np.testing.assert_array_almost_equal(calc_res, exp_res)

-def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
+def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: int,
+                                     n_groups: int) -> None:
    print("-" * 60)
    print("Starting least square fitting with group benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
+    print(
+        "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
                                                                                           n_features,
                                                                                           n_loops,
                                                                                           n_groups))
@@ -71,7 +74,7 @@ def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: i
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))

if __name__ == '__main__':
    benchmark_neutralize(3000, 10, 1000)
    benchmark_neutralize_with_groups(3000, 10, 1000, 30)
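The quantity this benchmark times is the least-squares residual. In plain numpy it is equivalent to the following sketch (the library's neutralize additionally supports groups, weights, and a detail flag, per the hunks further down):

import numpy as np

def neutralize_sketch(x: np.ndarray, y: np.ndarray) -> np.ndarray:
    # residual of an OLS regression of y on x
    beta = np.linalg.lstsq(x, y, rcond=None)[0]
    return y - x @ beta

x = np.random.randn(3000, 10)
y = np.random.randn(3000, 5)
res = neutralize_sketch(x, y)     # same shape as y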
@@ -6,16 +6,19 @@ Created on 2017-4-25
"""
import datetime as dt

import numpy as np
import pandas as pd
from scipy.stats import zscore

from alphamind.data.standardize import standardize

def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None:
    print("-" * 60)
    print("Starting standardizing benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
+                                                                             n_loops))
    x = np.random.randn(n_samples, n_features)
@@ -34,10 +37,15 @@ def benchmark_standardize(n_samples: int, n_features: int, n_loops: int) -> None
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))

-def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
+def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: int,
+                                     n_groups: int) -> None:
    print("-" * 60)
    print("Starting standardizing with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
+    print(
+        "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
+                                                                                          n_features,
+                                                                                          n_loops,
+                                                                                          n_groups))
    x = np.random.randn(n_samples, n_features)
    groups = np.random.randint(n_groups, size=n_samples)
@@ -51,7 +59,8 @@ def benchmark_standardize_with_group(n_samples: int, n_features: int, n_loops: i
    start = dt.datetime.now()
    for _ in range(n_loops):
-        _ = pd.DataFrame(x).groupby(groups).transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
+        _ = pd.DataFrame(x).groupby(groups).transform(
+            lambda s: (s - s.mean(axis=0)) / s.std(axis=0))
    benchmark_model_time = dt.datetime.now() - start

    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))
@@ -6,15 +6,18 @@ Created on 2017-4-25
"""
import datetime as dt

import numpy as np
import pandas as pd

from alphamind.data.winsorize import winsorize_normal

def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) -> None:
    print("-" * 60)
    print("Starting winsorize normal benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features, n_loops))
+    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2})".format(n_samples, n_features,
+                                                                             n_loops))
    num_stds = 2
@@ -46,10 +49,15 @@ def benchmark_winsorize_normal(n_samples: int, n_features: int, n_loops: int) ->
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))

-def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int, n_groups: int) -> None:
+def benchmark_winsorize_normal_with_group(n_samples: int, n_features: int, n_loops: int,
+                                          n_groups: int) -> None:
    print("-" * 60)
    print("Starting winsorize normal with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_features, n_loops, n_groups))
+    print(
+        "Parameters(n_samples: {0}, n_features: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
+                                                                                          n_features,
+                                                                                          n_loops,
+                                                                                          n_groups))
    num_stds = 2
@@ -6,10 +6,12 @@ Created on 2017-5-5
"""
import datetime as dt

import numpy as np
-from scipy.optimize import linprog
from cvxopt import matrix
from cvxopt import solvers
+from scipy.optimize import linprog

from alphamind.portfolio.linearbuilder import linear_builder

solvers.options['show_progress'] = False
@@ -18,7 +20,8 @@ solvers.options['show_progress'] = False
def benchmark_build_linear(n_samples: int, n_risks: int, n_loop: int) -> None:
    print("-" * 60)
    print("Starting portfolio construction by linear programming")
-    print("Parameters(n_samples: {0}, n_risks: {1}, n_loop: {2})".format(n_samples, n_risks, n_loop))
+    print(
+        "Parameters(n_samples: {0}, n_risks: {1}, n_loop: {2})".format(n_samples, n_risks, n_loop))
    er = np.random.randn(n_samples)
    risk_exp = np.random.randn(n_samples, n_risks)
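The LP being timed has the familiar portfolio form. A hedged sketch using scipy's linprog, with toy box bounds and a full-investment constraint (the actual constraint set inside linear_builder is not shown in this hunk):

import numpy as np
from scipy.optimize import linprog

n = 200
er = np.random.randn(n)
res = linprog(c=-er,                             # maximize er @ w
              A_eq=np.ones((1, n)), b_eq=[1.0],  # weights sum to one
              bounds=[(0.0, 0.02)] * n)
weights = res.x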
@@ -4,4 +4,3 @@ Created on 2017-5-9
@author: cheng.li
"""
@@ -6,15 +6,18 @@ Created on 2017-5-4
"""
import datetime as dt

import numpy as np
import pandas as pd

from alphamind.portfolio.percentbuilder import percent_build

def benchmark_build_percent(n_samples: int, n_loops: int, p_included: float) -> None:
    print("-" * 60)
    print("Starting portfolio construction by percent benchmarking")
-    print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2})".format(n_samples, p_included, n_loops))
+    print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2})".format(n_samples, p_included,
+                                                                             n_loops))
    n_portfolio = 10
@@ -41,10 +44,15 @@ def benchmark_build_percent(n_samples: int, n_loops: int, p_included: float) ->
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))

-def benchmark_build_percent_with_group(n_samples: int, n_loops: int, p_included: float, n_groups: int) -> None:
+def benchmark_build_percent_with_group(n_samples: int, n_loops: int, p_included: float,
+                                       n_groups: int) -> None:
    print("-" * 60)
    print("Starting portfolio construction by percent with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, p_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, p_included, n_loops, n_groups))
+    print(
+        "Parameters(n_samples: {0}, p_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
+                                                                                          p_included,
+                                                                                          n_loops,
+                                                                                          n_groups))
    n_portfolio = 10
@@ -6,15 +6,18 @@ Created on 2017-4-27
"""
import datetime as dt

import numpy as np
import pandas as pd

from alphamind.portfolio.rankbuilder import rank_build

def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
    print("-" * 60)
    print("Starting portfolio construction by rank benchmarking")
-    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included, n_loops))
+    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2})".format(n_samples, n_included,
+                                                                             n_loops))
    n_portfolio = 10
@@ -40,10 +43,15 @@ def benchmark_build_rank(n_samples: int, n_loops: int, n_included: int) -> None:
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))

-def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int, n_groups: int) -> None:
+def benchmark_build_rank_with_group(n_samples: int, n_loops: int, n_included: int,
+                                    n_groups: int) -> None:
    print("-" * 60)
    print("Starting portfolio construction by rank with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_included, n_loops, n_groups))
+    print(
+        "Parameters(n_samples: {0}, n_included: {1}, n_loops: {2}, n_groups: {3})".format(n_samples,
+                                                                                          n_included,
+                                                                                          n_loops,
+                                                                                          n_groups))
    n_portfolio = 10
@@ -6,15 +6,19 @@ Created on 2017-4-28
"""
import datetime as dt

import numpy as np
import pandas as pd

from alphamind.settlement.simplesettle import simple_settle

def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) -> None:
    print("-" * 60)
    print("Starting simple settle benchmarking")
-    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples, n_portfolios, n_loops))
+    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2})".format(n_samples,
+                                                                               n_portfolios,
+                                                                               n_loops))
    weights = np.random.randn(n_samples, n_portfolios)
    ret_series = np.random.randn(n_samples)
@@ -37,10 +41,12 @@ def benchmark_simple_settle(n_samples: int, n_portfolios: int, n_loops: int) ->
    print('{0:20s}: {1}'.format('Benchmark model', benchmark_model_time))

-def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int, n_groups: int) -> None:
+def benchmark_simple_settle_with_group(n_samples: int, n_portfolios: int, n_loops: int,
+                                       n_groups: int) -> None:
    print("-" * 60)
    print("Starting simple settle with group-by values benchmarking")
-    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(n_samples, n_portfolios, n_loops, n_groups))
+    print("Parameters(n_samples: {0}, n_portfolios: {1}, n_loops: {2}, n_groups: {3})".format(
+        n_samples, n_portfolios, n_loops, n_groups))
    weights = np.random.randn(n_samples, n_portfolios)
    ret_series = np.random.randn(n_samples)
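Given the inputs above, "simple settle" amounts to a weighted sum of returns per portfolio. A sketch of the quantity, inferred from the benchmark's inputs rather than from simple_settle's source (the grouped variant additionally aggregates by group):

import numpy as np

weights = np.random.randn(3000, 10)    # one column per portfolio
ret_series = np.random.randn(3000)     # per-asset returns
pnl = weights.T @ ret_series           # one P&L value per portfolio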
@@ -7,7 +7,9 @@ Created on 2017-6-29
import argparse
from collections import namedtuple

from sqlalchemy import create_engine

from alphamind.data.dbmodel import models
from alphamind.utilities import alpha_logger
@@ -10,7 +10,16 @@ cimport numpy as cnp
from libcpp.string cimport string
from libcpp.vector cimport vector

import numpy as np
+from PyFin.api import pyFinAssert
+
+cimport numpy as cnp
+import numpy as np
+from libcpp.string cimport string
+from libcpp.vector cimport vector

cdef extern from "lpoptimizer.hpp" namespace "pfopt":
@@ -5,11 +5,10 @@ Created on 2017-4-25
@author: cheng.li
"""
-from alphamind.data.standardize import standardize
-from alphamind.data.winsorize import winsorize_normal as winsorize
from alphamind.data.neutralize import neutralize
from alphamind.data.rank import rank
+from alphamind.data.standardize import standardize
+from alphamind.data.winsorize import winsorize_normal as winsorize

__all__ = ['standardize',
           'winsorize',
@@ -4,4 +4,3 @@ Created on 2017-6-29
@author: cheng.li
"""
@@ -5,7 +5,7 @@ Created on 2017-6-29
@author: cheng.li
"""
-from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text, Boolean, text, JSON
+from sqlalchemy import BigInteger, Column, DateTime, Float, Index, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()
@@ -72,7 +72,6 @@ class FundMaster(Base):
class Categories(Base):
    __tablename__ = 'categories'
    __table_args__ = (
        Index('categories_pk', 'trade_date', 'code', unique=True),
@@ -100,7 +99,8 @@ class FactorMaster(Base):
class HaltList(Base):
    __tablename__ = 'halt_list'
    __table_args__ = (
-        Index('halt_list_Date_Code_haltBeginTime_uindex', 'trade_date', 'code', 'haltBeginTime', unique=True),
+        Index('halt_list_Date_Code_haltBeginTime_uindex', 'trade_date', 'code', 'haltBeginTime',
+              unique=True),
    )
    trade_date = Column(DateTime, primary_key=True, nullable=False)
@@ -5,13 +5,15 @@ Created on 2017-7-7
@author: cheng.li
"""
-import sys
import abc
+import sys

import pandas as pd
from sqlalchemy import and_
-from sqlalchemy import or_
from sqlalchemy import not_
+from sqlalchemy import or_
from sqlalchemy import select

from alphamind.data.dbmodel.models import Universe as UniverseTable
@@ -53,7 +55,8 @@ class BaseUniverse(metaclass=abc.ABCMeta):
    def _query_statements(self, start_date: str = None, end_date: str = None, dates=None):
        return and_(
            self.condition(),
-            UniverseTable.trade_date.in_(dates) if dates else UniverseTable.trade_date.between(start_date, end_date)
+            UniverseTable.trade_date.in_(dates) if dates else UniverseTable.trade_date.between(
+                start_date, end_date)
        )
@@ -177,7 +180,6 @@ def load_universe(u_desc: dict):
if __name__ == '__main__':
-    from PyFin.api import *
    from alphamind.data.engines.sqlengine import SqlEngine

    engine = SqlEngine()
@@ -5,21 +5,21 @@ Created on 2017-12-25
@author: cheng.li
"""
-from typing import Iterable
from typing import Dict
+from typing import Iterable

+from alphamind.data.dbmodel.models import Categories
from alphamind.data.dbmodel.models import Market
from alphamind.data.dbmodel.models import RiskCovDay
-from alphamind.data.dbmodel.models import RiskCovShort
from alphamind.data.dbmodel.models import RiskCovLong
+from alphamind.data.dbmodel.models import RiskCovShort
+from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import SpecificRiskDay
-from alphamind.data.dbmodel.models import SpecificRiskShort
from alphamind.data.dbmodel.models import SpecificRiskLong
+from alphamind.data.dbmodel.models import SpecificRiskShort
from alphamind.data.dbmodel.models import Uqer
-from alphamind.data.dbmodel.models import RiskExposure
-from alphamind.data.dbmodel.models import Categories
from alphamind.data.engines.industries import INDUSTRY_MAPPING

factor_tables = [Market, RiskExposure, Uqer, Categories]
@@ -66,5 +66,5 @@ def _map_industry_category(category: str) -> str:
        raise ValueError("No other industry is supported at the current time")

-def industry_list(category: str, level: int=1) -> list:
+def industry_list(category: str, level: int = 1) -> list:
    return INDUSTRY_MAPPING[category][level]
@@ -5,21 +5,22 @@ Created on 2017-4-25
@author: cheng.li
"""
-import numpy as np
-import numba as nb
+from typing import Dict
from typing import Tuple
from typing import Union
-from typing import Dict

+import numba as nb
+import numpy as np

import alphamind.utilities as utils

def neutralize(x: np.ndarray,
               y: np.ndarray,
-               groups: np.ndarray=None,
-               detail: bool=False,
+               groups: np.ndarray = None,
+               detail: bool = False,
               weights: np.ndarray = None) \
        -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
    if y.ndim == 1:
        y = y.reshape((-1, 1))
@@ -5,19 +5,20 @@ Created on 2017-8-21
@author: cheng.li
"""
-from typing import Optional
from typing import List
+from typing import Optional

import numpy as np

from alphamind.data.neutralize import neutralize
from alphamind.utilities import alpha_logger

def factor_processing(raw_factors: np.ndarray,
-                      pre_process: Optional[List]=None,
-                      risk_factors: Optional[np.ndarray]=None,
-                      post_process: Optional[List]=None,
+                      pre_process: Optional[List] = None,
+                      risk_factors: Optional[np.ndarray] = None,
+                      post_process: Optional[List] = None,
                      groups=None) -> np.ndarray:
    new_factors = raw_factors

    if pre_process:
@@ -31,7 +32,8 @@ def factor_processing(raw_factors: np.ndarray,
    if post_process:
        for p in post_process:
            if p.__name__ == 'winsorize_normal':
-                alpha_logger.warning("winsorize_normal normally should not be done after neutralize")
+                alpha_logger.warning("winsorize_normal "
+                                     "normally should not be done after neutralize")
            new_factors = p(new_factors, groups=groups)
    return new_factors
@@ -9,7 +9,6 @@ import numpy as np
def quantile(x: np.ndarray, n_bins: int) -> np.ndarray:
    n = x.size
    sorter = x.argsort()
    inv = np.empty(n, dtype=int)
@@ -17,7 +16,7 @@ def quantile(x: np.ndarray, n_bins: int) -> np.ndarray:
    bin_size = float(n) / n_bins
-    pillars = [int(i * bin_size) for i in range(1, n_bins+1)]
+    pillars = [int(i * bin_size) for i in range(1, n_bins + 1)]

    q_groups = np.empty(n, dtype=int)
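The surviving fragments pin down the algorithm: rank the values, then cut the ranks at int(i * n / n_bins). A compact numpy sketch consistent with those lines:

import numpy as np

def quantile_sketch(x: np.ndarray, n_bins: int) -> np.ndarray:
    n = x.size
    inv = np.empty(n, dtype=int)
    inv[x.argsort()] = np.arange(n)      # zero-based rank of each element
    bin_size = float(n) / n_bins
    pillars = [int(i * bin_size) for i in range(1, n_bins + 1)]
    return np.searchsorted(pillars, inv, side='right')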
@@ -6,13 +6,14 @@ Created on 2017-8-8
"""
from typing import Optional

import numpy as np
from scipy.stats import rankdata

import alphamind.utilities as utils

-def rank(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
+def rank(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    if x.ndim == 1:
        x = x.reshape((-1, 1))
@@ -30,8 +31,7 @@ def rank(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
    return (rankdata(x).astype(float) - 1.).reshape((-1, 1))

-def percentile(x: np.ndarray, groups: Optional[np.ndarray]=None) -> np.ndarray:
+def percentile(x: np.ndarray, groups: Optional[np.ndarray] = None) -> np.ndarray:
    if x.ndim == 1:
        x = x.reshape((-1, 1))
@@ -6,17 +6,17 @@ Created on 2017-4-25
"""
import numpy as np

-from alphamind.utilities import group_mapping
-from alphamind.utilities import transform
from alphamind.utilities import aggregate
from alphamind.utilities import array_index
+from alphamind.utilities import group_mapping
from alphamind.utilities import simple_mean
-from alphamind.utilities import simple_std
from alphamind.utilities import simple_sqrsum
+from alphamind.utilities import simple_std
+from alphamind.utilities import transform

-def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
+def standardize(x: np.ndarray, groups: np.ndarray = None, ddof=1) -> np.ndarray:
    if groups is not None:
        groups = group_mapping(groups)
        mean_values = transform(groups, x, 'mean')
@@ -27,7 +27,7 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
    return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)

-def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
+def projection(x: np.ndarray, groups: np.ndarray = None, axis=1) -> np.ndarray:
    if groups is not None and axis == 0:
        groups = group_mapping(groups)
        projected = transform(groups, x, 'project')
@@ -38,13 +38,13 @@ def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
class Standardizer(object):

-    def __init__(self, ddof: int=1):
+    def __init__(self, ddof: int = 1):
        self.ddof = ddof
        self.mean = None
        self.std = None
        self.labels = None

-    def fit(self, x: np.ndarray, groups: np.ndarray=None):
+    def fit(self, x: np.ndarray, groups: np.ndarray = None):
        if groups is not None:
            group_index = group_mapping(groups)
            self.mean = aggregate(group_index, x, 'mean')
@@ -54,12 +54,12 @@ class Standardizer(object):
            self.mean = simple_mean(x, axis=0)
            self.std = simple_std(x, axis=0, ddof=self.ddof)

-    def transform(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        if groups is not None:
            index = array_index(self.labels, groups)
            return (x - self.mean[index]) / np.maximum(self.std[index], 1e-8)
        else:
            return (x - self.mean) / np.maximum(self.std, 1e-8)

-    def __call__(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        return standardize(x, groups, self.ddof)
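The formula on display is the cross-sectional z-score with a 1e-8 floor on the dispersion; with a group-by it is applied within each group. A pandas-based sketch of the same computation (the library itself uses its own transform/aggregate kernels):

import numpy as np
import pandas as pd

def standardize_sketch(x: np.ndarray, groups: np.ndarray = None, ddof: int = 1) -> np.ndarray:
    if groups is None:
        return (x - x.mean(axis=0)) / np.maximum(x.std(axis=0, ddof=ddof), 1e-8)
    return pd.DataFrame(x).groupby(groups).transform(
        lambda s: (s - s.mean()) / max(s.std(ddof=ddof), 1e-8)).values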
@@ -6,14 +6,14 @@ Created on 2017-8-23
"""
import copy

import pandas as pd
-from PyFin.api import pyFinAssert
from PyFin.Analysis.SecurityValueHolders import SecurityValueHolder
+from PyFin.api import pyFinAssert
from PyFin.api import transform as transform_impl

def factor_translator(factor_pool):
    if not factor_pool:
        return None, None
@@ -24,7 +24,8 @@ def factor_translator(factor_pool):
    elif isinstance(factor_pool, dict):
        dependency = set()
        for k, v in factor_pool.items():
-            pyFinAssert(isinstance(k, str), ValueError, 'factor_name {0} should be string.'.format(k))
+            pyFinAssert(isinstance(k, str), ValueError,
+                        'factor_name {0} should be string.'.format(k))
            pyFinAssert(isinstance(v, SecurityValueHolder) or isinstance(v, str),
                        ValueError,
                        'expression {0} should be a value hodler or a string.'.format(v))
@@ -80,5 +81,4 @@ class Transformer(object):
if __name__ == '__main__':
    transformer = Transformer(['c', 'a'])
@@ -5,14 +5,15 @@ Created on 2017-4-25
@author: cheng.li
"""
-import numpy as np
import numba as nb
+import numpy as np

-from alphamind.utilities import group_mapping
from alphamind.utilities import aggregate
-from alphamind.utilities import transform
from alphamind.utilities import array_index
+from alphamind.utilities import group_mapping
from alphamind.utilities import simple_mean
from alphamind.utilities import simple_std
+from alphamind.utilities import transform

@nb.njit(nogil=True, cache=True)
@@ -31,7 +32,6 @@ def mask_values_2d(x: np.ndarray,
                res[i, j] = ubound
            elif x[i, j] < lbound:
                res[i, j] = lbound
    return res
@@ -54,7 +54,10 @@ def mask_values_1d(x: np.ndarray,
    return res

-def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1, groups: np.ndarray = None) -> np.ndarray:
+def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1,
+                     groups: np.ndarray = None,
+                     fill_method: str = 'flat',
+                     fill_interval: int = 0.5) -> np.ndarray:
    if groups is not None:
        groups = group_mapping(groups)
        mean_values = transform(groups, x, 'mean')
@@ -69,14 +72,14 @@ def winsorize_normal(x: np.ndarray, num_stds: int = 3, ddof=1, groups: np.ndarra
class NormalWinsorizer(object):

-    def __init__(self, num_stds: int=3, ddof=1):
+    def __init__(self, num_stds: int = 3, ddof=1):
        self.num_stds = num_stds
        self.ddof = ddof
        self.mean = None
        self.std = None
        self.labels = None

-    def fit(self, x: np.ndarray, groups: np.ndarray=None):
+    def fit(self, x: np.ndarray, groups: np.ndarray = None):
        if groups is not None:
            group_index = group_mapping(groups)
            self.mean = aggregate(group_index, x, 'mean')
@@ -86,12 +89,12 @@ class NormalWinsorizer(object):
            self.mean = simple_mean(x, axis=0)
            self.std = simple_std(x, axis=0, ddof=self.ddof)

-    def transform(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    def transform(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        if groups is not None:
            index = array_index(self.labels, groups)
            return mask_values_2d(x, self.mean[index], self.std[index], self.num_stds)
        else:
            return mask_values_1d(x, self.mean, self.std, self.num_stds)

-    def __call__(self, x: np.ndarray, groups: np.ndarray=None) -> np.ndarray:
+    def __call__(self, x: np.ndarray, groups: np.ndarray = None) -> np.ndarray:
        return winsorize_normal(x, self.num_stds, self.ddof, groups)
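Per the mask_values kernels above, winsorization here means clipping each column at mean ± num_stds · std. A numpy sketch of the ungrouped path (the new fill_method / fill_interval arguments introduced in this hunk are not modeled):

import numpy as np

def winsorize_normal_sketch(x: np.ndarray, num_stds: int = 3, ddof: int = 1) -> np.ndarray:
    mean = x.mean(axis=0)
    std = x.std(axis=0, ddof=ddof)
    return np.clip(x, mean - num_stds * std, mean + num_stds * std)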
@@ -5,9 +5,8 @@ Created on 2017-9-22
@author: cheng.li
"""
import abc
-from typing import Tuple

import numpy as np
import pandas as pd
@@ -6,7 +6,9 @@ Created on 2017-9-22
"""
from typing import Tuple

import pandas as pd

from alphamind.execution.baseexecutor import ExecutorBase
@@ -7,7 +7,9 @@ Created on 2017-9-25
from typing import List
from typing import Tuple

import pandas as pd

from alphamind.execution.baseexecutor import ExecutorBase
@@ -6,9 +6,11 @@ Created on 2017-9-22
"""
from typing import Tuple

import pandas as pd
-from PyFin.Math.Accumulators import MovingStandardDeviation
from PyFin.Math.Accumulators import MovingAverage
+from PyFin.Math.Accumulators import MovingStandardDeviation

from alphamind.execution.baseexecutor import ExecutorBase
@@ -6,7 +6,9 @@ Created on 2017-9-22
"""
from typing import Tuple

import pandas as pd

from alphamind.execution.baseexecutor import ExecutorBase
@@ -5,8 +5,8 @@ Created on 2017-11-27
@author: cheng.li
"""
-from alphamind.utilities import encode
from alphamind.utilities import decode
+from alphamind.utilities import encode

def encode_formula(formula):
@@ -5,22 +5,18 @@ Created on 2017-5-2
@author: cheng.li
"""
-from alphamind.model.linearmodel import LinearRegression
-from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import ConstLinearModel
+from alphamind.model.linearmodel import LassoRegression
+from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LogisticRegression
+from alphamind.model.loader import load_model
-from alphamind.model.treemodel import RandomForestRegressor
+from alphamind.model.svm import NvSVRModel
from alphamind.model.treemodel import RandomForestClassifier
-from alphamind.model.treemodel import XGBRegressor
+from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBClassifier
+from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBTrainer
-from alphamind.model.svm import NvSVRModel
-from alphamind.model.loader import load_model

__all__ = ['LinearRegression',
           'LassoRegression',
           'ConstLinearModel',
@@ -5,24 +5,26 @@ Created on 2017-9-27

@author: cheng.li
"""

import bisect
import copy
from typing import Iterable
from typing import Tuple

import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq

from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe
from alphamind.data.engines.universe import load_universe
from alphamind.data.rank import rank
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.model.data_preparing import fetch_predict_phase
from alphamind.model.data_preparing import fetch_train_phase
from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.loader import load_model
from alphamind.model.modelbase import ModelBase

PROCESS_MAPPING = {
    'winsorize_normal': winsorize_normal,
...
@@ -87,7 +89,7 @@ class DataMeta(object):
    @classmethod
    def load(cls, data_desc: dict):
        freq = data_desc['freq']
        universe = load_universe(data_desc['universe'])
        batch = data_desc['batch']
        neutralized_risk = data_desc['neutralized_risk']
        risk_model = data_desc['risk_model']
...
@@ -193,7 +195,8 @@ class Composer:
        codes = x.index
        return pd.DataFrame(model.predict(x).flatten(), index=codes), x

    def score(self, ref_date: str, x: pd.DataFrame = None, y: np.ndarray = None,
              d_type: str = 'test') \
            -> Tuple[float, pd.DataFrame, pd.DataFrame]:
        model = self._fetch_latest_model(ref_date)
        if x is None or y is None:
...
@@ -244,8 +247,7 @@ class Composer:

if __name__ == '__main__':
    from alphamind.api import (industry_styles,
                               standardize,
                               winsorize_normal,
                               DataMeta,
...
@@ -287,13 +289,15 @@ if __name__ == '__main__':
                         warm_start=warm_start,
                         data_source=data_source)
    alpha_model = LinearRegression(features=regress_features, fit_intercept=True,
                                   fit_target=fit_target)
    composer = Composer(alpha_model=alpha_model, data_meta=data_meta)

    start_date = '2014-01-01'
    end_date = '2016-01-01'

    regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept,
                                        fit_target=fit_target)
    regression_composer = Composer(alpha_model=regression_model, data_meta=data_meta)

    data_package1 = fetch_data_package(engine,
...
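A hedged sketch of wiring these pieces together. Only Composer(alpha_model=..., data_meta=...) and the LinearRegression keywords are confirmed by this diff; the DataMeta constructor arguments are assumptions mirroring the keys its load() classmethod reads above:

from alphamind.api import Universe, DataMeta
from alphamind.model.composer import Composer
from alphamind.model.linearmodel import LinearRegression

# assumed constructor arguments, mirroring the keys DataMeta.load reads
data_meta = DataMeta(freq='10b',
                     universe=Universe('zz800'),
                     batch=4,
                     neutralized_risk=None,
                     risk_model='short')
alpha_model = LinearRegression(features=['EPS', 'ROEDiluted'], fit_intercept=True)
composer = Composer(alpha_model=alpha_model, data_meta=data_meta)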
@@ -7,21 +7,23 @@ Created on 2017-8-24

import bisect
import datetime as dt
from typing import Iterable
from typing import Union

import numpy as np
import pandas as pd
from PyFin.DateUtilities import Period
from PyFin.api import BizDayConventions
from PyFin.api import DateGeneration
from PyFin.api import advanceDateByCalendar
from PyFin.api import makeSchedule
from PyFin.api import pyFinAssert

from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.sqlengine import total_risk_factors
from alphamind.data.engines.universe import Universe
from alphamind.data.processing import factor_processing
from alphamind.data.transformer import Transformer
from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq
...
@@ -60,7 +62,7 @@ def prepare_data(engine: SqlEngine,
                 universe: Universe,
                 benchmark: int,
                 warm_start: int = 0,
                 fit_target: Union[Transformer, object] = None):
    if warm_start > 0:
        p = Period(frequency)
        p = Period(length=-warm_start * p.length(), units=p.units())
...
@@ -91,7 +93,8 @@ def prepare_data(engine: SqlEngine,
        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    else:
        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
        target_df = engine.fetch_factor_range_forward(universe, factors=fit_target,
                                                      dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
    alpha_logger.info("fit target data loading finished")
...
@@ -175,7 +178,8 @@ def batch_processing(names,
            inner_left_index = bisect.bisect_left(sub_dates, end)
            inner_right_index = bisect.bisect_right(sub_dates, end)
            predict_x_buckets[end] = pd.DataFrame(ne_x[inner_left_index:inner_right_index],
                                                  columns=names)
            if risk_exp is not None:
                predict_risk_buckets[end] = this_risk_exp[inner_left_index:inner_right_index]
            else:
...
@@ -227,7 +231,8 @@ def fetch_data_package(engine: SqlEngine,
                                          fit_target=fit_target)
    target_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
        _merge_df(engine, names, factor_df, target_df, universe, dates, risk_model,
                  neutralized_risk)

    alpha_logger.info("data merging finished")
...
@@ -259,17 +264,24 @@ def fetch_data_package(engine: SqlEngine,
    ret['x_names'] = names
    ret['settlement'] = target_df[target_df.trade_date >= start_date]

    train_x_buckets = {k: train_x_buckets[k] for k in train_x_buckets if
                       k.strftime('%Y-%m-%d') >= start_date}
    train_y_buckets = {k: train_y_buckets[k] for k in train_y_buckets if
                       k.strftime('%Y-%m-%d') >= start_date}
    train_risk_buckets = {k: train_risk_buckets[k] for k in train_risk_buckets if
                          k.strftime('%Y-%m-%d') >= start_date}
    predict_x_buckets = {k: predict_x_buckets[k] for k in predict_x_buckets if
                         k.strftime('%Y-%m-%d') >= start_date}
    predict_y_buckets = {k: predict_y_buckets[k] for k in predict_y_buckets if
                         k.strftime('%Y-%m-%d') >= start_date}
    if neutralized_risk:
        predict_risk_buckets = {k: predict_risk_buckets[k] for k in predict_risk_buckets if
                                k.strftime('%Y-%m-%d') >= start_date}
    else:
        predict_risk_buckets = None
    predict_codes_bucket = {k: predict_codes_bucket[k] for k in predict_codes_bucket if
                            k.strftime('%Y-%m-%d') >= start_date}

    ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
    ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets,
...
@@ -312,7 +324,8 @@ def fetch_train_phase(engine,
        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    else:
        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
        target_df = engine.fetch_factor_range_forward(universe, factors=fit_target,
                                                      dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
...
@@ -322,10 +335,12 @@ def fetch_train_phase(engine,
                          ['trade_date', 'code'] + transformer.names]

    target_df, dates, date_label, risk_exp, x_values, y_values, _, _, codes = \
        _merge_df(engine, transformer.names, factor_df, target_df, universe, dates, risk_model,
                  neutralized_risk)

    if dates[-1] == dt.datetime.strptime(ref_date, '%Y-%m-%d'):
        pyFinAssert(len(dates) >= 2, ValueError,
                    "No previous data for training for the date {0}".format(ref_date))
        end = dates[-2]
        start = dates[-batch - 1] if batch <= len(dates) - 1 else dates[0]
    else:
...
@@ -353,7 +368,8 @@ def fetch_train_phase(engine,
    ret = dict()
    ret['x_names'] = transformer.names
    ret['train'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'y': ne_y,
                    'code': this_code}

    return ret
...
@@ -392,7 +408,8 @@ def fetch_predict_phase(engine,
    factor_df = engine.fetch_factor_range(universe, factors=transformer, dates=dates)

    if fillna:
        factor_df = factor_df.groupby('trade_date').apply(
            lambda x: x.fillna(x.median())).reset_index(
            drop=True).dropna()
    else:
        factor_df = factor_df.dropna()
...
@@ -401,7 +418,8 @@ def fetch_predict_phase(engine,
        target_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
    else:
        one_more_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
        target_df = engine.fetch_factor_range_forward(universe, factors=fit_target,
                                                      dates=dates + [one_more_date])
        target_df = target_df[target_df.trade_date.isin(dates)]
        target_df = target_df.groupby('code').apply(lambda x: x.fillna(method='pad'))
...
@@ -467,7 +485,7 @@ def fetch_predict_phase(engine,
    ret = dict()
    ret['x_names'] = transformer.names
    ret['predict'] = {'x': pd.DataFrame(ne_x, columns=transformer.names), 'code': codes,
                      'y': ne_y.flatten()}

    return ret
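The per-code forward fill applied to target_df above is a plain pandas pattern; a self-contained illustration with toy data:

import pandas as pd

df = pd.DataFrame({'code': [1, 1, 2, 2],
                   'dx': [0.01, None, 0.02, None]})
# forward-fill within each code group, as done for target_df above
filled = df.groupby('code').apply(lambda x: x.fillna(method='pad'))
print(filled)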
@@ -6,10 +6,11 @@ Created on 2017-5-10
"""

import numpy as np
from PyFin.api import pyFinAssert
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression as LinearRegressionImpl
from sklearn.linear_model import LogisticRegression as LogisticRegressionImpl

from alphamind.model.modelbase import create_model_base
...
@@ -78,7 +79,8 @@ class LinearRegression(create_model_base('sklearn')):

class LassoRegression(create_model_base('sklearn')):

    def __init__(self, alpha=0.01, features=None, fit_intercept: bool = False, fit_target=None,
                 **kwargs):
        super().__init__(features=features, fit_target=fit_target)
        self.impl = Lasso(alpha=alpha, fit_intercept=fit_intercept, **kwargs)
...
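A short sketch of the Lasso wrapper defined above; delegation of fit to the sklearn impl is assumed from the wrapper pattern, and the feature names are illustrative:

import numpy as np
from alphamind.model.linearmodel import LassoRegression

x = np.random.randn(500, 2)
y = x @ np.array([0.1, -0.2]) + 0.01 * np.random.randn(500)
model = LassoRegression(alpha=0.01, features=['a', 'b'])
model.fit(x, y)          # assumed to delegate to self.impl (sklearn Lasso)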
@@ -5,21 +5,20 @@ Created on 2017-9-5

@author: cheng.li
"""

from alphamind.model.linearmodel import ConstLinearModel
from alphamind.model.linearmodel import LassoRegression
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.linearmodel import LogisticRegression
from alphamind.model.modelbase import ModelBase
from alphamind.model.svm import NvSVRModel
from alphamind.model.treemodel import RandomForestClassifier
from alphamind.model.treemodel import RandomForestRegressor
from alphamind.model.treemodel import XGBClassifier
from alphamind.model.treemodel import XGBRegressor
from alphamind.model.treemodel import XGBTrainer


def load_model(model_desc: dict) -> ModelBase:
    model_name = model_desc['model_name']
    model_name_parts = set(model_name.split('.'))
...
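The intended round trip, grounded in ModelBase.save() returning a dict and load_model taking one; the fit signature is assumed from the sklearn-style wrappers:

import numpy as np
from alphamind.model.linearmodel import LinearRegression
from alphamind.model.loader import load_model

model = LinearRegression(features=['f1', 'f2'])
model.fit(np.random.randn(100, 2), np.random.randn(100))   # assumed signature
desc = model.save()        # dict carrying model_name, language, lib versions
clone = load_model(desc)   # rebuilds an equivalent wrapper from the dict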
@@ -7,16 +7,18 @@ Created on 2017-9-4

import abc
from distutils.version import LooseVersion

import arrow
import numpy as np
import pandas as pd
from simpleutils.miscellaneous import list_eq
from sklearn import __version__ as sklearn_version
from xgboost import __version__ as xgbboot_version

from alphamind.data.transformer import Transformer
from alphamind.utilities import alpha_logger
from alphamind.utilities import decode
from alphamind.utilities import encode


class ModelBase(metaclass=abc.ABCMeta):
...
@@ -67,7 +69,8 @@ class ModelBase(metaclass=abc.ABCMeta):

    def save(self) -> dict:
        if self.__class__.__module__ == '__main__':
            alpha_logger.warning(
                "model is defined in a main module. The model_name may not be correct.")
        model_desc = dict(model_name=self.__class__.__module__ + "." + self.__class__.__name__,
                          language='python',
...
@@ -109,7 +112,8 @@ def create_model_base(party_name=None):
            elif self._lib_name == 'xgboost':
                model_desc[self._lib_name + "_version"] = xgbboot_version
            else:
                raise ValueError(
                    "3rd party lib name ({0}) is not recognized".format(self._lib_name))
            return model_desc

        @classmethod
...
@@ -121,12 +125,16 @@ def create_model_base(party_name=None):
            elif cls._lib_name == 'xgboost':
                current_version = xgbboot_version
            else:
                raise ValueError(
                    "3rd party lib name ({0}) is not recognized".format(cls._lib_name))

            if LooseVersion(current_version) < LooseVersion(
                    model_desc[cls._lib_name + "_version"]):
                alpha_logger.warning(
                    'Current {2} version {0} is lower than the model version {1}. '
                    'Loaded model may work incorrectly.'.format(sklearn_version,
                                                                model_desc[cls._lib_name],
                                                                cls._lib_name))
            return obj_layout

    return ExternalLibBase
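The version guard above relies on distutils' LooseVersion ordering, which compares dotted versions component by component rather than lexically:

from distutils.version import LooseVersion

# the same comparison the loader performs before warning the user
print(LooseVersion('0.90') < LooseVersion('1.0.2'))   # True: the running lib is older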
@@ -6,6 +6,7 @@ Created on 2018-7-9
"""

from sklearn.svm import NuSVR

from alphamind.model.modelbase import create_model_base
...
@@ -8,20 +8,21 @@ Created on 2017-12-4

import arrow
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier as RandomForestClassifierImpl
from sklearn.ensemble import RandomForestRegressor as RandomForestRegressorImpl
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier as XGBClassifierImpl
from xgboost import XGBRegressor as XGBRegressorImpl

from alphamind.model.modelbase import create_model_base


class RandomForestRegressor(create_model_base('sklearn')):

    def __init__(self,
                 n_estimators: int = 100,
                 max_features: str = 'auto',
                 features=None,
                 fit_target=None,
                 **kwargs):
...
@@ -38,8 +39,8 @@ class RandomForestRegressor(create_model_base('sklearn')):

class RandomForestClassifier(create_model_base('sklearn')):

    def __init__(self,
                 n_estimators: int = 100,
                 max_features: str = 'auto',
                 features=None,
                 fit_target=None,
                 **kwargs):
...
@@ -56,13 +57,13 @@ class RandomForestClassifier(create_model_base('sklearn')):

class XGBRegressor(create_model_base('xgboost')):

    def __init__(self,
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth: int = 3,
                 features=None,
                 fit_target=None,
                 n_jobs: int = 1,
                 missing: float = np.nan,
                 **kwargs):
        super().__init__(features=features, fit_target=fit_target)
        self.impl = XGBRegressorImpl(n_estimators=n_estimators,
...
@@ -80,13 +81,13 @@ class XGBRegressor(create_model_base('xgboost')):

class XGBClassifier(create_model_base('xgboost')):

    def __init__(self,
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth: int = 3,
                 features=None,
                 fit_target=None,
                 n_jobs: int = 1,
                 missing: float = np.nan,
                 **kwargs):
        super().__init__(features=features, fit_target=fit_target)
        self.impl = XGBClassifierImpl(n_estimators=n_estimators,
...
@@ -108,8 +109,8 @@ class XGBTrainer(create_model_base('xgboost')):
                 objective='binary:logistic',
                 booster='gbtree',
                 tree_method='hist',
                 n_estimators: int = 100,
                 learning_rate: float = 0.1,
                 max_depth=3,
                 eval_sample=None,
                 early_stopping_rounds=None,
...
@@ -117,8 +118,8 @@ class XGBTrainer(create_model_base('xgboost')):
                 colsample_bytree=1.,
                 features=None,
                 fit_target=None,
                 random_state: int = 0,
                 n_jobs: int = 1,
                 **kwargs):
        super().__init__(features=features, fit_target=fit_target)
        self.params = {
...
@@ -173,8 +174,3 @@ class XGBTrainer(create_model_base('xgboost')):
        imps = self.impl.get_fscore().items()
        imps = sorted(imps, key=lambda x: x[0])
        return list(zip(*imps))[1]
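A usage sketch for the tree wrappers above; delegation of fit to self.impl is assumed from the constructor pattern shown:

import numpy as np
from alphamind.model.treemodel import XGBRegressor

x = np.random.randn(200, 4)
y = 0.5 * x[:, 0] + 0.1 * np.random.randn(200)
model = XGBRegressor(n_estimators=50, learning_rate=0.1, max_depth=3)
model.fit(x, y)          # assumed sklearn-style delegation to self.impl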
@@ -5,18 +5,19 @@ Created on 2017-7-21

@author: cheng.li
"""

from enum import IntEnum
from math import inf
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union

import numpy as np
import pandas as pd
from PyFin.api import pyFinAssert
from deprecated import deprecated


class BoundaryDirection(IntEnum):
...
@@ -43,11 +44,14 @@ class BoundaryImpl(object):
        self._validation()

    def _validation(self):
        pyFinAssert(
            self.b_type in [BoundaryType.ABSOLUTE, BoundaryType.RELATIVE, BoundaryType.MAXABSREL,
                            BoundaryType.MINABSREL],
            ValueError,
            "Boundary Type {0} is not recognized".format(self.b_type))

        pyFinAssert(
            self.direction == BoundaryDirection.LOWER or self.direction == BoundaryDirection.UPPER,
            ValueError,
            "Boundary direction {0} is not recognized".format(self.direction))
...
@@ -77,7 +81,8 @@ class BoundaryImpl(object):
                abs_bound = center + abs_threshold
                return min(rel_bound, abs_bound)
        else:
            pyFinAssert(center >= 0., ValueError,
                        "relative bounds only support positive backbone value")
            return self.val * center
...
@@ -129,7 +134,7 @@ class LinearConstraints(object):

    def __init__(self,
                 bounds: Dict[str, BoxBoundary],
                 cons_mat: pd.DataFrame,
                 backbone: np.ndarray = None):
        self.names = list(set(bounds.keys()).intersection(set(cons_mat.columns)))
        self.bounds = bounds
        self.cons_mat = cons_mat
...
@@ -159,7 +164,8 @@ class LinearConstraints(object):
        return self.cons_mat[self.names].values


@deprecated(
    reason="Constraints is deprecated in alpha-mind 0.1.1. Please use LinearConstraints instead.")
class Constraints(object):

    def __init__(self,
...
@@ -185,7 +191,8 @@ class Constraints(object):

    def add_exposure(self, tags: np.ndarray, new_exp: np.ndarray):
        if len(tags) != new_exp.shape[1]:
            raise ValueError(
                'new tags length ({0}) is not compatible with exposure shape {1}'.format(len(tags),
                                                                                         new_exp.shape))
        for tag in tags:
...
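One branch above takes the tighter of a relative band and an absolute band around the backbone value; in plain arithmetic, with illustrative numbers:

# paraphrase of the upper-bound branch shown above
center = 1.0                             # backbone value
rel_val, abs_threshold = 0.10, 0.05
rel_bound = center + rel_val * center    # relative band: 1.10
abs_bound = center + abs_threshold       # absolute band: 1.05
print(min(rel_bound, abs_bound))         # the tighter bound wins: 1.05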
@@ -9,7 +9,6 @@ import numpy as np

def evolve_positions(positions: np.ndarray, dx_ret: np.ndarray) -> np.ndarray:
    # assume return is log return
    simple_return = np.exp(dx_ret)
...
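Since dx_ret is a log return, evolving positions is a simple exponential scaling:

import numpy as np

positions = np.array([100., 50.])
dx_ret = np.array([0.01, -0.02])        # log returns, per the comment above
print(positions * np.exp(dx_ret))       # evolved positions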
@@ -5,12 +5,14 @@ Created on 2017-5-5

@author: cheng.li
"""

from typing import Tuple
from typing import Union

import numpy as np

from alphamind.cython.optimizers import LPOptimizer
from alphamind.exceptions.exceptions import PortfolioBuilderException


def linear_builder(er: np.ndarray,
                   lbound: Union[np.ndarray, float],
...
@@ -19,7 +21,7 @@ def linear_builder(er: np.ndarray,
                   risk_target: Tuple[np.ndarray, np.ndarray],
                   turn_over_target: float = None,
                   current_position: np.ndarray = None,
                   method: str = 'ecos') -> Tuple[str, np.ndarray, np.ndarray]:
    er = er.flatten()
    n, m = risk_constraints.shape
...
@@ -6,6 +6,7 @@ Created on 2017-5-9
"""

import numpy as np

from alphamind.utilities import group_mapping
from alphamind.utilities import simple_abssum
from alphamind.utilities import transform
...
@@ -5,14 +5,16 @@ Created on 2017-6-27

@author: cheng.li
"""

from typing import Dict
from typing import Optional
from typing import Tuple
from typing import Union

import cvxpy
import numpy as np

from alphamind.cython.optimizers import CVOptimizer
from alphamind.cython.optimizers import QPOptimizer
from alphamind.exceptions.exceptions import PortfolioBuilderException
...
@@ -52,9 +54,10 @@ def mean_variance_builder(er: np.ndarray,
                          ubound: Union[np.ndarray, float],
                          risk_exposure: Optional[np.ndarray],
                          risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
                          lam: float = 1.,
                          linear_solver: str = 'ma27') -> Tuple[str, float, np.ndarray]:
    lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure,
                                                                risk_target)

    if np.all(lbound == -np.inf) and np.all(ubound == np.inf) and cons_mat is None:
        # using fast path cvxpy
...
@@ -102,8 +105,9 @@ def target_vol_builder(er: np.ndarray,
                       risk_exposure: Optional[np.ndarray],
                       risk_target: Optional[Tuple[np.ndarray, np.ndarray]],
                       vol_target: float = 1.,
                       linear_solver: str = 'ma27') -> Tuple[str, float, np.ndarray]:
    lbound, ubound, cons_mat, clbound, cubound = _create_bounds(lbound, ubound, bm, risk_exposure,
                                                                risk_target)
    optimizer = CVOptimizer(er,
                            risk_model['cov'],
...
@@ -119,6 +123,3 @@ def target_vol_builder(er: np.ndarray,
                            linear_solver=linear_solver)

    return _create_result(optimizer, bm)
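The "fast path cvxpy" comment above refers to solving the mean-variance program directly with cvxpy when no bounds or linear constraints bind. An illustrative toy of that objective, not the builder's exact formulation:

import cvxpy as cp
import numpy as np

er = np.array([0.02, 0.01, 0.03])        # expected returns
cov = np.diag([0.04, 0.09, 0.02])        # toy covariance
lam = 1.
w = cp.Variable(3)
# maximize er'w - (lam/2) w'Cov w, here with simple budget/long-only constraints
objective = cp.Maximize(er @ w - 0.5 * lam * cp.quad_form(w, cov))
prob = cp.Problem(objective, [cp.sum(w) == 1., w >= 0.])
prob.solve()
print(w.value)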
@@ -8,12 +8,13 @@ Created on 2017-5-4

import numpy as np
from numpy import zeros
from numpy import zeros_like

from alphamind.utilities import groupby
from alphamind.utilities import set_value


def percent_build(er: np.ndarray, percent: float, groups: np.ndarray = None,
                  masks: np.ndarray = None) -> np.ndarray:
    er = er.copy()

    if masks is not None:
...
@@ -28,7 +29,7 @@ def percent_build(er: np.ndarray, percent: float, groups: np.ndarray=None, masks
        index_diff, order = groupby(groups)
        start = 0
        for diff_loc in index_diff:
            current_index = order[start:diff_loc + 1]
            current_ordering = neg_er[current_index].argsort()
            current_ordering.shape = -1, 1
            use_rank = int(percent * len(current_index))
...
@@ -8,12 +8,13 @@ Created on 2017-4-26

import numpy as np
from numpy import zeros
from numpy import zeros_like

from alphamind.utilities import groupby
from alphamind.utilities import set_value


def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray = None,
               masks: np.ndarray = None) -> np.ndarray:
    er = er.copy()

    if masks is not None:
...
@@ -28,7 +29,7 @@ def rank_build(er: np.ndarray, use_rank: int, groups: np.ndarray=None, masks: np
        index_diff, order = groupby(groups)
        start = 0
        for diff_loc in index_diff:
            current_index = order[start:diff_loc + 1]
            current_ordering = neg_er[current_index].argsort()
            current_ordering.shape = -1, 1
            set_value(weights, current_index[current_ordering[:use_rank]], 1.)
...
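Both builders above share the group-by-group top-selection loop. A usage sketch for rank_build; the column-vector shape of er is an assumption from the zeros_like/argsort handling, not stated in this diff:

import numpy as np
from alphamind.portfolio.rankbuilder import rank_build

er = np.array([0.30, -0.10, 0.20, 0.05]).reshape(-1, 1)   # assumed column shape
weights = rank_build(er, use_rank=2)     # equal weight on the top-2 scores
print(weights.flatten())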
@@ -7,6 +7,7 @@ Created on 2018-5-29

import abc
from typing import List

import pandas as pd
...
@@ -22,13 +23,13 @@ class FullRiskModel(RiskModel):
        self.codes = sec_cov.index.tolist()
        self.sec_cov = sec_cov.loc[self.codes, self.codes]

    def get_cov(self, codes: List[int] = None):
        if codes:
            return self.sec_cov.loc[codes, codes].values
        else:
            return self.sec_cov.values

    def get_risk_profile(self, codes: List[int] = None):
        return dict(
            cov=self.get_cov(codes),
            factor_cov=None,
...
@@ -51,7 +52,7 @@ class FactorRiskModel(RiskModel):
        self.factor_cov = self.factor_cov.loc[self.factor_names, self.factor_names]
        self.idsync = self.idsync[self.codes]

    def get_risk_exp(self, codes: List[int] = None):
        if codes:
            return self.risk_exp.loc[codes, :].values
        else:
...
@@ -60,13 +61,13 @@ class FactorRiskModel(RiskModel):
    def get_factor_cov(self):
        return self.factor_cov.values

    def get_idsync(self, codes: List[int] = None):
        if codes:
            return self.idsync[codes].values
        else:
            return self.idsync.values

    def get_risk_profile(self, codes: List[int] = None):
        return dict(
            cov=None,
            factor_cov=self.get_factor_cov(),
...
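A sketch of the full-covariance variant; both the constructor argument and the module path are assumptions inferred from the attribute handling shown above:

import pandas as pd
from alphamind.portfolio.riskmodel import FullRiskModel   # assumed module path

sec_cov = pd.DataFrame([[0.04, 0.01], [0.01, 0.09]], index=[1, 2], columns=[1, 2])
model = FullRiskModel(sec_cov)            # assumed constructor
profile = model.get_risk_profile([1, 2])  # dict with 'cov', 'factor_cov', ...
print(profile['cov'])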
@@ -11,9 +11,8 @@ import pandas as pd

def simple_settle(weights: np.ndarray,
                  dx_return: np.ndarray,
                  groups: np.ndarray = None,
                  benchmark: np.ndarray = None) -> pd.DataFrame:
    weights = weights.flatten()
    dx_return = dx_return.flatten()
...
@@ -45,5 +44,3 @@ def simple_settle(weights: np.ndarray,
    return pd.DataFrame({'er': ret_agg.values,
                         'ic': ic_table.values},
                        index=ret_agg.index)
{
  "strategy_name": "sample_strategy",
  "data_process": {
    "pre_process": [
      "winsorize",
      "standardize"
    ],
    "neutralize_risk": [
      "SIZE",
      "industry_styles"
    ],
    "post_process": [
      "winsorize",
      "standardize"
    ]
  },
  "risk_model": {
    "type": "short",
    "neutralize_risk": [
      "SIZE",
      "industry_styles"
    ]
  },
  "alpha_model": {
    "model_type": "LinearRegression",
    "features": [
      "EPS",
      "ROEDiluted"
    ],
    "parameters": {
      "fit_intercept": false
    }
  },
  "freq": "1d",
  "batch": 4,
  "warm_start": 0,
  "universe": [
    "zz500",
    [
      "zz500"
    ]
  ],
  "benchmark": 905,
  "optimizer": {
    "type": "risk_neutral",
    "neutralize_risk": [
      "SIZE",
      "industry_styles"
    ]
  },
  "executor": {
    "type": "naive"
  }
}
\ No newline at end of file
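This description is plain JSON, so consuming it is one json.load away; the file name here is hypothetical:

import json

with open('sample_strategy.json') as f:     # hypothetical file name
    desc = json.load(f)
print(desc['alpha_model']['model_type'])    # -> 'LinearRegression'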
@@ -6,24 +6,26 @@ Created on 2018-5-3
"""

import copy

import numpy as np
import pandas as pd
from PyFin.api import advanceDateByCalendar
from PyFin.api import makeSchedule

from alphamind.analysis.factoranalysis import er_portfolio_analysis
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.sqlengine import industry_styles
from alphamind.data.engines.sqlengine import macro_styles
from alphamind.data.engines.sqlengine import risk_styles
from alphamind.data.processing import factor_processing
from alphamind.exceptions.exceptions import PortfolioBuilderException
from alphamind.execution.naiveexecutor import NaiveExecutor
from alphamind.model.composer import train_model
from alphamind.portfolio.constraints import BoundaryType
from alphamind.portfolio.constraints import LinearConstraints
from alphamind.portfolio.constraints import create_box_bounds
from alphamind.utilities import alpha_logger
from alphamind.utilities import map_freq

all_styles = risk_styles + industry_styles + macro_styles
...
@@ -119,7 +121,8 @@ class Strategy(object):
        self.index_return = self.engine.fetch_dx_return_index_range(self.benchmark,
                                                                    dates=self.dates,
                                                                    horizon=self.horizon,
                                                                    offset=1).set_index(
            'trade_date')
        self.total_data = total_data

    def prepare_backtest_models(self):
...
@@ -129,13 +132,16 @@ class Strategy(object):
        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date], _, _ = train_model(ref_date.strftime('%Y-%m-%d'),
                                                     self.alpha_model, self.data_meta)
        else:
            def worker(parameters):
                new_model, _, _ = train_model(parameters[0].strftime('%Y-%m-%d'), parameters[1],
                                              parameters[2])
                return parameters[0], new_model

            l = self.dask_client.map(worker, [(d[0], self.alpha_model, self.data_meta) for d in
                                              total_data_groups])
            results = self.dask_client.gather(l)
            models = dict(results)
        self.alpha_models = models
...
@@ -252,16 +258,19 @@ class Strategy(object):
        positions['dx'] = self.total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverags},
                              index=trade_dates)

        ret_df['benchmark_returns'] = self.index_return['dx']
        ret_df.loc[advanceDateByCalendar('china.sse', ret_df.index[-1], self.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = ret_df['returns'] - ret_df['benchmark_returns'] * ret_df[
            'leverage']

        return ret_df, positions

    def _calculate_pos(self, running_setting, er, data, constraints, benchmark_w, lbound, ubound,
                       risk_model,
                       current_position):
        more_opts = running_setting.more_opts
        try:
...
@@ -277,7 +286,8 @@ class Strategy(object):
                current_position=current_position,
                target_vol=more_opts.get('target_vol'),
                risk_model=risk_model,
                turn_over_target=more_opts.get(
                    'turn_over_target'))
        except PortfolioBuilderException:
            alpha_logger.warning("Not able to fit the constraints. Using full re-balance.")
            target_pos, _ = er_portfolio_analysis(er,
...
@@ -297,16 +307,12 @@ class Strategy(object):

if __name__ == '__main__':
    import os
    from matplotlib import pyplot as plt
    from PyFin.api import CSQuantiles
    from PyFin.api import LAST
    from alphamind.api import Universe
    from alphamind.api import ConstLinearModel
    from alphamind.api import DataMeta
    from alphamind.api import industry_list
    from matplotlib import pyplot as plt
    from matplotlib.pylab import mpl
...
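The dask branch in prepare_backtest_models above is a plain map/gather fan-out; a toy reproduction with a stand-in worker in place of train_model:

from dask.distributed import Client

client = Client(processes=False)        # local, in-process cluster

def worker(d):
    return d, d * 2                     # stand-in for train_model

futures = client.map(worker, [1, 2, 3])
print(dict(client.gather(futures)))     # {1: 2, 2: 4, 3: 6}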
@@ -6,14 +6,16 @@ Created on 2017-5-25
"""

import unittest

import numpy as np
import pandas as pd

from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.data.neutralize import neutralize
from alphamind.data.processing import factor_processing
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal
from alphamind.portfolio.constraints import Constraints


class TestFactorAnalysis(unittest.TestCase):
...
@@ -29,14 +31,16 @@ class TestFactorAnalysis(unittest.TestCase):
        new_factor = factor_processing(self.raw_factor,
                                       pre_process=[standardize, winsorize_normal])

        np.testing.assert_array_almost_equal(new_factor,
                                             winsorize_normal(standardize(self.raw_factor)))

        new_factor = factor_processing(self.raw_factor,
                                       pre_process=[standardize, winsorize_normal],
                                       risk_factors=self.risk_factor)

        np.testing.assert_array_almost_equal(new_factor, neutralize(self.risk_factor,
                                                                    winsorize_normal(standardize(
                                                                        self.raw_factor))))

    def test_factor_analysis(self):
        benchmark = np.random.randint(50, size=1000)
...
@@ -64,7 +68,8 @@ class TestFactorAnalysis(unittest.TestCase):
        weight = weight_table.weight

        self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
        np.testing.assert_array_almost_equal(weight @ self.risk_factor,
                                             benchmark @ self.risk_factor)
        self.assertTrue(weight @ factor_df.values > benchmark @ factor_df.values)

    def test_factor_analysis_with_several_factors(self):
...
@@ -92,7 +97,8 @@ class TestFactorAnalysis(unittest.TestCase):
        weight = weight_table.weight

        self.assertEqual(analysis_table['er'].sum() / analysis_table['er'].iloc[-1], 2.0)
        np.testing.assert_array_almost_equal(weight @ self.risk_factor,
                                             benchmark @ self.risk_factor)


if __name__ == '__main__':
...
@@ -6,8 +6,10 @@ Created on 2017-5-12
"""

import unittest

import numpy as np
import pandas as pd

from alphamind.analysis.perfanalysis import perf_attribution_by_pos
...
@@ -35,7 +37,8 @@ class TestPerformanceAnalysis(unittest.TestCase):
        aggregated_to_explain = pd.Series(to_explain).groupby(dates).sum()
        aggregated_explained = explained_table.sum(axis=1)

        np.testing.assert_array_almost_equal(aggregated_to_explain.values,
                                             aggregated_explained.values)


if __name__ == '__main__':
...
@@ -6,14 +6,16 @@ Created on 2017-8-16
"""

import unittest

import numpy as np
import pandas as pd

from alphamind.analysis.quantileanalysis import er_quantile_analysis
from alphamind.analysis.quantileanalysis import quantile_analysis
from alphamind.data.processing import factor_processing
from alphamind.data.quantile import quantile
from alphamind.data.standardize import standardize
from alphamind.data.winsorize import winsorize_normal


class TestQuantileAnalysis(unittest.TestCase):
...
@@ -6,8 +6,10 @@ Created on 2017-5-8
"""

import unittest

import numpy as np
import pandas as pd

from alphamind.analysis.riskanalysis import risk_analysis
...
@@ -6,10 +6,11 @@ Created on 2017-11-1
"""

import unittest

import numpy as np

from alphamind.cython.optimizers import CVOptimizer
from alphamind.cython.optimizers import LPOptimizer
from alphamind.cython.optimizers import QPOptimizer


class TestOptimizers(unittest.TestCase):
...
...@@ -7,27 +7,29 @@ Created on 2018-4-17 ...@@ -7,27 +7,29 @@ Created on 2018-4-17
import random import random
import unittest import unittest
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from scipy.stats import rankdata from PyFin.api import CSQuantiles
from sqlalchemy import select, and_, or_ from PyFin.api import CSRank
from PyFin.api import makeSchedule
from PyFin.api import advanceDateByCalendar from PyFin.api import advanceDateByCalendar
from PyFin.api import bizDatesList from PyFin.api import bizDatesList
from PyFin.api import CSRank from PyFin.api import makeSchedule
from PyFin.api import CSQuantiles from scipy.stats import rankdata
from alphamind.tests.test_suite import SKIP_ENGINE_TESTS from sqlalchemy import select, and_, or_
from alphamind.tests.test_suite import DATA_ENGINE_URI
from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import Market
from alphamind.data.dbmodel.models import IndexMarket
from alphamind.data.dbmodel.models import IndexComponent from alphamind.data.dbmodel.models import IndexComponent
from alphamind.data.dbmodel.models import Uqer from alphamind.data.dbmodel.models import IndexMarket
from alphamind.data.dbmodel.models import Industry
from alphamind.data.dbmodel.models import Market
from alphamind.data.dbmodel.models import RiskCovShort from alphamind.data.dbmodel.models import RiskCovShort
from alphamind.data.dbmodel.models import RiskExposure from alphamind.data.dbmodel.models import RiskExposure
from alphamind.data.dbmodel.models import Industry from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.engines.sqlengine import SqlEngine from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe from alphamind.data.engines.universe import Universe
from alphamind.tests.test_suite import DATA_ENGINE_URI
from alphamind.tests.test_suite import SKIP_ENGINE_TESTS
from alphamind.utilities import alpha_logger from alphamind.utilities import alpha_logger
...@@ -149,7 +151,8 @@ class TestSqlEngine(unittest.TestCase): ...@@ -149,7 +151,8 @@ class TestSqlEngine(unittest.TestCase):
universe = Universe('zz500') + Universe('zz1000') universe = Universe('zz500') + Universe('zz1000')
codes = self.engine.fetch_codes(ref_date, universe) codes = self.engine.fetch_codes(ref_date, universe)
dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset, benchmark=benchmark) dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset,
benchmark=benchmark)
start_date = advanceDateByCalendar('china.sse', ref_date, '2b') start_date = advanceDateByCalendar('china.sse', ref_date, '2b')
end_date = advanceDateByCalendar('china.sse', ref_date, '6b') end_date = advanceDateByCalendar('china.sse', ref_date, '6b')
@@ -172,7 +175,8 @@ class TestSqlEngine(unittest.TestCase):
         df = pd.read_sql(query, con=self.engine.engine)
         b_res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
-        np.testing.assert_array_almost_equal(dx_return.dx.values, res.chgPct.values - b_res.chgPct.values)
+        np.testing.assert_array_almost_equal(dx_return.dx.values,
+                                             res.chgPct.values - b_res.chgPct.values)

         horizon = 4
         offset = 0
@@ -180,7 +184,8 @@ class TestSqlEngine(unittest.TestCase):
         universe = Universe('zz500') + Universe('zz1000')
         codes = self.engine.fetch_codes(ref_date, universe)
-        dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset, benchmark=benchmark)
+        dx_return = self.engine.fetch_dx_return(ref_date, codes, horizon=horizon, offset=offset,
+                                                benchmark=benchmark)
         start_date = advanceDateByCalendar('china.sse', ref_date, '1b')
         end_date = advanceDateByCalendar('china.sse', ref_date, '5b')
@@ -203,7 +208,8 @@ class TestSqlEngine(unittest.TestCase):
         df = pd.read_sql(query, con=self.engine.engine)
         b_res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
-        np.testing.assert_array_almost_equal(dx_return.dx.values, res.chgPct.values - b_res.chgPct.values)
+        np.testing.assert_array_almost_equal(dx_return.dx.values,
+                                             res.chgPct.values - b_res.chgPct.values)

     def test_sql_engine_fetch_dx_return_range(self):
         ref_dates = makeSchedule(advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
@@ -276,7 +282,8 @@ class TestSqlEngine(unittest.TestCase):
         b_res = df.groupby('code').apply(lambda x: np.log(1. + x).sum())
         calculated_return = dx_return[dx_return.trade_date == ref_date]
-        np.testing.assert_array_almost_equal(calculated_return.dx.values, res.chgPct.values - b_res.chgPct.values)
+        np.testing.assert_array_almost_equal(calculated_return.dx.values,
+                                             res.chgPct.values - b_res.chgPct.values)

     def test_sql_engine_fetch_dx_return_with_universe_adjustment(self):
         ref_dates = makeSchedule(advanceDateByCalendar('china.sse', '2017-01-26', '-6m'),
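Throughout these dx-return tests the expected value is recomputed by hand: the cumulative log return per stock over the horizon, minus the same quantity for the benchmark. A minimal standalone sketch of that check, using hypothetical return data rather than the engine's query results:

import numpy as np
import pandas as pd

# Hypothetical daily simple returns for two stocks and the benchmark index.
stock = pd.DataFrame({'code': [1, 1, 2, 2], 'chgPct': [0.01, -0.02, 0.03, 0.00]})
bench = pd.DataFrame({'code': [1, 1, 2, 2], 'chgPct': [0.005, 0.001, 0.005, 0.001]})

# Cumulative log return per stock over the horizon window ...
res = stock.groupby('code')['chgPct'].apply(lambda s: np.log(1. + s).sum())
b_res = bench.groupby('code')['chgPct'].apply(lambda s: np.log(1. + s).sum())

# ... and the benchmark-relative excess return the asserts compare against.
excess = res.values - b_res.values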
@@ -404,7 +411,8 @@ class TestSqlEngine(unittest.TestCase):
         ref_dates = makeSchedule(advanceDateByCalendar('china.sse', self.ref_date, '-6m'),
                                  self.ref_date,
                                  '60b', 'china.sse')
-        ref_dates = ref_dates + [advanceDateByCalendar('china.sse', ref_dates[-1], '60b').strftime('%Y-%m-%d')]
+        ref_dates = ref_dates + [
+            advanceDateByCalendar('china.sse', ref_dates[-1], '60b').strftime('%Y-%m-%d')]

         universe = Universe('zz500') + Universe('zz1000')
         factor = 'ROE'
@@ -414,7 +422,8 @@ class TestSqlEngine(unittest.TestCase):
         groups = codes.groupby('trade_date')
         for ref_date, g in groups:
-            forward_ref_date = advanceDateByCalendar('china.sse', ref_date, '60b').strftime('%Y-%m-%d')
+            forward_ref_date = advanceDateByCalendar('china.sse', ref_date, '60b').strftime(
+                '%Y-%m-%d')
             query = select([Uqer.code, Uqer.ROE]).where(
                 and_(
                     Uqer.trade_date == forward_ref_date,
@@ -451,7 +460,8 @@ class TestSqlEngine(unittest.TestCase):
         benchmark = 906
         index_data = self.engine.fetch_benchmark_range(benchmark, dates=ref_dates)
-        query = select([IndexComponent.trade_date, IndexComponent.code, (IndexComponent.weight / 100.).label('weight')]).where(
+        query = select([IndexComponent.trade_date, IndexComponent.code,
+                        (IndexComponent.weight / 100.).label('weight')]).where(
             and_(
                 IndexComponent.trade_date.in_(ref_dates),
                 IndexComponent.indexCode == benchmark
@@ -462,7 +472,8 @@ class TestSqlEngine(unittest.TestCase):
         for ref_date in ref_dates:
             calculated_data = index_data[index_data.trade_date == ref_date]
             expected_data = df[df.trade_date == ref_date]
-            np.testing.assert_array_almost_equal(calculated_data.weight.values, expected_data.weight.values)
+            np.testing.assert_array_almost_equal(calculated_data.weight.values,
+                                                 expected_data.weight.values)

     def test_sql_engine_fetch_risk_model(self):
         ref_date = self.ref_date
@@ -533,7 +544,7 @@ class TestSqlEngine(unittest.TestCase):
         ind_matrix = self.engine.fetch_industry_matrix(ref_date, codes, 'sw', 1)
         cols = sorted(ind_matrix.columns[2:].tolist())
-        series = (ind_matrix[cols] * np.array(range(1, len(cols)+1))).sum(axis=1)
+        series = (ind_matrix[cols] * np.array(range(1, len(cols) + 1))).sum(axis=1)
         df3['cat'] = series.values
         expected_rank = df3[['ROE', 'cat']].groupby('cat').transform(lambda x: rankdata(x.values))
@@ -542,7 +553,8 @@ class TestSqlEngine(unittest.TestCase):
         np.testing.assert_array_almost_equal(df3['rank'].values,
                                              df1['f'].values)
-        expected_quantile = df3[['ROE', 'cat']].groupby('cat').transform(lambda x: rankdata(x.values) / (len(x) + 1))
+        expected_quantile = df3[['ROE', 'cat']].groupby('cat').transform(
+            lambda x: rankdata(x.values) / (len(x) + 1))
         expected_quantile[np.isnan(df3.ROE)] = np.nan
         df3['quantile'] = expected_quantile['ROE'].values
         np.testing.assert_array_almost_equal(df3['quantile'].values,
...
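The expected quantile above is the standard rank / (n + 1) estimator computed within each industry bucket. A self-contained sketch with toy data:

import pandas as pd
from scipy.stats import rankdata

# Hypothetical factor values in two industry buckets.
df = pd.DataFrame({'ROE': [0.1, 0.3, 0.2, 0.5], 'cat': [1, 1, 2, 2]})

# Within-group quantile: rank / (n + 1), matching the test's expectation.
q = df.groupby('cat')['ROE'].transform(lambda x: rankdata(x.values) / (len(x) + 1))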
@@ -6,6 +6,7 @@ Created on 2018-2-9
 """
 import unittest
+
 from alphamind.data.engines.universe import Universe
 from alphamind.data.engines.universe import load_universe
...
@@ -6,8 +6,10 @@ Created on 2017-4-25
 """
 import unittest
+
 import numpy as np
 from sklearn.linear_model import LinearRegression
+
 from alphamind.data.neutralize import neutralize
@@ -79,7 +81,8 @@ class TestNeutralize(unittest.TestCase):
             exp_res = curr_y - curr_x @ model.coef_.T
             exp_explained = curr_x * model.coef_.T
             np.testing.assert_array_almost_equal(calc_res[self.groups == i], exp_res.reshape(-1, 1))
-            np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0], exp_explained)
+            np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, 0],
+                                                 exp_explained)

         calc_res, other_stats = neutralize(self.x, self.y, self.groups, detail=True)
@@ -93,7 +96,8 @@ class TestNeutralize(unittest.TestCase):
             for j in range(self.y.shape[1]):
                 exp_explained = curr_x * model.coef_.T[:, j]
-                np.testing.assert_array_almost_equal(other_stats['explained'][self.groups == i, :, j], exp_explained)
+                np.testing.assert_array_almost_equal(
+                    other_stats['explained'][self.groups == i, :, j], exp_explained)

 if __name__ == '__main__':
...
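The quantities asserted in these tests follow from what neutralization means: regress the signal on the exposures within each group, keep the residual, and attribute the rest to the factors. A sketch of the same arithmetic with sklearn on synthetic data (fitting without an intercept here):

import numpy as np
from sklearn.linear_model import LinearRegression

# Hypothetical exposures x and raw signal y for a single group.
rng = np.random.RandomState(42)
x = rng.randn(100, 3)
y = x @ np.array([0.5, -0.2, 0.1]) + 0.01 * rng.randn(100)

model = LinearRegression(fit_intercept=False).fit(x, y)
residual = y - x @ model.coef_.T    # the neutralized signal
explained = x * model.coef_.T       # per-factor explained component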
@@ -6,7 +6,9 @@ Created on 2017-8-16
 """
 import unittest
+
 import numpy as np
+
 from alphamind.data.quantile import quantile
...
@@ -6,8 +6,10 @@ Created on 2017-8-8
 """
 import unittest
+
 import numpy as np
 import pandas as pd
+
 from alphamind.data.rank import rank
@@ -37,6 +39,7 @@ class TestRank(unittest.TestCase):
             ret.append(groups[index].values)
         ret = np.concatenate(ret).reshape(-1, 1)
-        expected_rank = data['raw'].groupby(level=0).apply(lambda x: x.values.argsort(axis=0).argsort(axis=0))
+        expected_rank = data['raw'].groupby(level=0).apply(
+            lambda x: x.values.argsort(axis=0).argsort(axis=0))
         expected_rank = np.concatenate(expected_rank).reshape(-1, 1)
         np.testing.assert_array_equal(ret, expected_rank)
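The argsort-of-argsort idiom used for the expected value is the usual trick for zero-based ranks:

import numpy as np

x = np.array([0.3, 0.1, 0.5, 0.2])
# argsort of argsort yields each element's zero-based rank within the array.
ranks = x.argsort().argsort()    # array([2, 0, 3, 1])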
@@ -6,12 +6,14 @@ Created on 2017-4-25
 """
 import unittest
+
 import numpy as np
 import pandas as pd
 from scipy.stats import zscore
-from alphamind.data.standardize import standardize
-from alphamind.data.standardize import projection
+
 from alphamind.data.standardize import Standardizer
+from alphamind.data.standardize import projection
+from alphamind.data.standardize import standardize

 class TestStandardize(unittest.TestCase):
@@ -42,8 +44,8 @@ class TestStandardize(unittest.TestCase):
     def test_standardize_with_group(self):
         calc_zscore = standardize(self.x, self.groups)
-        exp_zscore = pd.DataFrame(self.x).\
-            groupby(self.groups).\
+        exp_zscore = pd.DataFrame(self.x). \
+            groupby(self.groups). \
             transform(lambda s: (s - s.mean(axis=0)) / s.std(axis=0, ddof=1))
         np.testing.assert_array_almost_equal(calc_zscore, exp_zscore)
...
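The expectation here is the plain grouped z-score, (x - group mean) / group sample std with ddof=1; a minimal pandas sketch on synthetic data:

import numpy as np
import pandas as pd

x = np.random.randn(6, 2)
groups = np.array([0, 0, 0, 1, 1, 1])

# Group-wise z-score using the sample standard deviation (ddof=1).
z = pd.DataFrame(x).groupby(groups).transform(
    lambda s: (s - s.mean()) / s.std(ddof=1))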
@@ -6,10 +6,12 @@ Created on 2017-4-25
 """
 import unittest
+
 import numpy as np
 import pandas as pd
-from alphamind.data.winsorize import winsorize_normal
+
 from alphamind.data.winsorize import NormalWinsorizer
+from alphamind.data.winsorize import winsorize_normal

 class TestWinsorize(unittest.TestCase):
...
@@ -6,7 +6,9 @@ Created on 2017-9-22
 """
 import unittest
+
 import pandas as pd
+
 from alphamind.execution.naiveexecutor import NaiveExecutor
...
@@ -7,11 +7,13 @@ Created on 2017-9-25
 import unittest
 from collections import deque
+
 import numpy as np
 import pandas as pd
+
 from alphamind.execution.pipeline import ExecutionPipeline
-from alphamind.execution.thresholdexecutor import ThresholdExecutor
 from alphamind.execution.targetvolexecutor import TargetVolExecutor
+from alphamind.execution.thresholdexecutor import ThresholdExecutor

 class TestExecutionPipeline(unittest.TestCase):
...
@@ -7,8 +7,10 @@ Created on 2017-9-22
 import unittest
 from collections import deque
+
 import numpy as np
 import pandas as pd
+
 from alphamind.execution.targetvolexecutor import TargetVolExecutor
...
@@ -6,7 +6,9 @@ Created on 2017-9-22
 """
 import unittest
+
 import pandas as pd
+
 from alphamind.execution.thresholdexecutor import ThresholdExecutor
...
@@ -6,9 +6,10 @@ Created on 2018-2-9
 """
 import unittest
+
 from alphamind.data.engines.universe import Universe
-from alphamind.model.composer import DataMeta
 from alphamind.model.composer import Composer
+from alphamind.model.composer import DataMeta
 from alphamind.model.treemodel import XGBClassifier
@@ -19,7 +20,6 @@ class TestComposer(unittest.TestCase):
         self.assertEqual(lhs.data_meta, rhs.data_meta)

     def test_data_meta_persistence(self):
-
         freq = '5b'
         universe = Universe('zz800')
         batch = 4
@@ -83,7 +83,3 @@ class TestComposer(unittest.TestCase):
         comp_desc = composer.save()
         loaded_comp = Composer.load(comp_desc)
         self._assert_composer_equal(composer, loaded_comp)
@@ -6,14 +6,16 @@ Created on 2017-9-4
 """
 import unittest
+
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LinearRegression as LinearRegression2
-from alphamind.model.loader import load_model
+from sklearn.linear_model import LogisticRegression as LogisticRegression2
+
 from alphamind.model.linearmodel import ConstLinearModel
 from alphamind.model.linearmodel import LinearRegression
-from sklearn.linear_model import LogisticRegression as LogisticRegression2
 from alphamind.model.linearmodel import LogisticRegression
+from alphamind.model.loader import load_model

 class TestLinearModel(unittest.TestCase):
@@ -27,7 +29,6 @@ class TestLinearModel(unittest.TestCase):
         self.predict_x = pd.DataFrame(np.random.randn(10, self.n), columns=['a', 'b', 'c'])

     def test_const_linear_model(self):
-
         features = ['c', 'b', 'a']
         weights = dict(c=3., b=2., a=1.)
         model = ConstLinearModel(features=features,
@@ -111,4 +112,3 @@ class TestLinearModel(unittest.TestCase):
         np.testing.assert_array_almost_equal(calculated_y, expected_y)
         np.testing.assert_array_almost_equal(new_model.weights, model.weights)
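ConstLinearModel, as exercised above, scores with a fixed dot product of named features and weights. A standalone sketch of that idea, not the class itself:

import numpy as np
import pandas as pd

features = ['c', 'b', 'a']
weights = np.array([3., 2., 1.])    # aligned with `features`

x = pd.DataFrame(np.random.randn(10, 3), columns=['a', 'b', 'c'])
# Score = fixed linear combination of the named feature columns.
y = x[features].values @ weights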
@@ -6,8 +6,10 @@ Created on 2017-9-5
 """
 import unittest
+
 import numpy as np
 import pandas as pd
+
 from alphamind.model.linearmodel import LinearRegression
 from alphamind.model.loader import load_model
...
@@ -6,6 +6,7 @@ Created on 2018-2-8
 """
 import unittest
+
 from alphamind.model.linearmodel import ConstLinearModel
...
@@ -6,13 +6,15 @@ Created on 2018-1-5
 """
 import unittest
+
 import numpy as np
 import pandas as pd
+
 from alphamind.model.loader import load_model
-from alphamind.model.treemodel import RandomForestRegressor
 from alphamind.model.treemodel import RandomForestClassifier
-from alphamind.model.treemodel import XGBRegressor
+from alphamind.model.treemodel import RandomForestRegressor
 from alphamind.model.treemodel import XGBClassifier
+from alphamind.model.treemodel import XGBRegressor
 from alphamind.model.treemodel import XGBTrainer
@@ -32,7 +34,8 @@ class TestTreeModel(unittest.TestCase):
         new_model = load_model(desc)
         self.assertEqual(model.features, new_model.features)
-        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x),
+                                             new_model.predict(self.sample_x))
         np.testing.assert_array_almost_equal(model.importances, new_model.importances)

     def test_random_forest_classify_persistence(self):
@@ -44,7 +47,8 @@ class TestTreeModel(unittest.TestCase):
         new_model = load_model(desc)
         self.assertEqual(model.features, new_model.features)
-        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x),
+                                             new_model.predict(self.sample_x))
         np.testing.assert_array_almost_equal(model.importances, new_model.importances)

     def test_xgb_regress_persistence(self):
@@ -55,7 +59,8 @@ class TestTreeModel(unittest.TestCase):
         new_model = load_model(desc)
         self.assertEqual(model.features, new_model.features)
-        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x),
+                                             new_model.predict(self.sample_x))
         np.testing.assert_array_almost_equal(model.importances, new_model.importances)

     def test_xgb_classify_persistence(self):
@@ -67,11 +72,11 @@ class TestTreeModel(unittest.TestCase):
         new_model = load_model(desc)
         self.assertEqual(model.features, new_model.features)
-        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x),
+                                             new_model.predict(self.sample_x))
         np.testing.assert_array_almost_equal(model.importances, new_model.importances)
-
     def test_xgb_trainer_equal_classifier(self):
         model1 = XGBClassifier(n_estimators=100,
                                learning_rate=0.1,
                                max_depth=3,
@@ -109,5 +114,6 @@ class TestTreeModel(unittest.TestCase):
         new_model = load_model(desc)
         self.assertEqual(model.features, new_model.features)
-        np.testing.assert_array_almost_equal(model.predict(self.sample_x), new_model.predict(self.sample_x))
+        np.testing.assert_array_almost_equal(model.predict(self.sample_x),
+                                             new_model.predict(self.sample_x))
         np.testing.assert_array_almost_equal(model.importances, new_model.importances)
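Every persistence test above is the same round trip: serialize, reload, and assert the reloaded model predicts identically. A generic sketch with sklearn and pickle (alphamind's own save/load descriptors may differ):

import pickle
import numpy as np
from sklearn.ensemble import RandomForestRegressor

x, y = np.random.randn(50, 3), np.random.randn(50)
model = RandomForestRegressor(n_estimators=10).fit(x, y)

# Round trip: the reloaded model must predict exactly like the original.
new_model = pickle.loads(pickle.dumps(model))
np.testing.assert_array_almost_equal(model.predict(x), new_model.predict(x))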
@@ -6,15 +6,17 @@ Created on 2017-7-20
 """
 import unittest
+
 import numpy as np
 import pandas as pd
-from alphamind.portfolio.constraints import Constraints
-from alphamind.portfolio.constraints import BoxBoundary
-from alphamind.portfolio.constraints import BoundaryImpl
+
 from alphamind.portfolio.constraints import BoundaryDirection
+from alphamind.portfolio.constraints import BoundaryImpl
 from alphamind.portfolio.constraints import BoundaryType
-from alphamind.portfolio.constraints import create_box_bounds
+from alphamind.portfolio.constraints import BoxBoundary
+from alphamind.portfolio.constraints import Constraints
 from alphamind.portfolio.constraints import LinearConstraints
+from alphamind.portfolio.constraints import create_box_bounds

 class TestConstraints(unittest.TestCase):
...
@@ -6,7 +6,9 @@ Created on 2017-11-23
 """
 import unittest
+
 import numpy as np
+
 from alphamind.portfolio.evolver import evolve_positions
...
@@ -6,7 +6,9 @@ Created on 2017-5-5
 """
 import unittest
+
 import numpy as np
+
 from alphamind.portfolio.linearbuilder import linear_builder
...
@@ -6,8 +6,10 @@ Created on 2017-5-9
 """
 import unittest
+
 import numpy as np
 import pandas as pd
+
 from alphamind.portfolio.longshortbulder import long_short_builder
@@ -37,7 +39,8 @@ class TestLongShortBuild(unittest.TestCase):
         np.testing.assert_array_almost_equal(calc_weights, expected_weights)

         calc_weights = long_short_builder(self.x, groups=self.groups)
-        expected_weights = pd.DataFrame(self.x).groupby(self.groups).apply(lambda s: s / np.abs(s).sum(axis=0))
+        expected_weights = pd.DataFrame(self.x).groupby(self.groups).apply(
+            lambda s: s / np.abs(s).sum(axis=0))
         np.testing.assert_array_almost_equal(calc_weights, expected_weights)

     def test_long_short_build_with_masks(self):
...
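The expected long-short weights scale each group's raw scores so that gross exposure, the sum of absolute weights, comes out to one per group. A sketch of the same normalization using transform instead of the test's apply:

import numpy as np
import pandas as pd

x = np.random.randn(6, 1)
groups = np.array([0, 0, 0, 1, 1, 1])

# Per group: w = s / sum(|s|), so each group's gross exposure is 1.
weights = pd.DataFrame(x).groupby(groups).transform(
    lambda s: s / np.abs(s).sum())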
@@ -6,8 +6,9 @@ Created on 2017-6-27
 """
 import unittest
+
 import numpy as np
-import pandas as pd
+
 from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
 from alphamind.portfolio.meanvariancebuilder import target_vol_builder
@@ -31,7 +32,8 @@ class TestMeanVarianceBuild(unittest.TestCase):
         risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7]))
         model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
-        status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target)
+        status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure,
+                                             risk_target)
         self.assertTrue(status == 'optimal')
         self.assertAlmostEqual(x.sum(), bm.sum())
@@ -77,7 +79,8 @@ class TestMeanVarianceBuild(unittest.TestCase):
         risk_target = (np.array([bm.sum(), 0.3]), np.array([bm.sum(), 0.7]))
         model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
-        status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target, lam=100)
+        status, _, x = mean_variance_builder(er, model, bm, lbound, ubound, risk_exposure,
+                                             risk_target, lam=100)
         self.assertTrue(status == 'optimal')
         self.assertAlmostEqual(x.sum(), bm.sum())
@@ -101,7 +104,8 @@ class TestMeanVarianceBuild(unittest.TestCase):
         risk_exposure = np.array([[1., 1., 1.]]).T
         risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))
         model = dict(cov=cov, factor_cov=None, factor_loading=None, idsync=None)
-        status, _, x = target_vol_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target, 0.1)
+        status, _, x = target_vol_builder(er, model, bm, lbound, ubound, risk_exposure, risk_target,
+                                          0.1)
         self.assertTrue(status == 'optimal')
         self.assertTrue(np.all(x <= ubound + 1.e-6))
         self.assertTrue(np.all(x >= lbound - 1.e-6))
...
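The lam argument exercised above is the usual risk-aversion parameter of a mean-variance objective: maximize er'w - lam/2 * w'Σw under bounds and linear constraints. A scipy sketch of that objective (not the builder's actual solver):

import numpy as np
from scipy.optimize import minimize

er = np.array([0.03, 0.05, 0.02])    # expected returns
cov = np.diag([0.02, 0.03, 0.01])    # toy covariance matrix
lam = 100.                           # risk aversion, as in the test above

# Minimize the negative mean-variance utility subject to full investment.
def objective(w):
    return -(er @ w) + 0.5 * lam * w @ cov @ w

res = minimize(objective, x0=np.ones(3) / 3,
               bounds=[(0., 1.)] * 3,
               constraints=[{'type': 'eq', 'fun': lambda w: w.sum() - 1.}])
w_opt = res.x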
@@ -6,8 +6,10 @@ Created on 2017-5-4
 """
 import unittest
+
 import numpy as np
 import pandas as pd
+
 from alphamind.portfolio.percentbuilder import percent_build
...
@@ -6,8 +6,10 @@ Created on 2017-4-27
 """
 import unittest
+
 import numpy as np
 import pandas as pd
+
 from alphamind.portfolio.rankbuilder import rank_build
...
@@ -6,17 +6,21 @@ Created on 2018-5-29
 """
 import unittest
+
 import numpy as np
 import pandas as pd
-from alphamind.portfolio.riskmodel import FullRiskModel
+
 from alphamind.portfolio.riskmodel import FactorRiskModel
+from alphamind.portfolio.riskmodel import FullRiskModel

 class TestRiskModel(unittest.TestCase):

     def setUp(self):
-        self.factor_cov = pd.DataFrame([[0.5, -0.3], [-0.3, 0.7]], columns=['a', 'b'], index=['a', 'b'])
-        self.risk_exp = pd.DataFrame([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]], columns=['a', 'b'], index=[1, 2, 3])
+        self.factor_cov = pd.DataFrame([[0.5, -0.3], [-0.3, 0.7]], columns=['a', 'b'],
+                                       index=['a', 'b'])
+        self.risk_exp = pd.DataFrame([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]], columns=['a', 'b'],
+                                     index=[1, 2, 3])
         self.idsync = pd.Series([0.1, 0.3, 0.2], index=[1, 2, 3])
         self.sec_cov = self.risk_exp.values @ self.factor_cov.values @ self.risk_exp.values.T \
                        + np.diag(self.idsync.values)
@@ -55,5 +59,3 @@ class TestRiskModel(unittest.TestCase):
         np.testing.assert_array_almost_equal(res, self.risk_exp)

         res = model.get_idsync()
         np.testing.assert_array_almost_equal(res, self.idsync)
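The setUp above encodes the textbook factor risk model: sec_cov = B F B' + D, with exposures B, factor covariance F, and a diagonal matrix D of idiosyncratic variances. Numerically, with the same values:

import numpy as np

B = np.array([[0.8, 0.2], [0.5, 0.5], [0.2, 0.8]])    # factor exposures
F = np.array([[0.5, -0.3], [-0.3, 0.7]])              # factor covariance
D = np.diag([0.1, 0.3, 0.2])                          # idiosyncratic variances

# Security covariance implied by the factor structure.
sec_cov = B @ F @ B.T + D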
@@ -5,10 +5,11 @@ Created on 2017-4-28

 @author: cheng.li
 """
 import unittest
+
 import numpy as np
 import pandas as pd
 from alphamind.settlement.simplesettle import simple_settle
...
@@ -14,7 +14,6 @@ if not SKIP_ENGINE_TESTS:
 else:
     DATA_ENGINE_URI = None

-
 if __name__ == '__main__':
     from simpleutils import add_parent_path
...
@@ -6,18 +6,17 @@ Created on 2017-4-25
 """
 import base64
-import pickle
 import math
+import pickle

-from simpleutils import CustomLogger
-import numpy as np
-import numba as nb
+import numba as nb
+import numpy as np
+from simpleutils import CustomLogger

 alpha_logger = CustomLogger('ALPHA_MIND', 'info')

 def map_freq(freq):
     if freq == '1m':
         horizon = 21
     elif freq == '1w':
@@ -97,7 +96,6 @@ def simple_sum(x, axis=0):

 @nb.njit(nogil=True, cache=True)
 def simple_abssum(x, axis=0):
     length, width = x.shape
-
     if axis == 0:
@@ -189,7 +187,7 @@ def simple_std(x, axis=0, ddof=1):

 def agg_sum(groups, x):
     max_g = groups.max()
     length, width = x.shape
-    res = np.zeros((max_g+1, width), dtype=np.float64)
+    res = np.zeros((max_g + 1, width), dtype=np.float64)

     for i in range(length):
         for j in range(width):
@@ -215,7 +213,7 @@ def agg_sqrsum(groups, x):

 def agg_abssum(groups, x):
     max_g = groups.max()
     length, width = x.shape
-    res = np.zeros((max_g+1, width), dtype=np.float64)
+    res = np.zeros((max_g + 1, width), dtype=np.float64)

     for i in range(length):
         for j in range(width):
@@ -227,15 +225,15 @@ def agg_abssum(groups, x):

 def agg_mean(groups, x):
     max_g = groups.max()
     length, width = x.shape
-    res = np.zeros((max_g+1, width), dtype=np.float64)
-    bin_count = np.zeros(max_g+1, dtype=np.int32)
+    res = np.zeros((max_g + 1, width), dtype=np.float64)
+    bin_count = np.zeros(max_g + 1, dtype=np.int32)

     for i in range(length):
         for j in range(width):
             res[groups[i], j] += x[i, j]
         bin_count[groups[i]] += 1

-    for i in range(max_g+1):
+    for i in range(max_g + 1):
         curr = bin_count[i]
         for j in range(width):
             res[i, j] /= curr
@@ -246,9 +244,9 @@ def agg_mean(groups, x):

 def agg_std(groups, x, ddof=1):
     max_g = groups.max()
     length, width = x.shape
-    res = np.zeros((max_g+1, width), dtype=np.float64)
+    res = np.zeros((max_g + 1, width), dtype=np.float64)
     sumsq = np.zeros((max_g + 1, width), dtype=np.float64)
-    bin_count = np.zeros(max_g+1, dtype=np.int32)
+    bin_count = np.zeros(max_g + 1, dtype=np.int32)

     for i in range(length):
         for j in range(width):
@@ -256,7 +254,7 @@ def agg_std(groups, x, ddof=1):
             sumsq[groups[i], j] += x[i, j] * x[i, j]
         bin_count[groups[i]] += 1

-    for i in range(max_g+1):
+    for i in range(max_g + 1):
         curr = bin_count[i]
         for j in range(width):
             res[i, j] = math.sqrt((sumsq[i, j] - res[i, j] * res[i, j] / curr) / (curr - ddof))
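agg_std accumulates sums and squared sums in one pass and then applies the identity var = (Σx² - (Σx)²/n) / (n - ddof); a quick numpy check of that identity:

import numpy as np

x = np.random.randn(100)
n, ddof = len(x), 1

# One-pass sample variance: (sum of squares - square of sum / n) / (n - ddof).
one_pass = (np.sum(x * x) - np.sum(x) ** 2 / n) / (n - ddof)
assert np.isclose(np.sqrt(one_pass), x.std(ddof=1))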
@@ -304,9 +302,8 @@ def array_index(array, items):

 def transform(groups: np.ndarray,
               x: np.ndarray,
               func: str,
-              ddof: int=1,
-              scale: float=1.) -> np.ndarray:
+              ddof: int = 1,
+              scale: float = 1.) -> np.ndarray:
-
     if func == 'mean':
         value_data = agg_mean(groups, x)
     elif func == 'std':
...
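transform dispatches one of the numba aggregations above by name and maps the per-group result back onto rows. A hedged usage sketch, assuming the signature shown here and that the aggregated value is broadcast back to row level:

import numpy as np
from alphamind.utilities import transform

groups = np.array([0, 0, 1, 1])
x = np.array([[1.], [3.], [2.], [6.]])

# Expected under this reading: each row gets its group mean, i.e.
# [[2.], [2.], [4.], [4.]].
out = transform(groups, x, 'mean')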