Commit 19bb12be authored by Dr.李

update standardize to avoid overflow

parent a4d062a0
...@@ -22,9 +22,9 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray: ...@@ -22,9 +22,9 @@ def standardize(x: np.ndarray, groups: np.ndarray=None, ddof=1) -> np.ndarray:
mean_values = transform(groups, x, 'mean') mean_values = transform(groups, x, 'mean')
std_values = transform(groups, x, 'std', ddof) std_values = transform(groups, x, 'std', ddof)
return (x - mean_values) / std_values return (x - mean_values) / np.maximum(std_values, 1e-8)
else: else:
return (x - simple_mean(x, axis=0)) / simple_std(x, axis=0, ddof=ddof) return (x - simple_mean(x, axis=0)) / np.maximum(simple_std(x, axis=0, ddof=ddof), 1e-8)
def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray: def projection(x: np.ndarray, groups: np.ndarray=None, axis=1) -> np.ndarray:
...@@ -48,7 +48,7 @@ class Standardizer(object): ...@@ -48,7 +48,7 @@ class Standardizer(object):
self.std_ = simple_std(x, axis=0, ddof=self.ddof_) self.std_ = simple_std(x, axis=0, ddof=self.ddof_)
def transform(self, x: np.ndarray) -> np.ndarray: def transform(self, x: np.ndarray) -> np.ndarray:
return (x - self.mean_) / self.std_ return (x - self.mean_) / np.maximum(self.std_, 1e-8)
class GroupedStandardizer(object): class GroupedStandardizer(object):
...@@ -69,4 +69,4 @@ class GroupedStandardizer(object): ...@@ -69,4 +69,4 @@ class GroupedStandardizer(object):
def transform(self, x: np.ndarray) -> np.ndarray: def transform(self, x: np.ndarray) -> np.ndarray:
groups = x[:, 0].astype(int) groups = x[:, 0].astype(int)
index = array_index(self.labels_, groups) index = array_index(self.labels_, groups)
return (x[:, 1:] - self.mean_[index]) / self.std_[index] return (x[:, 1:] - self.mean_[index]) / np.maximum(self.std_[index], 1e-8)
# -*- coding: utf-8 -*-
"""
Created on 2018-1-15
@author: cheng.li
"""
import numpy as np
import pandas as pd
from PyFin.api import *
from alphamind.api import *
def factor_residue_analysis(start_date,
                            end_date,
                            factor,
                            freq,
                            universe,
                            engine):
    """Quantile-return analysis of a factor after residualising it.

    The raw ``factor`` is wrapped as
    ``CSRes(CSRes(LAST(factor), LAST('roe_q')), CSRes(LAST('ep_q'), 'roe_q'))``
    and fetched under the name ``factor + '_res'``.  For every ``trade_date``
    group in the fetched factor data, the factor values are passed through
    ``factor_processing`` (with ``winsorize_normal`` and ``standardize`` as
    both pre- and post-processing, and ``'SIZE'``, ``'LEVERAGE'`` plus
    ``industry_styles`` as risk factors) and then through
    ``er_quantile_analysis`` with 5 bins.

    Parameters
    ----------
    start_date, end_date :
        Schedule bounds handed to ``makeSchedule``.
    factor :
        Factor name; concatenated with ``'_res'`` and passed to ``LAST``.
    freq :
        Used as the schedule tenor and converted to a return horizon with
        ``map_freq``.
    universe :
        Forwarded unchanged to both ``engine`` fetch calls.
    engine :
        Object providing ``fetch_data_range`` and ``fetch_dx_return_range``.

    Returns
    -------
    pandas.DataFrame
        One row per factor date plus one extra all-zero row at
        ``advanceDateByCalendar('china.sse', dates[0], '-1d')``, sorted by
        index.  Columns ``0``..``4`` hold the per-bin results and
        ``'$top1 - top5$'`` holds column ``0`` minus column ``4``.

    Notes
    -----
    Any exception raised by ``factor_processing`` or ``er_quantile_analysis``
    for a date is printed and that date's row is left as zeros.
    NOTE(review): ``return_groups.get_group(date)`` sits outside the ``try``,
    so a factor date without return data propagates an error - confirm this
    is intended.
    """
    neutralize_risk = ['SIZE', 'LEVERAGE'] + industry_styles
    n_bins = 5

    horizon = map_freq(freq)
    dates = makeSchedule(start_date,
                         end_date,
                         tenor=freq,
                         calendar='china.sse')

    # Build the residualised factor expression.
    alpha_factor_name = factor + '_res'
    roe_base = LAST('roe_q')
    ep_base = CSRes(LAST('ep_q'), 'roe_q')
    residual_expr = CSRes(CSRes(LAST(factor), roe_base), ep_base)
    alpha_factor = {alpha_factor_name: residual_expr}

    fetched = engine.fetch_data_range(universe, alpha_factor, dates=dates)
    factor_all_data = fetched['factor']
    return_all_data = engine.fetch_dx_return_range(universe,
                                                   dates=dates,
                                                   horizon=horizon)

    factor_groups = factor_all_data.groupby('trade_date')
    return_groups = return_all_data.groupby('trade_date')

    # One row per factor date, one column per quantile bin.
    final_res = np.zeros((len(factor_groups.groups), n_bins))
    index_dates = []
    wanted_columns = ['code', alpha_factor_name, 'isOpen'] + neutralize_risk

    for row, (date, frame) in enumerate(factor_groups):
        data = frame[wanted_columns]
        returns = return_groups.get_group(date)
        total_data = pd.merge(data, returns, on=['code']).dropna()

        risk_exp = total_data[neutralize_risk].values.astype(float)
        dx_return = total_data.dx.values
        index_dates.append(date)

        try:
            er = factor_processing(total_data[[alpha_factor_name]].values,
                                   pre_process=[winsorize_normal, standardize],
                                   risk_factors=risk_exp,
                                   post_process=[winsorize_normal, standardize])
            res = er_quantile_analysis(er,
                                       n_bins=n_bins,
                                       dx_return=dx_return)
        except Exception as e:
            # Best effort: report the failure and record zeros for this date.
            print(e)
            res = np.zeros(n_bins)

        final_res[row] = res

    df = pd.DataFrame(final_res, index=index_dates)

    # Add an all-zero row one calendar step before the first schedule date.
    origin_date = advanceDateByCalendar('china.sse', dates[0], '-1d')
    df.loc[origin_date] = 0.
    df = df.sort_index()

    df['$top1 - top5$'] = df[0] - df[4]
    return df
# Data source used for the coverage query and for every factor analysis below.
engine = SqlEngine()

# Mean of the coverage table per factor; keep factors with coverage >= 0.98.
df = engine.fetch_factor_coverage().groupby('factor').mean()
df = df.loc[df.coverage >= 0.98]

universe = Universe('custom', ['zz800'])

# Collects one '$top1 - top5$' column per analysed factor.
factor_df = pd.DataFrame()

for i, factor in enumerate(df.index):
    res = factor_residue_analysis(start_date='2012-01-01',
                                  end_date='2018-01-05',
                                  factor=factor,
                                  freq='5b',
                                  universe=universe,
                                  engine=engine)
    factor_df[factor] = res['$top1 - top5$']
    alpha_logger.info('{0}: {1} is done'.format(i + 1, factor))
Subproject commit a187ed6c8f3aa40b47d5be80667cbbe6a6fd563d Subproject commit bf4367184164e593cd2856ef38f8dd4f8cc76999
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment