Commit 39c46140 authored by Dr.李

update sql engine

parent d9bca04c
...@@ -378,7 +378,6 @@ class SqlEngine(object): ...@@ -378,7 +378,6 @@ class SqlEngine(object):
if col not in set(['code', 'isOpen']) and col not in df.columns: if col not in set(['code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values df[col] = res[col].values
df.dropna(inplace=True)
df['isOpen'] = df.isOpen.astype(bool) df['isOpen'] = df.isOpen.astype(bool)
df = df.loc[ref_date] df = df.loc[ref_date]
df.index = list(range(len(df))) df.index = list(range(len(df)))
...@@ -447,7 +446,6 @@ class SqlEngine(object): ...@@ -447,7 +446,6 @@ class SqlEngine(object):
if col not in set(['code', 'isOpen']) and col not in df.columns: if col not in set(['code', 'isOpen']) and col not in df.columns:
df[col] = res[col].values df[col] = res[col].values
df.dropna(inplace=True)
df['isOpen'] = df.isOpen.astype(bool) df['isOpen'] = df.isOpen.astype(bool)
df = df.reset_index() df = df.reset_index()
return pd.merge(df, universe_df[['trade_date', 'code']], how='inner') return pd.merge(df, universe_df[['trade_date', 'code']], how='inner')
......
...@@ -238,19 +238,38 @@ class Composer(object): ...@@ -238,19 +238,38 @@ class Composer(object):
if __name__ == '__main__': if __name__ == '__main__':
from PyFin.api import LAST from alphamind.api import (risk_styles,
from alphamind.data.engines.sqlengine import risk_styles, industry_styles industry_styles,
from alphamind.model.linearmodel import LinearRegression standardize,
winsorize_normal,
DataMeta,
LinearRegression,
fetch_data_package,
map_freq)
from PyFin.api import LAST, SHIFT
freq = '60b'
universe = Universe('custom', ['ashare_ex']) universe = Universe('custom', ['ashare_ex'])
freq = '20b' batch = 1
batch = 0 neutralized_risk = industry_styles
neutralized_risk = risk_styles + industry_styles
risk_model = 'short' risk_model = 'short'
pre_process = [winsorize_normal, standardize] pre_process = [winsorize_normal, standardize]
post_process = [standardize] post_process = [standardize]
warm_start = 0 warm_start = 3
data_source = "postgres+psycopg2://postgres:we083826@localhost/alpha" data_source = None
horizon = map_freq(freq)
engine = SqlEngine(data_source)
fit_intercept = True
kernal_feature = 'roe_q'
regress_features = {kernal_feature: LAST(kernal_feature),
kernal_feature + '_l1': SHIFT(kernal_feature, 1),
kernal_feature + '_l2': SHIFT(kernal_feature, 2),
kernal_feature + '_l3': SHIFT(kernal_feature, 3)
}
const_features = {kernal_feature: LAST(kernal_feature)}
fit_target = [kernal_feature]
data_meta = DataMeta(freq=freq, data_meta = DataMeta(freq=freq,
universe=universe, universe=universe,
...@@ -262,9 +281,28 @@ if __name__ == '__main__': ...@@ -262,9 +281,28 @@ if __name__ == '__main__':
warm_start=warm_start, warm_start=warm_start,
data_source=data_source) data_source=data_source)
alpha_model = LinearRegression({'roe_q': LAST('roe_q')}, fit_target='roe_q') alpha_model = LinearRegression(features=regress_features, fit_intercept=True, fit_target=fit_target)
composer = Composer(alpha_model=alpha_model, data_meta=data_meta) composer = Composer(alpha_model=alpha_model, data_meta=data_meta)
ref_date = '2018-01-30' start_date = '2014-01-01'
composer.train(ref_date) end_date = '2016-01-01'
res = composer.predict(ref_date)
\ No newline at end of file regression_model = LinearRegression(features=regress_features, fit_intercept=fit_intercept, fit_target=fit_target)
regression_composer = Composer(alpha_model=regression_model, data_meta=data_meta)
#regression_composer.train('2010-07-07')
data_package1 = fetch_data_package(engine,
alpha_factors=[kernal_feature],
start_date=start_date,
end_date=end_date,
frequency=freq,
universe=universe,
benchmark=906,
warm_start=warm_start,
batch=1,
neutralized_risk=neutralized_risk,
pre_process=pre_process,
post_process=post_process,
fit_target=fit_target)
pass
\ No newline at end of file
...@@ -30,7 +30,7 @@ def _merge_df(engine, names, factor_df, target_df, universe, dates, risk_model, ...@@ -30,7 +30,7 @@ def _merge_df(engine, names, factor_df, target_df, universe, dates, risk_model,
risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1] risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
used_neutralized_risk = list(set(total_risk_factors).difference(names)) used_neutralized_risk = list(set(total_risk_factors).difference(names))
risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna() risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
target_df = pd.merge(target_df, risk_df, on=['trade_date', 'code']) target_df = pd.merge(target_df, risk_df, on=['trade_date', 'code']).dropna()
if neutralized_risk: if neutralized_risk:
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code']) train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
...@@ -220,7 +220,7 @@ def fetch_data_package(engine: SqlEngine, ...@@ -220,7 +220,7 @@ def fetch_data_package(engine: SqlEngine,
frequency, frequency,
universe, universe,
benchmark, benchmark,
warm_start, warm_start + batch,
fit_target=fit_target) fit_target=fit_target)
target_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \ target_df, dates, date_label, risk_exp, x_values, y_values, train_x, train_y, codes = \
...@@ -255,7 +255,17 @@ def fetch_data_package(engine: SqlEngine, ...@@ -255,7 +255,17 @@ def fetch_data_package(engine: SqlEngine,
ret = dict() ret = dict()
ret['x_names'] = names ret['x_names'] = names
ret['settlement'] = target_df ret['settlement'] = target_df[target_df.trade_date >= start_date]
train_x_buckets = {k: train_x_buckets[k] for k in train_x_buckets if k.strftime('%Y-%m-%d') >= start_date}
train_y_buckets = {k: train_y_buckets[k] for k in train_y_buckets if k.strftime('%Y-%m-%d') >= start_date}
train_risk_buckets = {k: train_risk_buckets[k] for k in train_risk_buckets if k.strftime('%Y-%m-%d') >= start_date}
predict_x_buckets = {k: predict_x_buckets[k] for k in predict_x_buckets if k.strftime('%Y-%m-%d') >= start_date}
predict_y_buckets = {k: predict_y_buckets[k] for k in predict_y_buckets if k.strftime('%Y-%m-%d') >= start_date}
predict_risk_buckets = {k: predict_risk_buckets[k] for k in predict_risk_buckets if k.strftime('%Y-%m-%d') >= start_date}
predict_codes_bucket = {k: predict_codes_bucket[k] for k in predict_codes_bucket if k.strftime('%Y-%m-%d') >= start_date}
ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets} ret['train'] = {'x': train_x_buckets, 'y': train_y_buckets, 'risk': train_risk_buckets}
ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets, ret['predict'] = {'x': predict_x_buckets, 'y': predict_y_buckets, 'risk': predict_risk_buckets,
'code': predict_codes_bucket} 'code': predict_codes_bucket}
...@@ -403,7 +413,7 @@ def fetch_predict_phase(engine, ...@@ -403,7 +413,7 @@ def fetch_predict_phase(engine,
train_x = pd.merge(factor_df, target_df, on=['trade_date', 'code'], how='left') train_x = pd.merge(factor_df, target_df, on=['trade_date', 'code'], how='left')
risk_exp = None risk_exp = None
train_x.dropna(inplace=True) train_x.dropna(inplace=True, subset=train_x.columns[:-1])
x_values = train_x[names].values.astype(float) x_values = train_x[names].values.astype(float)
y_values = train_x[['dx']].values.astype(float) y_values = train_x[['dx']].values.astype(float)
...@@ -456,18 +466,3 @@ def fetch_predict_phase(engine, ...@@ -456,18 +466,3 @@ def fetch_predict_phase(engine,
return ret return ret
if __name__ == '__main__':
from alphamind.api import risk_styles, industry_styles, standardize
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('zz500', ['hs300', 'zz500'])
neutralized_risk = risk_styles + industry_styles
res = fetch_train_phase(engine, ['ep_q'],
'2012-01-05',
'5b',
universe,
2,
neutralized_risk=neutralized_risk,
post_process=[standardize],
fit_target='closePrice')
print(res)
...@@ -294,6 +294,3 @@ class TestSqlEngine(unittest.TestCase): ...@@ -294,6 +294,3 @@ class TestSqlEngine(unittest.TestCase):
) )
cov_df = pd.read_sql(query, con=self.engine.engine) cov_df = pd.read_sql(query, con=self.engine.engine)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment