Commit 8c8d4550 authored by Dr.李

update strategy

parent 4c100448
......@@ -5,41 +5,100 @@ Created on 2017-9-5
@author: cheng.li
"""
import sqlalchemy as sa
import arrow
import numpy as np
import math
import pandas as pd
import numpy as np
from PyFin.api import *
from alphamind.api import *
from alphamind.data.dbmodel.models import Models
from alphamind.model.linearmodel import LinearRegression
# NOTE(review): this file is a scraped commit-diff page; indentation is lost and
# old/new lines are interleaved. Code below is left byte-identical; only
# comments are added.
#
# --- Backtest / data-fetch configuration (module-level globals used by
# factor_analysis below) ---
# SECURITY NOTE(review): database credentials are hard-coded in the DSN below;
# they should come from config/env, not source control.
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
# Default factor name (the __main__ section later maps over a longer list).
factor = 'ROE'
# Stock universe: CSI 800 ('zz800').
universe = Universe('custom', ['zz800'])
start_date = '2010-01-01'
end_date = '2018-04-26'
# Rebalance frequency: every 10 business days ('10b').
freq = '10b'
# Industry classification: adjusted ShenWan, level 1.
category = 'sw_adj'
level = 1
# Forward-return horizon implied by the rebalance frequency.
horizon = map_freq(freq)
# Rebalance dates on the Shanghai Stock Exchange calendar.
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
# Compute per-rebalance-date quantile-portfolio returns and ICs for one factor
# name, in three variants:
#   'f1' -- cross-sectional quantiles of the raw factor,
#   'f2' -- quantiles within adjusted SW level-1 industry groups,
#   'f3' -- raw value, industry-neutralized inside the loop below.
# Returns a dict with ('mean', 'std-error') pairs for IC and return series.
#
# NOTE(review): several lines in this body are residue of the *removed* side of
# the diff (random x/y data and a LinearRegression fit, plus everything after
# the `return`, which is unreachable) -- they are marked below and kept
# byte-identical.
def factor_analysis(factor):
# A fresh no-arg engine per call -- presumably so the function is
# self-contained when shipped to a dask worker via client.map (see the
# __main__ section); TODO confirm SqlEngine() picks up a default DSN.
engine = SqlEngine()
factors = {
'f1': CSQuantiles(factor),
'f2': CSQuantiles(factor, groups='sw1_adj'),
'f3': LAST(factor)
}
# Pull factor values, risk-model exposures, industry labels and forward
# returns for all dates in the module-level ref_dates schedule.
total_factor = engine.fetch_factor_range(universe, factors, dates=ref_dates)
_, risk_exp = engine.fetch_risk_model_range(universe, dates=ref_dates)
industry = engine.fetch_industry_range(universe, dates=ref_dates, category=category, level=level)
rets = engine.fetch_dx_return_range(universe, horizon=horizon, offset=1, dates=ref_dates)
# Join everything on (trade_date, code); drop rows with any missing values.
total_factor = pd.merge(total_factor, industry[['trade_date', 'code', 'industry']], on=['trade_date', 'code'])
total_factor = pd.merge(total_factor, risk_exp, on=['trade_date', 'code'])
total_factor = pd.merge(total_factor, rets, on=['trade_date', 'code']).dropna()
# One row per date, one column per factor variant.
df_ret = pd.DataFrame(columns=['f1', 'f2', 'f3'])
df_ic = pd.DataFrame(columns=['f1', 'f2', 'f3'])
total_factor_groups = total_factor.groupby('trade_date')
for date, this_factors in total_factor_groups:
# Neutralize the raw variant 'f3' against industry + COUNTRY exposures,
# then map to cross-sectional percentiles.
raw_factors = this_factors['f3'].values
industry_exp = this_factors[industry_styles + ['COUNTRY']].values.astype(float)
processed_values = factor_processing(raw_factors, pre_process=[], risk_factors=industry_exp,
post_process=[percentile])
# NOTE(review): assigning into a groupby slice -- pandas SettingWithCopy
# territory; works here only because the slice is used immediately.
this_factors['f3'] = processed_values
factor_values = this_factors[['f1', 'f2', 'f3']].values
# Long the top quintile (percentile >= 0.8), short the bottom
# (<= 0.2), then scale each column to unit gross exposure.
positions = (factor_values >= 0.8) * 1.
positions[factor_values <= 0.2] = -1
positions /= np.abs(positions).sum(axis=0)
# Portfolio return per variant: forward returns dotted with weights.
ret_values = this_factors.dx.values @ positions
df_ret.loc[date] = ret_values
# IC per variant: first row of corr([dx, f1, f2, f3]) excluding dx itself,
# i.e. corr(dx, f*) for each variant.
ic_values = this_factors[['dx', 'f1', 'f2', 'f3']].corr().values[0, 1:]
df_ic.loc[date] = ic_values
# NOTE(review): the next two lines (random training data) belong to the
# removed version of this file -- diff-interleaving artifact.
x = np.random.randn(1000, 3)
y = np.random.randn(1000)
print(f"{factor} is finished")
# NOTE(review): removed-version residue (toy LinearRegression fit).
model = LinearRegression(['a', 'b', 'c'])
model.fit(x, y)
# Mean and standard error (std / sqrt(N)) per variant.
# NOTE(review): the 'ret' standard error divides by len(df_ic) -- same length
# as df_ret here (both indexed by date), but it reads like a copy-paste;
# confirm intent.
return {'ic': (df_ic.mean(axis=0), df_ic.std(axis=0) / math.sqrt(len(df_ic))),
'ret': (df_ret.mean(axis=0), df_ret.std(axis=0) / math.sqrt(len(df_ic))),
'factor': factor}
# NOTE(review): this __main__ section is two scripts interleaved by the diff
# view: the REMOVED version persisted a fitted model description to the Models
# table via sqlalchemy, and the ADDED version fans factor_analysis out over a
# dask cluster and tabulates the results. Lines are annotated but kept
# byte-identical; as pasted, this would not run (e.g. `model` / `model_desc`
# are unreachable-code residue from the old version).
model_desc = model.save()
df = pd.DataFrame()
if __name__ == '__main__':
# Old version: one metadata row describing the saved model.
new_row = dict(trade_date='2017-09-05',
portfolio_name='test',
model_type='LinearRegression',
version=1,
model_desc=model_desc,
update_time=arrow.now().format())
# New version: connect to the dask scheduler for distributed execution.
from dask.distributed import Client
df = df.append([new_row])
try:
client = Client("10.63.6.176:8786")
# Result tables: (mean, std) x (raw, peer, neutralized) per factor --
# matching the f1/f2/f3 variants produced by factor_analysis.
cols = pd.MultiIndex.from_product([['mean', 'std'], ['raw', 'peer', 'neutralized']])
factors_ret = pd.DataFrame(columns=cols)
factors_ic = pd.DataFrame(columns=cols)
# Old version: persist the model row, storing model_desc as JSON.
df.to_sql(Models.__table__.name, engine.engine,
if_exists='append',
index=False,
dtype={'model_desc': sa.types.JSON})
# Factor names to analyze in parallel, one task per factor.
factors = ['ep_q',
'roe_q',
'SGRO',
'GREV',
'IVR',
'ILLIQUIDITY',
'con_target_price',
'con_pe_rolling_order',
'DividendPaidRatio']
l = client.map(factor_analysis, factors)
results = client.gather(l)
# Old version: round-trip check that the model row was stored.
model_in_db = engine.fetch_model('2017-09-05')
# Collect per-factor mean / standard-error rows into the summary tables.
for res in results:
factor = res['factor']
factors_ret.loc[factor, 'mean'] = res['ret'][0].values
factors_ret.loc[factor, 'std'] = res['ret'][1].values
print(model_in_db)
factors_ic.loc[factor, 'mean'] = res['ic'][0].values
factors_ic.loc[factor, 'std'] = res['ic'][1].values
print(factors_ret)
finally:
# Always release the dask connection, even on failure.
client.close()
......@@ -104,6 +104,7 @@ class Strategy(object):
total_data = pd.merge(total_data, total_benchmark, on=['trade_date', 'code'], how='left')
total_data.fillna({'weight': 0.}, inplace=True)
total_data = pd.merge(total_data, total_returns, on=['trade_date', 'code'])
total_data = pd.merge(total_data, total_risk_exposure, on=['trade_date', 'code'])
is_in_benchmark = (total_data.weight > 0.).astype(float).reshape((-1, 1))
total_data.loc[:, 'benchmark'] = is_in_benchmark
......@@ -134,7 +135,7 @@ class Strategy(object):
for ref_date, this_data in total_data_groups:
new_model = models[ref_date]
this_data.fillna(total_data.median(), inplace=True)
this_data = this_data.fillna(this_data[new_model.features].median())
codes = this_data.code.values.tolist()
if self.running_setting.rebalance_method == 'tv':
......@@ -251,35 +252,34 @@ if __name__ == '__main__':
start_date = '2011-01-01'
end_date = '2018-05-04'
freq = '5b'
freq = '20b'
neutralized_risk = None
universe = Universe("custom", ['zz800', 'cyb', 'zz1000'])
universe = Universe("custom", ['zz800'])
dask_client = Client('10.63.6.176:8786')
factor = CSQuantiles(LAST('NetProfitRatio'),
groups='sw1_adj')
alpha_factors = {
'f01': CSQuantiles(LAST('ep_q'), groups='sw1_adj'),
'f02': CSQuantiles(LAST('roe_q'), groups='sw1_adj'),
'f03': CSQuantiles(LAST('SGRO'), groups='sw1_adj'),
'f04': CSQuantiles(LAST('GREV'), groups='sw1_adj'),
'f05': CSQuantiles(LAST('con_peg_rolling'), groups='sw1_adj'),
'f06': CSQuantiles(LAST('con_pe_rolling_order'), groups='sw1_adj'),
'f07': CSQuantiles(LAST('IVR'), groups='sw1_adj'),
'f08': CSQuantiles(LAST('ILLIQUIDITY'), groups='sw1_adj'),
'f09': CSQuantiles(LAST('DividendPaidRatio'), groups='sw1_adj'),
str(factor): factor,
}
alpha_model = XGBTrainer(objective='reg:linear',
booster='gbtree',
n_estimators=300,
eval_sample=0.25,
features=alpha_factors)
weights = {str(factor): 1.}
# alpha_model = XGBTrainer(objective='reg:linear',
# booster='gbtree',
# n_estimators=300,
# eval_sample=0.25,
# features=alpha_factors)
alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
data_meta = DataMeta(freq=freq,
universe=universe,
batch=32,
neutralized_risk=None, # industry_styles,
pre_process=None, # [winsorize_normal, standardize],
post_process=None) # [standardize])
post_process=None,
warm_start=12) # [standardize])
industries = industry_list('sw_adj', 1)
......@@ -301,7 +301,7 @@ if __name__ == '__main__':
start_date,
end_date,
freq,
benchmark=905,
benchmark=906,
weights_bandwidth=0.01,
rebalance_method='tv',
bounds=bounds,
......@@ -311,5 +311,5 @@ if __name__ == '__main__':
strategy = Strategy(alpha_model, data_meta, running_setting, dask_client=dask_client)
ret_df, positions = strategy.run()
ret_df[['excess_return', 'turn_over']].cumsum().plot(secondary_y='turn_over')
plt.title(f"{alpha_factors.keys()}")
plt.title(f"{str(factor)[20:40]}")
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment