update schedule setting

093c1659 · Dr.李 · b9b8fdd2 · 093c1659 · 093c1659 · 093c1659
Commit 093c1659 authored Nov 19, 2017 by Dr.李
Hide whitespace changes
Inline Side-by-side

Showing with 201 additions and 194 deletions

api.py alphamind/api.py +2 -0

sqlengine.py alphamind/data/engines/sqlengine.py +8 -6

combined_model_training.py alphamind/examples/combined_model_training.py +191 -188

No files found.
--- a/alphamind/api.py
+++ b/alphamind/api.py
@@ -50,6 +50,8 @@ def map_freq(freq):
        horizon = 19
    elif freq == '1d':
        horizon = 0
+    elif freq[-1] == "b":
+        horizon = int(freq[:-1]) - 1
    else:
        raise ValueError("Unrecognized freq: {0}".format(freq))
    return horizon

--- a/alphamind/data/engines/sqlengine.py
+++ b/alphamind/data/engines/sqlengine.py
@@ -184,18 +184,19 @@ class SqlEngine(object):
                        ref_date: str,
                        codes: Iterable[int],
                        expiry_date: str = None,
-                        horizon: int = 0) -> pd.DataFrame:
+                        horizon: int = 0,
+                        offset: int = 0) -> pd.DataFrame:
        start_date = ref_date
        if not expiry_date:
-            end_date = advanceDateByCalendar('china.sse', ref_date, str(horizon + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
+            end_date = advanceDateByCalendar('china.sse', ref_date, str(horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y%m%d')
        else:
            end_date = expiry_date
        stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
            partition_by=DailyReturn.code,
            order_by=DailyReturn.trade_date,
-            rows=(DAILY_RETURN_OFFSET, horizon + DAILY_RETURN_OFFSET)).label('dx')
+            rows=(DAILY_RETURN_OFFSET + offset, horizon + DAILY_RETURN_OFFSET + offset)).label('dx')
        query = select([DailyReturn.trade_date, DailyReturn.code, stats]).where(
            and_(
@@ -213,13 +214,14 @@ class SqlEngine(object):
                              start_date: str = None,
                              end_date: str = None,
                              dates: Iterable[str] = None,
-                              horizon: int = 0) -> pd.DataFrame:
+                              horizon: int = 0,
+                              offset: int = 0) -> pd.DataFrame:
        if dates:
            start_date = dates[0]
            end_date = dates[-1]
-        end_date = advanceDateByCalendar('china.sse', end_date, str(horizon + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
+        end_date = advanceDateByCalendar('china.sse', end_date, str(horizon + offset + DAILY_RETURN_OFFSET) + 'b').strftime('%Y-%m-%d')
        cond = universe.query_range(start_date, end_date)
        big_table = join(DailyReturn, UniverseTable,
@@ -230,7 +232,7 @@ class SqlEngine(object):
        stats = func.sum(self.ln_func(1. + DailyReturn.d1)).over(
            partition_by=DailyReturn.code,
            order_by=DailyReturn.trade_date,
-            rows=(DAILY_RETURN_OFFSET, horizon + DAILY_RETURN_OFFSET)).label('dx')
+            rows=(offset + DAILY_RETURN_OFFSET, horizon + offset + DAILY_RETURN_OFFSET)).label('dx')
        query = select([DailyReturn.trade_date, DailyReturn.code, stats]) \
            .select_from(big_table)

--- a/alphamind/examples/combined_model_training.py
+++ b/alphamind/examples/combined_model_training.py
@@ -25,18 +25,18 @@ end_date = '2017-11-15'
 benchmark_code = 300
 universe_name = ['zz500', 'hs300']
 universe = Universe(universe_name, universe_name)
-frequency = '2w'
+frequency = '2b'
 batch = 8
 method = 'risk_neutral'
 use_rank = 100
 industry_lower = 1.
 industry_upper = 1.
 neutralize_risk = ['SIZE'] + industry_styles
-constraint_risk = industry_styles
+constraint_risk = ['SIZE'] + industry_styles
 size_risk_lower = 0
 size_risk_upper = 0
-turn_over_target_base = 0.2
+turn_over_target_base = 0.1
-weight_gap = 0.03
+weight_gaps = [0.01, 0.02, 0.03, 0.04]
 benchmark_total_lower = 0.8
 benchmark_total_upper = 1.
 horizon = map_freq(frequency)
@@ -101,7 +101,7 @@ for ref_date in ref_dates:
    alpha_logger.info('trade_date: {0} training finished'.format(ref_date))
-frequency = '1w'
+frequency = '2b'
 ref_dates = makeSchedule(start_date, end_date, frequency, 'china.sse')
 const_model_factor_data = engine.fetch_data_range(universe,
@@ -117,187 +117,190 @@ Predicting and re-balance phase
 factor_groups = const_model_factor_data.groupby('trade_date')
-rets = []
+for weight_gap in weight_gaps:
-turn_overs = []
+    print("start {0} weight gap simulation ...".format(weight_gap))
-leverags = []
-previous_pos = pd.DataFrame()
+    rets = []
+    turn_overs = []
-index_dates = []
+    leverags = []
+    previous_pos = pd.DataFrame()
-for i, value in enumerate(factor_groups):
-    date = value[0]
+    index_dates = []
-    data = value[1]
-    ref_date = date.strftime('%Y-%m-%d')
+    for i, value in enumerate(factor_groups):
+        date = value[0]
-    total_data = data.fillna(data[total_features].median())
+        data = value[1]
-    alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
+        ref_date = date.strftime('%Y-%m-%d')
-    risk_exp = total_data[neutralize_risk].values.astype(float)
-    industry = total_data.industry_code.values
+        total_data = data.fillna(data[total_features].median())
-    benchmark_w = total_data.weight.values
+        alpha_logger.info('{0}: {1}'.format(date, len(total_data)))
+        risk_exp = total_data[neutralize_risk].values.astype(float)
-    constraint_exp = total_data[constraint_risk].values
+        industry = total_data.industry_code.values
-    risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
+        benchmark_w = total_data.weight.values
-    risk_names = constraint_risk + ['total']
+        constraint_exp = total_data[constraint_risk].values
-    risk_target = risk_exp_expand.T @ benchmark_w
+        risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
-    lbound = np.maximum(0., hedging_ratio * benchmark_w - weight_gap)  # np.zeros(len(total_data))
+        risk_names = constraint_risk + ['total']
-    ubound = weight_gap + hedging_ratio * benchmark_w
+        risk_target = risk_exp_expand.T @ benchmark_w
-    is_in_benchmark = (benchmark_w > 0.).astype(float)
+        lbound = np.maximum(0., hedging_ratio * benchmark_w - weight_gap)  # np.zeros(len(total_data))
+        ubound = weight_gap + hedging_ratio * benchmark_w
-    risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
-    risk_names.append('benchmark_total')
+        is_in_benchmark = (benchmark_w > 0.).astype(float)
-    constraint = Constraints(risk_exp_expand, risk_names)
+        risk_exp_expand = np.concatenate((risk_exp_expand, is_in_benchmark.reshape((-1, 1))), axis=1).astype(float)
+        risk_names.append('benchmark_total')
-    for i, name in enumerate(risk_names):
-        if name == 'total':
+        constraint = Constraints(risk_exp_expand, risk_names)
-            constraint.set_constraints(name,
-                                       lower_bound=risk_target[i],
+        for i, name in enumerate(risk_names):
-                                       upper_bound=risk_target[i])
+            if name == 'total':
-        elif name == 'SIZE':
+                constraint.set_constraints(name,
-            base_target = abs(risk_target[i])
+                                           lower_bound=risk_target[i],
-            constraint.set_constraints(name,
+                                           upper_bound=risk_target[i])
-                                       lower_bound=risk_target[i] + base_target * size_risk_lower,
+            elif name == 'SIZE':
-                                       upper_bound=risk_target[i] + base_target * size_risk_upper)
+                base_target = abs(risk_target[i])
-        elif name == 'benchmark_total':
+                constraint.set_constraints(name,
-            base_target = benchmark_w.sum()
+                                           lower_bound=risk_target[i] + base_target * size_risk_lower,
-            constraint.set_constraints(name,
+                                           upper_bound=risk_target[i] + base_target * size_risk_upper)
-                                       lower_bound=benchmark_total_lower * base_target,
+            elif name == 'benchmark_total':
-                                       upper_bound=benchmark_total_upper * base_target)
+                base_target = benchmark_w.sum()
+                constraint.set_constraints(name,
+                                           lower_bound=benchmark_total_lower * base_target,
+                                           upper_bound=benchmark_total_upper * base_target)
+            else:
+                constraint.set_constraints(name,
+                                           lower_bound=risk_target[i] * industry_lower,
+                                           upper_bound=risk_target[i] * industry_upper)
+        factor_values = factor_processing(total_data[const_features].values,
+                                          pre_process=[winsorize_normal, standardize],
+                                          risk_factors=risk_exp,
+                                          post_process=[winsorize_normal, standardize])
+        # const linear model
+        er1 = const_model.predict(factor_values)
+        # linear regression model
+        models = models_series[models_series.index <= date]
+        if models.empty:
+            continue
+        index_dates.append(date)
+        model = models[-1]
+        # x = predict_x[date]
+        x = factor_processing(total_data[linear_model_features].values,
+                              pre_process=[winsorize_normal, standardize],
+                              risk_factors=risk_exp,
+                              post_process=[winsorize_normal, standardize])
+        er2 = model.predict(x)
+        # combine model
+        er1_table = pd.DataFrame({'er1': er1 / er1.std(), 'code': total_data.code.values})
+        er2_table = pd.DataFrame({'er2': er2 / er2.std(), 'code': total_data.code.values})
+        er_table = pd.merge(er1_table, er2_table, on=['code'], how='left').fillna(0)
+        er = (er_table.er1 + er_table.er2).values
+        codes = total_data['code'].values
+        if previous_pos.empty:
+            current_position = None
+            turn_over_target = None
        else:
-            constraint.set_constraints(name,
+            previous_pos.set_index('code', inplace=True)
-                                       lower_bound=risk_target[i] * industry_lower,
+            remained_pos = previous_pos.loc[codes]
-                                       upper_bound=risk_target[i] * industry_upper)
+            remained_pos.fillna(0., inplace=True)
-    factor_values = factor_processing(total_data[const_features].values,
+            turn_over_target = turn_over_target_base
-                                      pre_process=[winsorize_normal, standardize],
+            current_position = remained_pos.weight.values
-                                      risk_factors=risk_exp,
-                                      post_process=[winsorize_normal, standardize])
+        try:
+            target_pos, _ = er_portfolio_analysis(er,
-    # const linear model
+                                                  industry,
-    er1 = const_model.predict(factor_values)
+                                                  None,
+                                                  constraint,
-    # linear regression model
+                                                  False,
-    models = models_series[models_series.index <= date]
+                                                  benchmark_w,
-    if models.empty:
+                                                  method=method,
-        continue
+                                                  use_rank=use_rank,
+                                                  turn_over_target=turn_over_target,
-    index_dates.append(date)
+                                                  current_position=current_position,
-    model = models[-1]
+                                                  lbound=lbound,
+                                                  ubound=ubound)
-    # x = predict_x[date]
+        except ValueError:
-    x = factor_processing(total_data[linear_model_features].values,
+            alpha_logger.info('{0} full re-balance'.format(date))
-                          pre_process=[winsorize_normal, standardize],
+            target_pos, _ = er_portfolio_analysis(er,
-                          risk_factors=risk_exp,
+                                                  industry,
-                          post_process=[winsorize_normal, standardize])
+                                                  None,
-    er2 = model.predict(x)
+                                                  constraint,
+                                                  False,
-    # combine model
+                                                  benchmark_w,
-    er1_table = pd.DataFrame({'er1': er1 / er1.std(), 'code': total_data.code.values})
+                                                  method=method,
-    er2_table = pd.DataFrame({'er2': er2 / er2.std(), 'code': total_data.code.values})
+                                                  use_rank=use_rank,
-    er_table = pd.merge(er1_table, er2_table, on=['code'], how='left').fillna(0)
+                                                  lbound=lbound,
+                                                  ubound=ubound)
-    er = (er_table.er1 + er_table.er2).values
+        target_pos['code'] = total_data['code'].values
-    codes = total_data['code'].values
+        turn_over, executed_pos = executor.execute(target_pos=target_pos)
-    if previous_pos.empty:
-        current_position = None
+        executed_codes = executed_pos.code.tolist()
-        turn_over_target = None
+        dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon, offset=1)
-    else:
-        previous_pos.set_index('code', inplace=True)
+        result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
-        remained_pos = previous_pos.loc[codes]
+        result = pd.merge(result, dx_returns, on=['code'])
-        remained_pos.fillna(0., inplace=True)
+        leverage = result.weight_x.abs().sum()
-        turn_over_target = turn_over_target_base
-        current_position = remained_pos.weight.values
+        ret = (result.weight_x - hedging_ratio * result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
+        rets.append(ret)
-    try:
+        executor.set_current(executed_pos)
-        target_pos, _ = er_portfolio_analysis(er,
+        turn_overs.append(turn_over)
-                                              industry,
+        leverags.append(leverage)
-                                              None,
-                                              constraint,
+        previous_pos = executed_pos
-                                              False,
+        alpha_logger.info('{0} is finished'.format(date))
-                                              benchmark_w,
-                                              method=method,
+    ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates)
-                                              use_rank=use_rank,
+    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
-                                              turn_over_target=turn_over_target,
+    ret_df = ret_df.shift(1)
-                                              current_position=current_position,
+    ret_df.iloc[0] = 0.
-                                              lbound=lbound,
+    ret_df['tc_cost'] = ret_df.turn_over * 0.002
-                                              ubound=ubound)
-    except ValueError:
+    ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
-        alpha_logger.info('{0} full re-balance'.format(date))
+                                                 title='Fixed frequency rebalanced: {0}'.format(frequency),
-        target_pos, _ = er_portfolio_analysis(er,
+                                                 secondary_y='tc_cost')
-                                              industry,
-                                              None,
+    ret_df['ret_after_tc'] = ret_df['returns'] - ret_df['tc_cost']
-                                              constraint,
-                                              False,
+    sharp_calc = MovingSharp(49)
-                                              benchmark_w,
+    drawdown_calc = MovingMaxDrawdown(49)
-                                              method=method,
+    max_drawdown_calc = MovingMaxDrawdown(len(ret_df))
-                                              use_rank=use_rank,
-                                              lbound=lbound,
+    res_df = pd.DataFrame(columns=['daily_return', 'cum_ret', 'sharp', 'drawdown', 'max_drawn'])
-                                              ubound=ubound)
+    total_returns = 0.
-    target_pos['code'] = total_data['code'].values
+    for i, ret in enumerate(ret_df['ret_after_tc']):
-    turn_over, executed_pos = executor.execute(target_pos=target_pos)
+        date = ret_df.index[i]
+        total_returns += ret
-    executed_codes = executed_pos.code.tolist()
+        sharp_calc.push({'ret': ret, 'riskFree': 0.})
-    dx_returns = engine.fetch_dx_return(date, executed_codes, horizon=horizon)
+        drawdown_calc.push({'ret': ret})
+        max_drawdown_calc.push({'ret': ret})
-    result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
-    result = pd.merge(result, dx_returns, on=['code'])
+        res_df.loc[date, 'daily_return'] = ret
+        res_df.loc[date, 'cum_ret'] = total_returns
-    leverage = result.weight_x.abs().sum()
+        res_df.loc[date, 'drawdown'] = drawdown_calc.result()[0]
+        res_df.loc[date, 'max_drawn'] = max_drawdown_calc.result()[0]
-    ret = (result.weight_x - hedging_ratio * result.weight_y * leverage / result.weight_y.sum()).values @ result.dx.values
-    rets.append(ret)
+        if i < 10:
-    executor.set_current(executed_pos)
+            res_df.loc[date, 'sharp'] = 0.
-    turn_overs.append(turn_over)
+        else:
-    leverags.append(leverage)
+            res_df.loc[date, 'sharp'] = sharp_calc.result() * np.sqrt(49)
-    previous_pos = executed_pos
+    res_df.to_csv('hs300_{0}.csv'.format(int(weight_gap * 100)))
-    alpha_logger.info('{0} is finished'.format(date))
+# plt.show()
-ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'leverage': leverage}, index=index_dates)
-ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], frequency)] = 0.
-ret_df = ret_df.shift(1)
-ret_df.iloc[0] = 0.
-ret_df['tc_cost'] = ret_df.turn_over * 0.002
-ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
-                                             title='Fixed frequency rebalanced: {0}'.format(frequency),
-                                             secondary_y='tc_cost')
-ret_df['ret_after_tc'] = ret_df['returns'] - ret_df['tc_cost']
-sharp_calc = MovingSharp(52)
-drawdown_calc = MovingMaxDrawdown(52)
-max_drawdown_calc = MovingMaxDrawdown(len(ret_df))
-res_df = pd.DataFrame(columns=['daily_return', 'cum_ret', 'sharp', 'drawdown', 'max_drawn'])
-total_returns = 0.
-for i, ret in enumerate(ret_df['ret_after_tc']):
-    date = ret_df.index[i]
-    total_returns += ret
-    sharp_calc.push({'ret': ret, 'riskFree': 0.})
-    drawdown_calc.push({'ret': ret})
-    max_drawdown_calc.push({'ret': ret})
-    res_df.loc[date, 'daily_return'] = ret
-    res_df.loc[date, 'cum_ret'] = total_returns
-    res_df.loc[date, 'drawdown'] = drawdown_calc.result()[0]
-    res_df.loc[date, 'max_drawn'] = max_drawdown_calc.result()[0]
-    if i < 10:
-        res_df.loc[date, 'sharp'] = 0.
-    else:
-        res_df.loc[date, 'sharp'] = sharp_calc.result() * np.sqrt(52)
-res_df.to_csv('hs300_{0}.csv'.format(int(weight_gap * 100)))
-#plt.show()