Added the DataLog model (data_log table)

38432d45 · Dr.李 · b7fa9cc6 · 38432d45 · 38432d45 · 38432d45
Commit 38432d45 authored Oct 12, 2017 by Dr.李
4 changed files
--- a/alphamind/data/dbmodel/models.py
+++ b/alphamind/data/dbmodel/models.py
@@ -2370,6 +2370,21 @@ class Uqer(Base):
    NLSIZE = Column(Float(53))


+class DataLog(Base):
+    __tablename__ = 'data_log'
+    __table_args__ = (
+        Index('data_log_idx', 'trade_date', 'factor', 'source', 'universe', unique=True),
+    )
+
+    trade_date = Column(DateTime, primary_key=True, nullable=False)
+    factor = Column(String(30), primary_key=True, nullable=False)
+    source = Column(String(30), primary_key=True, nullable=False)
+    universe = Column(String(20), primary_key=True, nullable=False)
+    coverage = Column(Float(53))
+    maximum = Column(Float(53))
+    minimum = Column(Float(53))
+
+
 if __name__ == '__main__':
    from sqlalchemy import create_engine


--- a/alphamind/examples/factor_analysis_example.py
+++ b/alphamind/examples/factor_analysis_example.py
@@ -11,23 +11,22 @@ from matplotlib import pyplot as plt
 from alphamind.api import *
 from PyFin.api import *

-
 strategies = {
    'prod': {
        # 'factors': ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'],
        # 'weights': [0.05, 0.3, 0.35, 0.075, 0.15, 0.05]
-        #'factors':  ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'],
-        #'weights': [0.05, 0.3, 0.35, 0.075, 0.15, 0.05]
+        # 'factors':  ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'],
+        # 'weights': [0.05, 0.3, 0.35, 0.075, 0.15, 0.05]
        'factors': ['VAL', 'RVOL', 'ROEDiluted', 'GREV', 'EPS', 'CHV', 'CFinc1', 'BDTO'],
        'weights': [0.034129344,
-0.015881607,
-0.048765746,
-0.042747382,
-0.015900173,
-0.019044573,
-0.001792638,
-0.014277867,
-]
+                    0.015881607,
+                    0.048765746,
+                    0.042747382,
+                    -0.015900173,
+                    0.019044573,
+                    -0.001792638,
+                    0.014277867,
+                    ]
    },
    # 'candidate': {
    #     'factors': ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'GREV', 'ROEDiluted'],
@@ -35,7 +34,6 @@ strategies = {
    # }
 }

-
 engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
 universe = Universe('custom', ['zz500'])
 benchmark_code = 905
@@ -54,8 +52,8 @@ elif freq == '3w':
 elif freq == '1d':
    horizon = 0

-dates = makeSchedule('2017-01-01',
-                     '2017-09-05',
+dates = makeSchedule('2012-01-01',
+                     '2017-09-15',
                     tenor=freq,
                     calendar='china.sse',
                     dateGenerationRule=DateGeneration.Forward)
@@ -120,7 +118,6 @@ for strategy in strategies:

    total_data_dict[strategy] = rets

-
 ret_df = pd.DataFrame(total_data_dict, index=dates)
 ret_df.loc[advanceDateByCalendar('china.sse', dates[-1], freq)] = 0.
 ret_df = ret_df.shift(1)

--- a/alphamind/examples/formula_expression.py
+++ b/alphamind/examples/formula_expression.py
@@ -17,9 +17,15 @@ import datetime as dt

 start = dt.datetime.now()

-formula1 = CSRank(DIFF(LOG("turnoverValue")))
-formula2 = CSRank((LAST('closePrice') - LAST('openPrice')) / LAST('openPrice'))
-expression = -CORR(6, formula1 ^ formula2)
+# formula1 = CSRank(DIFF(LOG("turnoverVol")))
+# formula2 = CSRank((LAST('highestPrice') - LAST('lowestPrice')) / LAST('lowestPrice'))
+# expression = -CORR(6, formula1 ^ formula2)
+
+factor1 = LAST('RVOL')
+factor2 = LAST('IVR')
+expression = RES(20, factor2 ^ factor1)
+
+# expression = MA(1, "EPS")

 alpha_factor_name = 'alpha_factor'
 alpha_factor = {alpha_factor_name: expression}
@@ -30,11 +36,11 @@ engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha'
 universe = Universe('custom', ['zz500'])
 benchmark_code = 905
 neutralize_risk = ['SIZE'] + industry_styles
-freq = '1d'
+freq = '1w'
 n_bins = 5

-dates = makeSchedule('2012-04-01',
-                     '2017-09-03',
+dates = makeSchedule('2012-01-01',
+                     '2017-09-18',
                     tenor=freq,
                     calendar='china.sse')

@@ -42,7 +48,7 @@ factor_all_data = engine.fetch_data_range(universe,
                                          alpha_factor,
                                          dates=dates,
                                          benchmark=905)['factor']
-return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=0)
+return_all_data = engine.fetch_dx_return_range(universe, dates=dates, horizon=4)

 factor_groups = factor_all_data.groupby('trade_date')
 return_groups = return_all_data.groupby('trade_date')

--- a/alphamind/examples/model_training.py
+++ b/alphamind/examples/model_training.py
@@ -34,16 +34,16 @@ training     - every 4 week

 engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
 universe = Universe('zz500', ['zz500'])
-neutralize_risk = ['SIZE'] + industry_styles
-portfolio_risk_neutralize = ['SIZE']
+neutralize_risk = industry_styles
+portfolio_risk_neutralize = []
 portfolio_industry_neutralize = True
-alpha_factors = ['RVOL', 'EPS', 'DROEAfterNonRecurring', 'DivP', 'CFinc1', 'BDTO'] # ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted']  # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
+alpha_factors = ['VAL', 'RVOL', 'ROEDiluted', 'GREV', 'EPS', 'CHV', 'CFinc1', 'BDTO'] # ['RVOL', 'EPS', 'CFinc1', 'BDTO', 'VAL', 'CHV', 'GREV', 'ROEDiluted']  # ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
 benchmark = 905
 n_bins = 5
 frequency = '2w'
-batch = 1
-start_date = '2017-01-05'
-end_date = '2017-09-05'
+batch = 4
+start_date = '2017-01-01'
+end_date = '2017-09-26'
 method = 'risk_neutral'
 use_rank = 100

@@ -62,7 +62,7 @@ data_package = fetch_data_package(engine,
                                  neutralized_risk=neutralize_risk,
                                  pre_process=[winsorize_normal, standardize],
                                  post_process=[winsorize_normal, standardize],
-                                  warm_start=8)
+                                  warm_start=batch)

 '''
 training phase: using Linear - regression from scikit-learn
@@ -76,12 +76,22 @@ dates = sorted(train_x.keys())
 model_df = pd.Series()

 for train_date in dates:
-    #model = LinearRegression(alpha_factors, fit_intercept=False)
+    model = LinearRegression(alpha_factors, fit_intercept=False)
    #model = LassoCV(fit_intercept=False)
-    #model = AdaBoostRegressor(n_estimators=100)
+    # model = AdaBoostRegressor(n_estimators=100)
    #model = RandomForestRegressor(n_estimators=100, n_jobs=4)
    #model = NuSVR(kernel='rbf', C=1e-3, gamma=0.1)
-    model = ConstLinearModel(alpha_factors, np.array([0.05, 0.3, 0.35, 0.075, 0.15, 0.05]))
+    # model = ConstLinearModel(alpha_factors, np.array([0.034129344,
+    #                 0.015881607,
+    #                 0.048765746,
+    #                 0.042747382,
+    #                 -0.015900173,
+    #                 0.019044573,
+    #                 -0.001792638,
+    #                 0.014277867,
+    #                 ]))
+
+    # model = ConstLinearModel(alpha_factors, np.array([1.] * len(alpha_factors)))
    x = train_x[train_date]
    y = train_y[train_date]

@@ -165,6 +175,8 @@ for i, predict_date in enumerate(dates):
            cons.set_constraints(name, benchmark_exp[k], benchmark_exp[k])

    predict_y = model.predict(x)
+
+    is_tradable[:] = True
    weights, analysis = er_portfolio_analysis(predict_y,
                                              industry_names,
                                              realized_r,