Commit ae63c24a authored by Dr.李

Fixed a bug in the mv optimizer

parent 857b3ce6
......@@ -230,8 +230,8 @@ def factor_analysis(factors: pd.DataFrame,
  lbound, ubound, cons_exp, risk_lbound, risk_ubound = create_constraints(benchmark, **kwargs)
  cov = kwargs['cov']
- if 'lambda' in kwargs:
-     lam = kwargs['lambda']
+ if 'lam' in kwargs:
+     lam = kwargs['lam']
  else:
      lam = 1.
......
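The hunk above renames the keyword that the mean-variance branch of `factor_analysis` reads from `lambda` to `lam`, which matches the `lam=100.` argument passed by the strategy script further down in this commit. A minimal sketch of tolerant keyword handling follows; the helper name and the backward-compatible fallback are hypothetical, not the project's actual code:

```python
# Hypothetical sketch: read the risk-aversion parameter the way the fixed
# branch does, while still accepting the old 'lambda' spelling.
def _read_risk_aversion(**kwargs) -> float:
    if 'lam' in kwargs:        # new keyword, e.g. lam=100. in the strategy script
        return kwargs['lam']
    if 'lambda' in kwargs:     # legacy spelling, kept only for backward compatibility
        return kwargs['lambda']
    return 1.                  # default when nothing is supplied
```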
......@@ -209,33 +209,18 @@ class SqlEngine(object):
if __name__ == '__main__':
db_url = 'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha?charset=cp936'
from alphamind.data.dbmodel.models import Uqer
import datetime as dt
universe = Universe('custom', ['ashare'])
engine = SqlEngine(db_url)
ref_date = '2017-07-21'
codes = engine.fetch_codes(ref_date, universe)
start = dt.datetime.now()
for i in range(100):
codes = engine.fetch_codes(ref_date, universe)
print(dt.datetime.now() - start)
print(codes)
print(len(codes))
universe = Universe('zz500', ['zz500'])
universe = Universe('custom', ['zz500'])
engine = SqlEngine(db_url)
ref_date = '2017-07-04'
ref_date = '2017-01-17'
start = dt.datetime.now()
for i in range(100):
codes = engine.fetch_codes(ref_date, universe)
print(dt.datetime.now() - start)
data = engine.fetch_data(ref_date, ['EPS'], codes, 905)
d1ret = engine.fetch_dx_return(ref_date, codes, horizon=0)
print(codes)
print(len(codes))
missing_codes = [c for c in data['factor'].Code if c not in set(d1ret.Code)]
print(len(data['factor']))
print(len(d1ret))
print(missing_codes)
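For reference, the `__main__` smoke test above exercises three engine calls whose signatures are visible in the diff. Below is the same flow restated as a linear sketch; it assumes the MSSQL instance from the diff is reachable, and timings will differ:

```python
import datetime as dt

from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe

# Assumed: the database URL from the diff is reachable with these credentials.
db_url = 'mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha?charset=cp936'
engine = SqlEngine(db_url)
universe = Universe('custom', ['zz500'])
ref_date = '2017-01-17'

start = dt.datetime.now()
codes = engine.fetch_codes(ref_date, universe)              # security codes in the universe
data = engine.fetch_data(ref_date, ['EPS'], codes, 905)     # factor values vs. benchmark 905
d1ret = engine.fetch_dx_return(ref_date, codes, horizon=0)  # forward returns
print('elapsed:', dt.datetime.now() - start)

# Codes present in the factor frame but missing a forward return.
missing_codes = [c for c in data['factor'].Code if c not in set(d1ret.Code)]
print(len(data['factor']), len(d1ret), len(missing_codes))
```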
......@@ -5,69 +5,80 @@ Created on 2017-7-10
@author: cheng.li
"""
import datetime as dt
import numpy as np
import pandas as pd
from alphamind.analysis.factoranalysis import factor_analysis
from alphamind.data.engines.sqlengine import risk_styles
from alphamind.data.engines.sqlengine import industry_styles
from alphamind.portfolio.constraints import Constraints
from alphamind.data.engines.sqlengine import SqlEngine
from alphamind.data.engines.universe import Universe
from alphamind.data.engines.sqlengine import industry_styles
from PyFin.api import bizDatesList
from PyFin.api import makeSchedule
engine = SqlEngine('mssql+pymssql://licheng:A12345678!@10.63.6.220/alpha')
universe = Universe('custom', ['zz500'])
dates = bizDatesList('china.sse', '2017-01-01', '2017-08-05')
factors = ['EPS', 'FEARNG', 'VAL', 'NIAP']
f_weights = np.array([1., 1., 1., 1.])
used_risk_styles = ['SIZE']
total_risks = used_risk_styles + industry_styles
build_type = 'risk_neutral'
neutralize_risk = ['SIZE'] + industry_styles
constraint_risk = []
def calculate_one_day(ref_date, factors, factor_weights, horizon_end=None):
print(ref_date)
rets = []
for date in dates:
print(date)
ref_date = date.strftime('%Y-%m-%d')
codes = engine.fetch_codes(ref_date, universe)
total_data = engine.fetch_data(ref_date, factors, codes, 905)
data = engine.fetch_data(ref_date, factors, codes, 905, risk_model='short')
returns = engine.fetch_dx_return(ref_date, codes, 0)
factor_data = total_data['factor']
factor_df = factor_data[['Code', 'industry', 'weight', 'isOpen'] + total_risks + factors].dropna()
total_data = pd.merge(data['factor'], returns, on=['Code']).dropna()
risk_cov = data['risk_cov']
dx_return = engine.fetch_dx_return(ref_date, codes, expiry_date=horizon_end)
factor_df = pd.merge(factor_df, dx_return, on=['Code'])
total_risks = risk_cov.Factor
risk_cov = risk_cov[total_risks]
risk_exp = total_data[total_risks]
stocks_cov = ((risk_exp.values @ risk_cov.values @ risk_exp.values.T) + np.diag(total_data.SRISK ** 2)) / 10000.
weights, _ = factor_analysis(factor_df[factors],
factor_weights,
factor_df.industry.values,
None,
detail_analysis=False,
benchmark=factor_df.weight.values,
risk_exp=factor_df[total_risks].values,
is_tradable=factor_df.isOpen.values.astype(bool),
method=build_type)
f_data = total_data[factors]
return ref_date, (weights.weight - factor_df.weight).dot(factor_df.dx)
industry = total_data.industry_code.values
dx_return = total_data.dx.values
benchmark = total_data.weight.values
risk_exp = total_data[neutralize_risk].values
constraint_exp = total_data[constraint_risk].values
risk_exp_expand = np.concatenate((constraint_exp, np.ones((len(risk_exp), 1))), axis=1).astype(float)
risk_names = constraint_risk + ['total']
risk_target = risk_exp_expand.T @ benchmark
if __name__ == '__main__':
lbound = 0.
ubound = 0.05 + benchmark
from matplotlib import pyplot as plt
constraint = Constraints(risk_exp_expand, risk_names)
for i, name in enumerate(risk_names):
constraint.set_constraints(name, lower_bound=risk_target[i], upper_bound=risk_target[i])
factors = ['BDTO', 'CFinc1', 'DivP', 'EPS', 'RVOL', 'DROEAfterNonRecurring']
factor_weights = [0.10, 0.30, 0.15, 0.18, 0.11, 0.35]
biz_dates = makeSchedule('2015-01-01', '2017-07-07', '1w', 'china.sse')
ers = []
dates = []
for i, ref_date in enumerate(biz_dates[:-1]):
ref_date = ref_date.strftime("%Y-%m-%d")
try:
ref_date, er = calculate_one_day(ref_date, factors, factor_weights, horizon_end=biz_dates[i+1])
dates.append(ref_date)
ers.append(er)
except Exception as e:
print(str(e) + ": {0}".format(ref_date))
res = pd.Series(ers, index=dates)
res.cumsum().plot()
plt.show()
pos, analysis = factor_analysis(f_data,
f_weights,
industry,
dx_return,
benchmark=benchmark,
risk_exp=risk_exp,
is_tradable=total_data.isOpen.values.astype(bool),
method='mv',
constraints=constraint,
cov=stocks_cov,
use_rank=100,
lam=100.,
lbound=lbound,
ubound=ubound)
except:
rets.append(0.)
print("{0} is error!".format(date))
else:
rets.append(analysis.er[-1])
ret_series = pd.Series(rets, dates)
\ No newline at end of file
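The strategy script feeds the `'mv'` method a full stock covariance assembled from the short-horizon risk model: factor exposures times the factor covariance plus squared specific risk, divided by 10000 (the `stocks_cov` line above); together with `cov=stocks_cov`, the `lam=100.` keyword is what the rename in the first hunk makes effective. A self-contained numpy sketch of that assembly, with made-up dimensions and random inputs:

```python
import numpy as np

# Hypothetical sizes: 5 stocks, 3 risk factors.
n_stocks, n_factors = 5, 3
rng = np.random.default_rng(0)

risk_exp = rng.standard_normal((n_stocks, n_factors))  # per-stock factor exposures
factor_cov = np.diag(rng.uniform(1., 4., n_factors))   # factor covariance in the model's units
srisk = rng.uniform(1., 2., n_stocks)                   # specific (idiosyncratic) risk

# Same assembly as the script: systematic part plus diagonal specific variance,
# rescaled by 1/10000 (presumably converting from the risk model's percent units).
stocks_cov = (risk_exp @ factor_cov @ risk_exp.T + np.diag(srisk ** 2)) / 10000.

assert stocks_cov.shape == (n_stocks, n_stocks)
```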
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"collapsed": true
},
......@@ -26,8 +26,10 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def time_function(py_callable, n):\n",
......@@ -66,13 +68,36 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Scale(n) time(ms) feval min(x) max(x) sum(x) x(0) + x(1)\n",
"200 18.01 -0.82 0.000000 0.010000 1.000000 0.015\n",
"400 23.02 -1.28 -0.000000 0.010000 1.000000 0.015\n",
"600 42.63 -1.54 -0.000000 0.010000 1.000000 0.015\n",
"800 62.04 -1.63 -0.000000 0.010000 1.000000 0.015\n",
"1000 76.57 -1.72 -0.000000 0.010000 1.000000 0.015\n",
"1200 108.73 -1.81 -0.000000 0.010000 1.000000 0.015\n",
"1400 136.22 -1.90 -0.000000 0.010000 1.000000 0.015\n",
"1600 166.64 -1.96 -0.000000 0.010000 1.000000 0.015\n",
"1800 197.72 -2.03 -0.000000 0.010000 1.000000 0.015\n",
"2000 258.51 -2.06 -0.000000 0.010000 1.000000 0.015\n",
"2200 291.34 -2.07 -0.000000 0.010000 1.000000 0.015\n",
"2400 348.30 -2.13 -0.000000 0.010000 1.000000 0.015\n",
"2600 398.31 -2.14 -0.000000 0.010000 1.000000 0.015\n",
"2800 462.13 -2.16 -0.000000 0.010000 1.000000 0.015\n",
"3000 547.84 -2.19 -0.000000 0.010000 1.000000 0.015\n"
]
}
],
"source": [
"print(\"{0:<8}{1:>12}{2:>12}{3:>12}{4:>12}{5:>12}{6:>15}\".format('Scale(n)', 'time(ms)', 'feval', 'min(x)', 'max(x)', 'sum(x)', 'x(0) + x(1)'))\n",
"\n",
"for n in range(200, 10200, 200):\n",
"for n in range(200, 3200, 200):\n",
" elapsed, result = time_function(cvxpy_lp, n)\n",
" s = np.array(result[0].value).flatten()\n",
" print(\"{0:<8}{1:>12.2f}{2:>12.2f}{3:>12f}{4:>12f}{5:>12f}{6:>15}\".format(n, elapsed*1000, result[1].value, s.min(), s.max(), s.sum(), s[0] + s[1]))"
......@@ -80,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"collapsed": true
},
......@@ -105,13 +130,36 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Scale(n) time(ms) feval min(x) max(x) sum(x) x(0) + x(1)\n",
"200 19.01 -0.82 0.000000 0.010000 1.000000 0.015\n",
"400 1.00 -1.28 0.000000 0.010000 1.000000 0.015\n",
"600 2.00 -1.54 0.000000 0.010000 1.000000 0.015\n",
"800 3.00 -1.63 0.000000 0.010000 1.000000 0.015\n",
"1000 2.00 -1.72 0.000000 0.010000 1.000000 0.015\n",
"1200 3.00 -1.81 0.000000 0.010000 1.000000 0.015\n",
"1400 2.02 -1.90 0.000000 0.010000 1.000000 0.015\n",
"1600 2.02 -1.96 0.000000 0.010000 1.000000 0.015\n",
"1800 1.98 -2.03 0.000000 0.010000 1.000000 0.015\n",
"2000 2.02 -2.06 0.000000 0.010000 1.000000 0.015\n",
"2200 2.00 -2.07 0.000000 0.010000 1.000000 0.015\n",
"2400 2.00 -2.13 0.000000 0.010000 1.000000 0.015\n",
"2600 3.00 -2.14 0.000000 0.010000 1.000000 0.015\n",
"2800 3.02 -2.16 0.000000 0.010000 1.000000 0.015\n",
"3000 3.00 -2.19 0.000000 0.010000 1.000000 0.015\n"
]
}
],
"source": [
"print(\"{0:<8}{1:>12}{2:>12}{3:>12}{4:>12}{5:>12}{6:>15}\".format('Scale(n)', 'time(ms)', 'feval', 'min(x)', 'max(x)', 'sum(x)', 'x(0) + x(1)'))\n",
"\n",
"for n in range(200, 10200, 200):\n",
"for n in range(200, 3200, 200):\n",
" elapsed, result = time_function(clp_lp, n)\n",
" s = result[2]\n",
" print(\"{0:<8}{1:>12.2f}{2:>12.2f}{3:>12f}{4:>12f}{5:>12f}{6:>15}\".format(n, elapsed*1000, result[1], s.min(), s.max(), s.sum(), s[0] + s[1]))"
......@@ -129,7 +177,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
......
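The notebook cells above benchmark two LP backends through a `time_function(py_callable, n)` helper whose body is truncated in this diff. A plausible sketch of such a helper is shown below, with a trivial stand-in callable; the real notebook passes the `cvxpy_lp` and `clp_lp` builders and may time the call differently, but its call sites unpack exactly an `(elapsed, result)` pair:

```python
import datetime as dt

def time_function(py_callable, n):
    """Call py_callable(n) once and return (elapsed_seconds, result).

    Sketch only: the notebook's actual helper is not shown in the diff.
    """
    start = dt.datetime.now()
    result = py_callable(n)
    elapsed = (dt.datetime.now() - start).total_seconds()
    return elapsed, result

# Trivial usage with a stand-in callable.
elapsed, result = time_function(lambda n: list(range(n)), 1000)
print('{0:.2f} ms'.format(elapsed * 1000))
```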