Commit 5ea39a77 authored by Dr.李

fixed neutralize test issue

parent 45fc0125
@@ -74,3 +74,4 @@ def benchmark_neutralize_with_groups(n_samples: int, n_features: int, n_loops: i
 if __name__ == '__main__':
     benchmark_neutralize(3000, 10, 1000)
     benchmark_neutralize_with_groups(3000, 10, 1000, 30)
@@ -16,6 +16,10 @@ from alphamind.groupby import groupby
 def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_explained=False, output_exposure=False) \
         -> Union[np.ndarray, Tuple[np.ndarray, Dict]]:
+    if y.ndim == 1:
+        y = y.reshape((-1, 1))
     if groups is not None:
         res = zeros(y.shape)
@@ -42,8 +46,7 @@ def neutralize(x: np.ndarray, y: np.ndarray, groups: np.ndarray=None, output_exp
                     exposure[curr_idx, :, i] = b[:, i]
             if output_explained:
-                for i in range(explained.shape[2]):
-                    explained[curr_idx, :, i] = ls_explain(curr_x, b)
+                explained[curr_idx] = ls_explain(curr_x, b)
     else:
         b = ls_fit(x, y)
         res = ls_res(x, y, b)
@@ -76,15 +79,8 @@ def ls_res(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
 def ls_explain(x: np.ndarray, b: np.ndarray) -> np.ndarray:
-    if b.ndim == 1:
-        return b * x
-    else:
-        n_samples = x.shape[0]
-        to_explain = b.shape[1]
-        factors = x.shape[1]
-        explained = zeros((n_samples, factors, to_explain))
-        for i in range(to_explain):
-            explained[:, :, i] = b[:, i] * x
-        return explained
+    explained = np.zeros(x.shape + (b.shape[1],))
+    for i in range(b.shape[1]):
+        explained[:, :, i] = b[:, i] * x
+    return explained
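The hunks above make two related changes: a 1-d target y is now promoted to a column vector on entry, and ls_explain always returns a 3-d array of shape (n_samples, n_factors, n_targets) instead of special-casing a 1-d coefficient vector. A minimal sketch of the resulting shapes, using toy data and an illustrative helper name rather than the library code itself:

# Sketch only: mirrors the new ls_explain logic on toy data; the helper name
# and the toy inputs are illustrative, not part of the repository.
import numpy as np

def ls_explain_sketch(x: np.ndarray, b: np.ndarray) -> np.ndarray:
    # explained[:, :, j] holds the per-factor contribution b[:, j] * x for target j
    explained = np.zeros(x.shape + (b.shape[1],))
    for i in range(b.shape[1]):
        explained[:, :, i] = b[:, i] * x
    return explained

x = np.random.randn(5, 3)                 # 5 samples, 3 factors
y = np.random.randn(5)                    # 1-d target
y = y.reshape((-1, 1))                    # promoted to a column, as neutralize now does
b = np.linalg.lstsq(x, y, rcond=None)[0]  # least-squares coefficients, shape (3, 1)
print(ls_explain_sketch(x, b).shape)      # -> (5, 3, 1)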
@@ -54,8 +54,8 @@ class TestNeutralize(unittest.TestCase):
         exp_res = y - x @ model.coef_.T
         exp_explained = x * model.coef_.T
-        np.testing.assert_array_almost_equal(calc_res, exp_res)
-        np.testing.assert_array_almost_equal(other_stats['explained'], exp_explained)
+        np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1))
+        np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained)
         y = np.random.randn(3000, 4)
         x = np.random.randn(3000, 10)
@@ -86,8 +86,8 @@ class TestNeutralize(unittest.TestCase):
             model.fit(curr_x, curr_y)
             exp_res = curr_y - curr_x @ model.coef_.T
             exp_explained = curr_x * model.coef_.T
-            np.testing.assert_array_almost_equal(calc_res[groups == i], exp_res)
-            np.testing.assert_array_almost_equal(other_stats['explained'][groups == i], exp_explained)
+            np.testing.assert_array_almost_equal(calc_res[groups == i], exp_res.reshape(-1, 1))
+            np.testing.assert_array_almost_equal(other_stats['explained'][groups == i, :, 0], exp_explained)
         y = np.random.randn(3000, 4)
         x = np.random.randn(3000, 10)
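The test changes follow directly from those new shapes: neutralize now returns residuals as a column and 'explained' as a 3-d array, so the expectations built from a plain scikit-learn fit are aligned accordingly. A self-contained sketch of that consistency check (the neutralize comparison itself is left as comments, since the exact import path is not shown in this diff):

# Stand-alone version of the expectation used in the test; only numpy and
# scikit-learn are assumed here, and fit_intercept=False is an assumption.
import numpy as np
from sklearn.linear_model import LinearRegression

y = np.random.randn(3000)             # 1-d target, as in the single-target case
x = np.random.randn(3000, 10)

model = LinearRegression(fit_intercept=False)
model.fit(x, y)

exp_res = y - x @ model.coef_.T       # shape (3000,)
exp_explained = x * model.coef_.T     # shape (3000, 10)

# With the fix, the library outputs would be compared like this:
# np.testing.assert_array_almost_equal(calc_res, exp_res.reshape(-1, 1))
# np.testing.assert_array_almost_equal(other_stats['explained'][:, :, 0], exp_explained)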
@@ -155,7 +155,9 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "df = pd.merge(df, benchmark_data[['Date', 'd1ret_b']], on='Date', how='inner')"
@@ -193,7 +195,9 @@
   {
    "cell_type": "code",
    "execution_count": 12,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "total_data = df.copy()"
@@ -658,7 +662,9 @@
   {
    "cell_type": "code",
    "execution_count": 19,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "ret_series = (total_data.pos * (total_data.d1ret - total_data.d1ret_b)).groupby(total_data.Date).sum()"
@@ -984,7 +990,9 @@
   {
    "cell_type": "code",
    "execution_count": 29,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "excess_return = (total_data.d1ret - total_data.d1ret_b).values\n",
@@ -1050,7 +1058,9 @@
   {
    "cell_type": "code",
    "execution_count": 34,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "cols = ['idiosyncratic']\n",
@@ -1061,7 +1071,9 @@
   {
    "cell_type": "code",
    "execution_count": 35,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "aggregated_bars = analyis_table.groupby(level=0).sum()\n",
@@ -1132,7 +1144,9 @@
   {
    "cell_type": "code",
    "execution_count": 38,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "exposure_table = pd.DataFrame(exposure[:, :, 0], columns=risk_facto_cols, index=total_data.Date)\n",
@@ -1194,7 +1208,9 @@
   {
    "cell_type": "code",
    "execution_count": 63,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "hist_data = pd.read_csv('portfolio.csv')\n",
@@ -1232,7 +1248,9 @@
   {
    "cell_type": "code",
    "execution_count": 66,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "hist_data = hist_data[hist_data.Date >= '2015-01-09'].reset_index(drop=True)"
@@ -1241,7 +1259,9 @@
   {
    "cell_type": "code",
    "execution_count": 67,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "hist_data['Alpha_Trading'] = hist_data['Alpha_Trading'].groupby(hist_data.Code).fillna(method='pad')"
@@ -1274,7 +1294,9 @@
   {
    "cell_type": "code",
    "execution_count": 84,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "to_explain = hist_data.Alpha_Trading.values * excess_return\n",
@@ -1286,7 +1308,9 @@
   {
    "cell_type": "code",
    "execution_count": 85,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "ret_series = pd.DataFrame(to_explain).groupby(hist_data.Date).sum()"
@@ -1366,7 +1390,9 @@
   {
    "cell_type": "code",
    "execution_count": 90,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "cols = ['idiosyncratic']\n",
@@ -1377,7 +1403,9 @@
   {
    "cell_type": "code",
    "execution_count": 91,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "aggregated_bars = analyis_table.groupby(level=0).sum()"
@@ -1447,7 +1475,9 @@
   {
    "cell_type": "code",
    "execution_count": 80,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "exposure_table = pd.DataFrame(exposure[:, :, 0], columns=risk_facto_cols, index=hist_data.Date)\n",
@@ -38,11 +38,16 @@ def generate_extensions(ext_modules, line_trace=False):
     else:
         define_macros = []
+    if platform.system() != "Windows":
+        extra_compile_args = ['-O3', '-std=c++11']
+    else:
+        extra_compile_args = ['/Ox']
     for pyxfile in ext_modules:
         ext = Extension(name='.'.join(pyxfile.split('/'))[:-4],
                         sources=[pyxfile],
                         define_macros=define_macros,
-                        extra_compile_args=['-std=c++11'])
+                        extra_compile_args=extra_compile_args)
         extensions.append(ext)
     return extensions
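The setup.py hunk replaces the hard-coded '-std=c++11' with platform-dependent flags, since MSVC does not accept GCC-style options: '/Ox' enables MSVC's full speed optimizations, while '-O3' and '-std=c++11' target GCC/Clang. A stand-alone sketch of how one Extension ends up configured under this scheme (the .pyx path is purely illustrative, not a file shown in this diff):

# Sketch of the flag selection in isolation; 'alphamind/example_module.pyx'
# is a hypothetical path used only for the example.
import platform
from setuptools import Extension

pyxfile = 'alphamind/example_module.pyx'

if platform.system() != "Windows":
    extra_compile_args = ['-O3', '-std=c++11']   # GCC / Clang
else:
    extra_compile_args = ['/Ox']                 # MSVC full optimization

ext = Extension(name='.'.join(pyxfile.split('/'))[:-4],   # -> 'alphamind.example_module'
                sources=[pyxfile],
                extra_compile_args=extra_compile_args)
print(ext.name, extra_compile_args)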