Commit 2112699d authored by iLampard's avatar iLampard

Add two Quick Start tutorials

parent 8792e2b4
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"alpha-mind的data文件夹提供了对因子数据进行排序和求分位数的工具函数。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 因子排序: *rank*\n",
"- 从小到大排序,返回每个因子值的排名序号(从0开始)。\n",
"- 可以进行整体排序,也可以分行业(分组)排序。"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\workarea\\github\\alpha-mind\\alphamind\\data\\rank.py:17: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
" x = x.reshape((-1, 1))\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>factor_1</th>\n",
" <th>factor_2</th>\n",
" <th>rank_1</th>\n",
" <th>rank_2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.940105</td>\n",
" <td>0.328598</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.473932</td>\n",
" <td>0.334819</td>\n",
" <td>7.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.337995</td>\n",
" <td>0.335863</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.371221</td>\n",
" <td>0.286139</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.462262</td>\n",
" <td>0.182403</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.126732</td>\n",
" <td>0.843093</td>\n",
" <td>2.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.762878</td>\n",
" <td>0.472779</td>\n",
" <td>8.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.075146</td>\n",
" <td>0.924889</td>\n",
" <td>0.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.238197</td>\n",
" <td>0.206311</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.112166</td>\n",
" <td>0.240062</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" factor_1 factor_2 rank_1 rank_2\n",
"0 0.940105 0.328598 9.0 4.0\n",
"1 0.473932 0.334819 7.0 5.0\n",
"2 0.337995 0.335863 4.0 6.0\n",
"3 0.371221 0.286139 5.0 3.0\n",
"4 0.462262 0.182403 6.0 0.0\n",
"5 0.126732 0.843093 2.0 8.0\n",
"6 0.762878 0.472779 8.0 7.0\n",
"7 0.075146 0.924889 0.0 9.0\n",
"8 0.238197 0.206311 3.0 1.0\n",
"9 0.112166 0.240062 1.0 2.0"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from alphamind.data.rank import rank\n",
"\n",
"# 假设有10只股票,每只股票有2个因子,构成一个矩阵\n",
"factors = pd.DataFrame(np.random.rand(10, 2))\n",
"factors.columns = ['factor_1', 'factor_2']\n",
"factors['rank_1'] = rank(factors['factor_1'])\n",
"factors['rank_2'] = rank(factors['factor_2'])\n",
"\n",
"factors\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"E:\\workarea\\github\\alpha-mind\\alphamind\\data\\rank.py:17: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
" x = x.reshape((-1, 1))\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>factor_1</th>\n",
" <th>rank</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.765457</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.162792</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.431309</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.633497</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.943491</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.477439</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.742096</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.561797</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.974109</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.921705</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" factor_1 rank\n",
"0 0.765457 3\n",
"1 0.162792 0\n",
"2 0.431309 1\n",
"3 0.633497 2\n",
"4 0.943491 4\n",
"5 0.477439 0\n",
"6 0.742096 2\n",
"7 0.561797 1\n",
"8 0.974109 4\n",
"9 0.921705 3"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 假设有10只股票,每只股票有1个因子\n",
"factors = pd.DataFrame(np.random.rand(10, 1))\n",
"factors.columns = ['factor_1']\n",
"\n",
"# 假设这10只股票分为两个行业,前5个和后5个分属不同类别\n",
"industry = np.concatenate([np.array([1.0]*5), np.array([2.0]*5)])\n",
"\n",
"factors['rank'] = rank(factors['factor_1'], groups=industry)\n",
"factors"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 因子分位数: *quantile*\n",
"- 根据给定组数*(n_bins)*,按从小到大的顺序进行分组,返回每个因子值所属的组别。"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>factor_1</th>\n",
" <th>rank</th>\n",
" <th>quantile</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.765457</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.162792</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.431309</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.633497</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.943491</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.477439</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.742096</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.561797</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.974109</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.921705</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" factor_1 rank quantile\n",
"0 0.765457 3 3\n",
"1 0.162792 0 0\n",
"2 0.431309 1 0\n",
"3 0.633497 2 2\n",
"4 0.943491 4 4\n",
"5 0.477439 0 1\n",
"6 0.742096 2 2\n",
"7 0.561797 1 1\n",
"8 0.974109 4 4\n",
"9 0.921705 3 3"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from alphamind.data.quantile import quantile\n",
"\n",
"factors['quantile'] = quantile(factors['factor_1'], n_bins=5)\n",
"factors\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment