Commit 5c4eef61 authored by Dr.李

added factor data packet structure

parent c9c2c89c
...@@ -8,7 +8,7 @@ Created on 2017-5-18 ...@@ -8,7 +8,7 @@ Created on 2017-5-18
import pandas as pd import pandas as pd
def calculate_turn_over(pos_table): def calculate_turn_over(pos_table: pd.DataFrame) -> pd.DataFrame:
turn_over_table = {} turn_over_table = {}
total_factors = pos_table.columns.difference(['Code']) total_factors = pos_table.columns.difference(['Code'])
pos_table.reset_index() pos_table.reset_index()
......
...@@ -5,9 +5,10 @@ Created on 2017-5-25 ...@@ -5,9 +5,10 @@ Created on 2017-5-25
@author: cheng.li @author: cheng.li
""" """
import numpy as np
from typing import Optional from typing import Optional
from typing import List from typing import List
import numpy as np
import pandas as pd
from alphamind.data.neutralize import neutralize from alphamind.data.neutralize import neutralize
from alphamind.portfolio.longshortbulder import long_short_build from alphamind.portfolio.longshortbulder import long_short_build
from alphamind.portfolio.rankbuilder import rank_build from alphamind.portfolio.rankbuilder import rank_build
...@@ -37,13 +38,13 @@ def build_portfolio(er: np.ndarray, ...@@ -37,13 +38,13 @@ def build_portfolio(er: np.ndarray,
builder = builder.lower() builder = builder.lower()
if builder == 'long_short': if builder == 'ls' or builder == 'long_short':
return long_short_build(er, **kwargs) return long_short_build(er, **kwargs)
elif builder == 'rank': elif builder == 'rank':
return rank_build(er, **kwargs) return rank_build(er, **kwargs)
elif builder == 'percent_build': elif builder == 'percent':
return percent_build(er, **kwargs) return percent_build(er, **kwargs)
elif builder == 'linear_prog': elif builder == 'linear_prog' or builder == 'linear':
status, _, weight = linear_build(er, **kwargs) status, _, weight = linear_build(er, **kwargs)
if status != 'optimal': if status != 'optimal':
raise ValueError('linear programming optimizer in status: {0}'.format(status)) raise ValueError('linear programming optimizer in status: {0}'.format(status))
...@@ -51,18 +52,78 @@ def build_portfolio(er: np.ndarray, ...@@ -51,18 +52,78 @@ def build_portfolio(er: np.ndarray,
return weight return weight
class FDataPack(object):
    """Bundle a raw factor with the optional data that accompanies it.

    The pack stores the raw factor values together with optional security
    codes, group labels, benchmark weights and risk exposures, and offers
    helpers to pre-process the factor and to view everything as one table.

    All arrays are expected to share the same first dimension (one row per
    code).  The usage shown in this module passes ``raw_factor`` and
    ``benchmark`` as ``(n, 1)`` columns, ``groups`` as a length-``n`` vector
    and ``risk_exp`` as an ``(n, k)`` matrix.
    """

    def __init__(self,
                 raw_factor: np.ndarray,
                 factor_name: Optional[str]=None,
                 codes: Optional[List]=None,
                 groups: Optional[np.ndarray]=None,
                 benchmark: Optional[np.ndarray]=None,
                 risk_exp: Optional[np.ndarray]=None,
                 risk_names: Optional[List[str]]=None):
        """Store the factor and its optional companion data.

        :param raw_factor: raw factor values, one row per code.
        :param factor_name: column label for the factor; any falsy value
            (``None`` or ``''``) falls back to ``'factor'``.
        :param codes: row labels used as the index of :meth:`to_df`.
        :param groups: group label of each code.
        :param benchmark: benchmark value (weight) of each code.
        :param risk_exp: risk exposures, one column per risk factor.
        :param risk_names: column labels for ``risk_exp``; required by
            :meth:`to_df` whenever ``risk_exp`` is given.
        """
        self.raw_factor = raw_factor
        self.factor_name = factor_name if factor_name else 'factor'
        self.codes = codes
        self.groups = groups
        self.benchmark = benchmark
        self.risk_exp = risk_exp
        self.risk_names = risk_names

    def benchmark_risk_exp(self) -> np.ndarray:
        """Return the benchmark's aggregate exposure to each risk factor.

        The product contracts over the code axis, so ``(n, k)`` exposures
        combined with an ``(n, 1)`` benchmark give a ``(k, 1)`` result and a
        1-D benchmark of length ``n`` gives a length-``k`` vector.

        :raises ValueError: if ``risk_exp`` or ``benchmark`` was not supplied.
        """
        if self.risk_exp is None or self.benchmark is None:
            raise ValueError('both risk_exp and benchmark are required '
                             'to compute the benchmark risk exposure')
        # The exposures must be transposed: `risk_exp @ benchmark` would try
        # to contract the k risk columns against the n benchmark rows.
        return self.risk_exp.T @ self.benchmark

    def factor_processing(self, pre_process) -> np.ndarray:
        """Run the module-level ``factor_processing`` on the raw factor.

        :param pre_process: forwarded unchanged as the second argument of the
            module-level ``factor_processing`` function.
        :return: whatever the module-level function returns; the risk
            exposures are passed as a third argument only when available.
        """
        # Inside the method body the bare name resolves to the module-level
        # function, not to this method.
        if self.risk_exp is None:
            return factor_processing(self.raw_factor,
                                     pre_process)
        else:
            return factor_processing(self.raw_factor,
                                     pre_process,
                                     self.risk_exp)

    def to_df(self) -> pd.DataFrame:
        """Assemble all available data into one table indexed by ``codes``.

        Columns appear in the order: factor, ``'groups'``, ``'benchmark'``,
        then one column per entry of ``risk_names``; parts that were not
        supplied are left out.

        :raises ValueError: if ``risk_exp`` is set but ``risk_names`` is not.
        """
        cols = [self.factor_name]
        to_concat = [self.raw_factor]

        if self.groups is not None:
            cols.append('groups')
            to_concat.append(self.groups.reshape(-1, 1))

        if self.benchmark is not None:
            cols.append('benchmark')
            # Reshape so a 1-D benchmark is accepted just like 1-D groups;
            # an (n, 1) benchmark is left unchanged by this.
            to_concat.append(self.benchmark.reshape(-1, 1))

        if self.risk_exp is not None:
            if self.risk_names is None:
                raise ValueError('risk_names is required to label the '
                                 'columns of risk_exp')
            cols.extend(self.risk_names)
            to_concat.append(self.risk_exp)

        return pd.DataFrame(np.concatenate(to_concat, axis=1),
                            columns=cols,
                            index=self.codes)
if __name__ == '__main__':
    # Demo: build a data pack from random data for 1000 codes and print it
    # as a table.  The random draws keep their original order so that a
    # seeded run yields the same numbers.
    sample_size = 1000

    raw_factor = np.random.randn(sample_size, 1)
    groups = np.random.randint(30, size=sample_size)
    benchmark = np.random.randn(sample_size, 1)
    risk_exp = np.random.randn(sample_size, 3)
    codes = list(range(1, sample_size + 1))

    optional_parts = dict(codes=codes,
                          groups=groups,
                          benchmark=benchmark,
                          risk_exp=risk_exp,
                          risk_names=['market', 'size', 'growth'])
    data_pack = FDataPack(raw_factor, 'cfinc1', **optional_parts)

    table = data_pack.to_df()
    print(table)
print(new_factor.sum())
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment