Commit bd3c56b0 authored by Dr.李

replace column names

parent 9e66adbd
......@@ -10,13 +10,13 @@ import pandas as pd
def calculate_turn_over(pos_table: pd.DataFrame) -> pd.DataFrame:
    """Compute a per-factor turnover series from a position table.

    Parameters
    ----------
    pos_table
        Position table with a ``trade_date`` column, a ``code`` (security
        id) column, and one column of position weights per factor.
        # assumes the DataFrame index also holds the trade dates, since
        # ``pivot`` below uses the existing index as rows — TODO confirm
        # against callers.

    Returns
    -------
    pd.DataFrame
        One column per factor, indexed by the unique trade dates.  The
        turnover on a date is the sum over all codes of the absolute
        change in position weight versus the previous date (the first
        date has no predecessor and yields 0).
    """
    # Factor columns are everything except the bookkeeping columns.
    # (Excluding only 'code' here would also treat 'trade_date' as a
    # factor and attempt to diff date values inside the loop.)
    total_factors = pos_table.columns.difference(['code', 'trade_date'])

    # NOTE(review): the original called pos_table.reset_index() and
    # discarded the result — a no-op — so the call was removed.
    turn_over_table = {}
    for name in total_factors:
        # Rows: trade dates (the index); columns: security codes.
        pivot_position = pos_table.pivot(values=name, columns='code').fillna(0.)
        # Sum of absolute weight changes across all codes, per date.
        turn_over_series = pivot_position.diff().abs().sum(axis=1)
        turn_over_table[name] = turn_over_series.values

    turn_over_table = pd.DataFrame(turn_over_table,
                                   index=pos_table.trade_date.unique())
    return turn_over_table[total_factors]
This diff is collapsed.
......@@ -41,7 +41,7 @@ class Universe(object):
all_and_conditions.append(univ_out)
if self.exclude_codes:
codes_out = UniverseTable.Code.notin_(self.exclude_codes)
codes_out = UniverseTable.code.notin_(self.exclude_codes)
all_and_conditions.append(codes_out)
all_or_conditions = []
......@@ -50,18 +50,18 @@ class Universe(object):
all_or_conditions.append(univ_in)
if self.include_codes:
codes_in = UniverseTable.Code.in_(self.include_codes)
codes_in = UniverseTable.code.in_(self.include_codes)
all_or_conditions.append(codes_in)
return all_and_conditions, all_or_conditions
def query(self, ref_date):
query = select([UniverseTable.Date, UniverseTable.Code]).distinct()
query = select([UniverseTable.trade_date, UniverseTable.code]).distinct()
all_and_conditions, all_or_conditions = self._create_condition()
query = query.where(
and_(
UniverseTable.Date == ref_date,
UniverseTable.trade_date == ref_date,
or_(
and_(*all_and_conditions),
*all_or_conditions
......@@ -72,10 +72,10 @@ class Universe(object):
return query
def query_range(self, start_date=None, end_date=None, dates=None):
query = select([UniverseTable.Date, UniverseTable.Code]).distinct()
query = select([UniverseTable.trade_date, UniverseTable.code]).distinct()
all_and_conditions, all_or_conditions = self._create_condition()
dates_cond = UniverseTable.Date.in_(dates) if dates else UniverseTable.Date.between(start_date, end_date)
dates_cond = UniverseTable.trade_date.in_(dates) if dates else UniverseTable.trade_date.between(start_date, end_date)
query = query.where(
and_(
......
......@@ -32,13 +32,13 @@ training - every 4 week
engine = SqlEngine('postgresql+psycopg2://postgres:A12345678!@10.63.6.220/alpha')
universe = Universe('zz500', ['zz500'])
neutralize_risk = industry_styles
alpha_factors = risk_styles
alpha_factors = ['BDTO', 'RVOL', 'CHV', 'VAL', 'CFinc1'] # risk_styles
benchmark = 905
n_bins = 5
frequency = '1w'
batch = 4
start_date = '2012-01-01'
end_date = '2017-08-01'
end_date = '2017-08-31'
'''
fetch data from target data base and do the corresponding data processing
......@@ -74,7 +74,7 @@ for train_date in dates:
model.fit(x, y)
model_df.loc[train_date] = copy.deepcopy(model)
print('Date: {0} training finished'.format(train_date))
print('trade_date: {0} training finished'.format(train_date))
'''
predicting phase: using trained model on the re-balance dates
......@@ -89,8 +89,8 @@ final_res = np.zeros((len(dates), n_bins))
for i, predict_date in enumerate(dates):
model = model_df[predict_date]
x = predict_x[predict_date]
benchmark_w = settlement[settlement.Date == predict_date]['weight'].values
realized_r = settlement[settlement.Date == predict_date]['dx'].values
benchmark_w = settlement[settlement.trade_date == predict_date]['weight'].values
realized_r = settlement[settlement.trade_date == predict_date]['dx'].values
predict_y = model.predict(x)
......@@ -100,12 +100,14 @@ for i, predict_date in enumerate(dates):
benchmark=benchmark_w)
final_res[i] = res / benchmark_w.sum()
print('Date: {0} predicting finished'.format(train_date))
print('trade_date: {0} predicting finished'.format(train_date))
last_date = advanceDateByCalendar('china.sse', dates[-1], frequency)
df = pd.DataFrame(final_res, index=dates[1:] + [last_date])
df.sort_index(inplace=True)
df = df.cumsum().plot()
plt.title('Prod factors model training with Linear Regression from 2012 - 2017')
df.cumsum().plot()
plt.title('Risk style factors model training with Linear Regression from 2012 - 2017')
plt.show()
df = df.cumsum()
df.to_csv('d:/20120101_20170823_bt.csv')
......@@ -50,15 +50,15 @@ def prepare_data(engine: SqlEngine,
factor_df = engine.fetch_factor_range(universe,
factors=transformer,
dates=dates,
warm_start=warm_start).sort_values(['Date', 'Code'])
warm_start=warm_start).sort_values(['trade_date', 'code'])
return_df = engine.fetch_dx_return_range(universe, dates=dates, horizon=horizon)
benchmark_df = engine.fetch_benchmark_range(benchmark, dates=dates)
df = pd.merge(factor_df, return_df, on=['Date', 'Code']).dropna()
df = pd.merge(df, benchmark_df, on=['Date', 'Code'], how='left')
df = pd.merge(factor_df, return_df, on=['trade_date', 'code']).dropna()
df = pd.merge(df, benchmark_df, on=['trade_date', 'code'], how='left')
df['weight'] = df['weight'].fillna(0.)
return df[['Date', 'Code', 'dx']], df[['Date', 'Code', 'weight'] + transformer.names]
return df[['trade_date', 'code', 'dx']], df[['trade_date', 'code', 'weight'] + transformer.names]
def batch_processing(x_values,
......@@ -137,10 +137,10 @@ def fetch_data_package(engine: SqlEngine,
if neutralized_risk:
risk_df = engine.fetch_risk_model_range(universe, dates=dates, risk_model=risk_model)[1]
used_neutralized_risk = list(set(neutralized_risk).difference(transformer.names))
risk_df = risk_df[['Date', 'Code'] + used_neutralized_risk].dropna()
risk_df = risk_df[['trade_date', 'code'] + used_neutralized_risk].dropna()
train_x = pd.merge(factor_df, risk_df, on=['Date', 'Code'])
return_df = pd.merge(return_df, risk_df, on=['Date', 'Code'])[['Date', 'Code', 'dx']]
train_x = pd.merge(factor_df, risk_df, on=['trade_date', 'code'])
return_df = pd.merge(return_df, risk_df, on=['trade_date', 'code'])[['trade_date', 'code', 'dx']]
train_y = return_df.copy()
risk_exp = train_x[neutralized_risk].values.astype(float)
......@@ -153,7 +153,7 @@ def fetch_data_package(engine: SqlEngine,
x_values = train_x[transformer.names].values.astype(float)
y_values = train_y[['dx']].values
date_label = pd.DatetimeIndex(factor_df.Date).to_pydatetime()
date_label = pd.DatetimeIndex(factor_df.trade_date).to_pydatetime()
dates = np.unique(date_label)
return_df['weight'] = train_x['weight']
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment