O objetivo deste exercício é prever um dado termo da curva de juros (real ou nominal) brasileira ou americana usando técnicas de machine learning.
Como coletamos os dados tanto da curva nominal quanto da curva real, podemos facilmente calcular a inflação implícita para o Brasil e os Estados Unidos. Dados de curva de juros tendem a apresentar problemas de multicolinearidade, dado que normalmente um termo (ex.: 5 anos) pode ser aproximado por uma combinação de outros termos (ex.: 3 e 7 anos). Desse modo, na parte de previsão do exercício criamos a seguinte regra de bolso para os modelos benchmark (lasso e ridge): se a curva a ser estimada é a brasileira, então as variáveis independentes serão da curva americana, e vice-versa. Em termos de dados faltantes, optou-se por preencher tais valores usando interpolações lineares. O legal de trabalhar com dados da curva de juros é que eles capturam a precificação de variáveis econômicas importantes, tais como juros nominal, juros real e inflação.
Importando as bibliotecas e declarando as funções
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('fivethirtyeight')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import scipy.stats as stats
from scipy.stats import norm
from scipy.special import boxcox1p
import statsmodels
import statsmodels.api as sm
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.linear_model import Lasso, LassoCV, Ridge, RidgeCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
def processData(data, lb):
    """Slice a sequence into look-back windows and the value following each one.

    Args:
        data: Indexable, sliceable sequence of observations (e.g. a NumPy array).
        lb: Look-back length, i.e. the number of consecutive observations
            placed in each window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays where ``X[k]`` is ``data[k:k + lb]``
        and ``Y[k]`` is ``data[k + lb]``, the observation right after it.

    Note:
        Window start positions run over ``range(len(data) - lb - 1)``, so the
        final window that would still have a following value is not emitted.
    """
    starts = range(len(data) - lb - 1)
    windows = [data[start:start + lb] for start in starts]
    targets = [data[start + lb] for start in starts]
    return np.array(windows), np.array(targets)
def RMSLE(predictions, realizations):
    """Return the root mean squared logarithmic error of a forecast.

    Computes ``sqrt(mean((log(1 + p) - log(1 + r)) ** 2))`` after flooring the
    predictions at zero.

    Args:
        predictions: Forecast values; any array-like (list, NumPy array,
            pandas Series). Negative entries are treated as 0.
        realizations: Observed values, same length as ``predictions``. They
            are used as given (not floored), so values at or below -1 yield
            NaN/inf.

    Returns:
        The RMSLE as a floating-point scalar.
    """
    # Floor negative forecasts at zero so the logarithm is defined for them;
    # np.maximum (unlike the .clip method) also accepts plain Python lists.
    predictions_use = np.maximum(predictions, 0)
    # log1p(x) == log(1 + x) but stays accurate when x is close to zero.
    log_errors = np.log1p(predictions_use) - np.log1p(realizations)
    rmsle = np.sqrt(np.mean(np.asarray(log_errors) ** 2))
    return rmsle
Construindo e limpando a base de dados
# Load the Brazilian nominal and real zero-curve files, indexed by date.
bra_nominal = pd.read_csv(
    'Downloads/Curva_zero_prefixados.csv', parse_dates=['Date'], index_col='Date'
)
bra_nominal.tail()  # notebook preview
bra_real = pd.read_csv(
    'Downloads/Curva_zero_indice_de_precos.csv', parse_dates=['Date'], index_col='Date'
)
bra_real.tail()  # notebook preview

# Tag every column with the country code so names stay unique once the
# Brazilian and US frames are merged.
bra_nominal = bra_nominal.add_prefix('BR_')
bra_nominal.tail()
bra_real = bra_real.add_prefix('BR_')
bra_real.tail()

# Discard nominal columns by position (0-3, 5, 7, 9 and 16-36).
# NOTE(review): positional selection depends on the CSV column order -- confirm
# it still matches the file layout if the source file changes.
_bra_dropped_positions = [0, 1, 2, 3, 5, 7, 9, *range(16, 37)]
bra_nominal = bra_nominal.drop(columns=bra_nominal.columns[_bra_dropped_positions])

# Both frames are indexed by 'Date'; the default inner join keeps common dates.
df_bra = pd.merge(bra_nominal, bra_real, on='Date')

# Implied inflation per tenor, in percent: ((1 + nominal) / (1 + real) - 1) * 100,
# with both rates quoted in percent.
_bra_tenors = ('6M', '1Y', '2Y', '3Y', '4Y', '5Y', '10Y')
for _tenor in _bra_tenors:
    _nominal_pct = df_bra[f'BR_{_tenor}_N']
    _real_pct = df_bra[f'BR_{_tenor}_R']
    df_bra[f'BR_inf_{_tenor}'] = (((1 + _nominal_pct / 100) / (1 + _real_pct / 100)) - 1) * 100
inf_bra = df_bra[[f'BR_inf_{_tenor}' for _tenor in _bra_tenors]]

# Descriptive statistics with one row per series (used by the summary charts).
bra_nominal_summ = bra_nominal.describe().T
bra_nominal_summ
bra_real_summ = bra_real.describe().T
bra_real_summ
inf_bra_summ = inf_bra.describe().T
inf_bra_summ
def _load_us_yields(csv_path, column_names):
    """Read a US Treasury yield CSV and return the cleaned sample window.

    The columns are renamed with ``column_names``, 'Date' is parsed and used
    as the index, every remaining column is coerced to numeric (unparseable
    entries become NaN), and the rows between 2020-10-09 and 2007-01-02 are
    returned.
    """
    frame = pd.read_csv(csv_path).rename(columns=column_names)
    frame['Date'] = pd.to_datetime(frame['Date'])
    frame = frame.set_index('Date')
    for column in frame.columns:
        frame[column] = pd.to_numeric(frame[column], errors='coerce')
    # Bounds are written newest-first.
    # NOTE(review): presumably the file is sorted in descending date order --
    # confirm, since an ascending index would make this slice empty.
    return frame.loc['2020-10-09':'2007-01-02']


# Load the US nominal and real yield curves.
us_nominal = _load_us_yields(
    'Downloads/USTREASURY-YIELD.csv',
    {
        '1 MO': '1M_N',
        '2 MO': '2M_N',
        '3 MO': '3M_N',
        '6 MO': '6M_N',
        '1 YR': '1Y_N',
        '2 YR': '2Y_N',
        '3 YR': '3Y_N',
        '5 YR': '5Y_N',
        '7 YR': '7Y_N',
        '10 YR': '10Y_N',
        '20 YR': '20Y_N',
        '30 YR': '30Y_N',
    },
)
us_nominal.tail()  # notebook preview
us_real = _load_us_yields(
    'Downloads/USTREASURY-REALYIELD.csv',
    {
        '5 YR': '5Y_R',
        '7 YR': '7Y_R',
        '10 YR': '10Y_R',
        '20 YR': '20Y_R',
        '30 YR': '30Y_R',
    },
)
us_real.tail()  # notebook preview

# Tag every column with the country code so names stay unique after merging.
us_nominal = us_nominal.add_prefix('US_')
us_nominal.tail()
us_real = us_real.add_prefix('US_')
us_real.tail()

# Discard the first two nominal columns by position.
us_nominal = us_nominal.drop(columns=us_nominal.columns[[0, 1]])

# Both frames are indexed by 'Date'; the default inner join keeps common dates.
df_us = pd.merge(us_nominal, us_real, on='Date')

# Implied inflation per tenor, in percent: ((1 + nominal) / (1 + real) - 1) * 100,
# with both rates quoted in percent.
_us_tenors = ('5Y', '7Y', '10Y', '20Y', '30Y')
for _tenor in _us_tenors:
    _nominal_pct = df_us[f'US_{_tenor}_N']
    _real_pct = df_us[f'US_{_tenor}_R']
    df_us[f'US_inf_{_tenor}'] = (((1 + _nominal_pct / 100) / (1 + _real_pct / 100)) - 1) * 100
inf_us = df_us[[f'US_inf_{_tenor}' for _tenor in _us_tenors]]

# Descriptive statistics with one row per series (used by the summary charts).
us_nominal_summ = us_nominal.describe().T
us_nominal_summ
us_real_summ = us_real.describe().T
us_real_summ
inf_us_summ = inf_us.describe().T
inf_us_summ
# Join the Brazilian and US frames on their common dates, then fill missing
# values by linear interpolation (extending to both ends of each column).
df_all = pd.merge(df_bra, df_us, on='Date').interpolate(limit_direction='both')

# Brazilian nominal 10Y-2Y spread, plus a 0/1 dummy flagging negative spreads.
_br_spread = df_all['BR_10Y_N'] - df_all['BR_2Y_N']
df_all['BR_inv_steepness'] = _br_spread
df_all['BR_inv_steepness_D'] = np.where(_br_spread < 0, 1, 0)
Charts para análise (code)
# Charts
def _plot_summary_band(axis, summ, title, xlabel, ylabel, ylim, legend_loc=None):
    """Draw the min/max envelope and the mean +/- 1 std band of a summary table.

    Args:
        axis: Matplotlib axes to draw on.
        summ: Transposed ``DataFrame.describe()`` result, i.e. one row per
            series with the columns 'min', 'max', 'mean' and 'std'.
        title: Axes title.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        ylim: ``(lower, upper)`` limits for the y-axis.
        legend_loc: Optional legend location; Matplotlib's default placement
            is used when omitted.
    """
    lower_band = summ['mean'] - summ['std']
    upper_band = summ['mean'] + summ['std']
    dashed = dict(linestyle='-.', linewidth=1)

    # Curves are added from top to bottom so the legend reads in that order.
    axis.plot(summ['max'], color='red', label='max', **dashed)
    axis.plot(upper_band, color='blue', label='mean + std', **dashed)
    axis.plot(summ['mean'], color='blue')
    axis.plot(lower_band, color='blue', label='mean - std', **dashed)
    axis.plot(summ['min'], color='red', label='min', **dashed)

    # Red shading between the extremes and the band, blue inside the band.
    axis.fill_between(summ.index, summ['min'], lower_band, color='r', alpha=.1)
    axis.fill_between(summ.index, upper_band, summ['max'], color='r', alpha=.1)
    axis.fill_between(summ.index, lower_band, upper_band, color='b', alpha=.1)

    legend_kwargs = {'fontsize': 12}
    if legend_loc is not None:
        legend_kwargs['loc'] = legend_loc
    axis.legend(**legend_kwargs)
    axis.set_ylim(*ylim)
    axis.set_xlabel(xlabel)
    axis.set_ylabel(ylabel)
    axis.set_title(title)


def _plot_country_summary(country, nominal_summ, real_summ, inflation_summ, ylim):
    """Create the three stacked summary panels (nominal, real, inflation) for one country.

    Returns the ``(figure, axes)`` pair produced by ``plt.subplots``.
    """
    figure, axes = plt.subplots(3, 1, figsize=(7, 21))
    _plot_summary_band(
        axes[0], nominal_summ, f'{country} Nominal Yield Curve',
        'Bond duration', 'Yield [%]', ylim, legend_loc='upper left',
    )
    _plot_summary_band(
        axes[1], real_summ, f'{country} Real Yield Curve',
        'Bond duration', 'Yield [%]', ylim,
    )
    _plot_summary_band(
        axes[2], inflation_summ, f'{country} Implicit inflation',
        'Inflation Horizon', 'Inflation [%]', ylim,
    )
    return figure, axes


# All three panels of a country share one y-range so they can be compared.
fig, ax = _plot_country_summary(
    'Brazil', bra_nominal_summ, bra_real_summ, inf_bra_summ, ylim=(-2.5, 20)
)
fig, ax = _plot_country_summary(
    'US', us_nominal_summ, us_real_summ, inf_us_summ, ylim=(-2.5, 7.5)
)
def _plot_columns_grid(frame, n_cols=3, figsize=(24, 12), ylabel='Yield [%]'):
    """Plot every column of ``frame`` in its own panel of an ``n_cols``-wide grid.

    The number of rows is derived from the number of columns, so the grid
    always has room for every series; panels left over in the last row are
    removed from the figure.

    Args:
        frame: DataFrame whose columns are plotted against its index.
        n_cols: Number of panels per row.
        figsize: Figure size passed to ``plt.subplots``.
        ylabel: Y-axis label applied to every panel.

    Returns:
        The ``(figure, axes)`` pair, with ``axes`` always a 2-D array.
    """
    n_panels = len(frame.columns)
    n_rows = max(1, -(-n_panels // n_cols))  # ceiling division
    figure, axes = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)
    panels = axes.ravel()  # row-major: left to right, then top to bottom
    for axis, column in zip(panels, frame.columns):
        axis.plot(frame[column], label=column)
        axis.legend()
        axis.set_ylabel(ylabel)
    for unused_axis in panels[n_panels:]:
        figure.delaxes(unused_axis)
    return figure, axes


# One time-series panel per column of each country's frame.
fig, ax = _plot_columns_grid(df_us)
fig, ax = _plot_columns_grid(df_bra)
# Per-country correlation matrices, drawn side by side.
corrmatrixus = df_us.corr()
corrmatrixj = df_bra.corr()
f, ax = plt.subplots(1, 2, figsize=(18, 6))
for _panel, _title, _corr in zip(ax, ('US', 'Brazil'), (corrmatrixus, corrmatrixj)):
    sns.heatmap(_corr, square=True, ax=_panel)
    _panel.set_title(_title)

# Correlation matrix of the merged Brazil + US frame on its own figure.
corrmatrix = df_all.corr()
f, ax = plt.subplots(figsize=(22, 12))
sns.heatmap(corrmatrix, square=True, ax=ax)
ax.set_title('Brazil and US')
def _plot_curve_slope(frame, prefix, long_tenor, short_tenor):
    """Plot the nominal yield spread ``long_tenor - short_tenor`` over time.

    The spread is read from the columns ``{prefix}_{long_tenor}_N`` and
    ``{prefix}_{short_tenor}_N``. Negative spreads are shaded red and positive
    ones blue. The y-axis label is built from the same tenors that are
    plotted, so label and data cannot drift apart.

    Returns:
        ``(figure, axes, spread)`` where ``spread`` is the plotted Series.
    """
    figure, axis = plt.subplots(figsize=(18, 10.5))
    spread = frame[f'{prefix}_{long_tenor}_N'] - frame[f'{prefix}_{short_tenor}_N']
    axis.plot(frame.index, spread)
    axis.fill_between(frame.index, 0, spread, where=spread < 0, color='red')
    axis.fill_between(frame.index, 0, spread, where=spread > 0, color='blue')
    axis.set_ylabel(
        f'{prefix}_Nominal_Yield$_{{{long_tenor}}}$ - '
        f'{prefix}_Nominal_Yield$_{{{short_tenor}}}$'
    )
    return figure, axis, spread


fig, ax, diff = _plot_curve_slope(df_bra, 'BR', '10Y', '2Y')
# NOTE(review): the US chart uses the 5Y-1Y spread while Brazil uses 10Y-2Y;
# change the tenors here if the same spread is intended for both countries.
fig, ax, diff = _plot_curve_slope(df_us, 'US', '5Y', '1Y')
Charts







