import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.tsa.api as smt
from arch import arch_model
# Load the train/test log-return series as pd.Series
# (read_csv's squeeze argument was removed in pandas 2.x, so squeeze the single column instead)
train = pd.read_csv('train.csv', index_col=0, parse_dates=[0]).squeeze("columns")
test = pd.read_csv('test.csv', index_col=0, parse_dates=[0]).squeeze("columns")
def tsplot(y, title, lags=None, figsize=(15, 10), style='bmh'):
    if not isinstance(y, pd.Series):
        y = pd.Series(y)
    with plt.style.context(style):
        fig = plt.figure(figsize=figsize)
        layout = (2, 2)
        ts_ax = plt.subplot2grid(layout, (0, 0), colspan=2)
        acf_ax = plt.subplot2grid(layout, (1, 0))
        pacf_ax = plt.subplot2grid(layout, (1, 1))
        y.plot(ax=ts_ax)
        ts_ax.set_title(title)
        smt.graphics.plot_acf(y, lags=lags, ax=acf_ax, alpha=0.05)
        smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax, alpha=0.05, method="yw")
        acf_ax.set_title('Autocorrelation')
        pacf_ax.set_title('Partial Autocorrelation')
        plt.tight_layout()
    return
import statsmodels.api as sm
# QQ-plot of the standardized log returns against the normal distribution
std = np.std(train)
sm.qqplot(train / std, line='45');
tsplot(train, "Log Returns, ACF, and PACF")
smt.adfuller(train)
(-23.36845488643965, 0.0, 9, 4517, {'1%': -3.4317985322285707, '5%': -2.862180079546209, '10%': -2.567110717704876}, -26345.66930578957)
The test statistic is -23.37 with a p-value of essentially zero, so we reject the null hypothesis of a unit root: the series is already stationary and needs no differencing (d = 0).
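The raw tuple above is hard to read. As a small sketch (the helper `print_adf` below is not part of the original notebook), we can label its elements, since adfuller returns the test statistic, p-value, number of lags used, number of observations, the critical values, and the information criterion of the selected lag order:

def print_adf(result):
    """Pretty-print the tuple returned by statsmodels' adfuller."""
    stat, pvalue, usedlag, nobs, crit, icbest = result
    print(f"ADF statistic: {stat:.4f}")
    print(f"p-value:       {pvalue:.4g}")
    print(f"lags used:     {usedlag}, observations: {nobs}")
    for level, cv in crit.items():
        print(f"critical value ({level}): {cv:.4f}")

print_adf(smt.adfuller(train))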
import warnings
warnings.filterwarnings('ignore')
order_sel = sm.tsa.arma_order_select_ic(train, ic=["aic", "bic"], max_ar=2, max_ma=2)
p_bic, q_bic = order_sel.bic_min_order
p_aic, q_aic = order_sel.aic_min_order
d = 0  # no differencing (the series is already stationary)
print('BIC Order', (p_bic,d,q_bic))
print('AIC Order', (p_aic,d,q_aic))
BIC Order (0, 0, 0)
AIC Order (0, 0, 2)

BIC favors a pure white-noise model, while AIC selects two MA terms; we proceed with the AIC order below.
print('AIC Model')
model_aic = smt.ARIMA(train, order=(p_aic, d, q_aic))
results_aic = model_aic.fit()
print(results_aic.summary())
AIC Model
                               SARIMAX Results
==============================================================================
Dep. Variable:             LogReturns   No. Observations:                 4527
Model:                 ARIMA(0, 0, 2)   Log Likelihood               13208.475
Date:                Thu, 28 Apr 2022   AIC                         -26408.950
Time:                        17:58:55   BIC                         -26383.279
Sample:                             0   HQIC                        -26399.907
                               - 4527
Covariance Type:                  opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0002      0.000      1.129      0.259      -0.000       0.001
ma.L1         -0.0116      0.008     -1.450      0.147      -0.027       0.004
ma.L2         -0.0616      0.008     -7.526      0.000      -0.078      -0.046
sigma2         0.0002   1.69e-06    101.494      0.000       0.000       0.000
===================================================================================
Ljung-Box (L1) (Q):                   0.07   Jarque-Bera (JB):             13723.11
Prob(Q):                              0.79   Prob(JB):                         0.00
Heteroskedasticity (H):               0.28   Skew:                            -0.03
Prob(H) (two-sided):                  0.00   Kurtosis:                        11.53
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
tsplot(results_aic.resid, title="ARIMA Residuals", lags=5)
# Ljung-Box and Box-Pierce tests for autocorrelation in the ARIMA residuals
sm.stats.acorr_ljungbox(results_aic.resid, lags=5, boxpierce=True, return_df=True)
| lag | lb_stat  | lb_pvalue | bp_stat  | bp_pvalue |
|-----|----------|-----------|----------|-----------|
| 1   | 0.069021 | 0.792767  | 0.068976 | 0.792834  |
| 2   | 0.127238 | 0.938363  | 0.127141 | 0.938408  |
| 3   | 0.306741 | 0.958756  | 0.306446 | 0.958812  |
| 4   | 2.168965 | 0.704715  | 2.166203 | 0.705221  |
| 5   | 3.122665 | 0.681081  | 3.118428 | 0.681733  |
Up to lag 5, the ACF and PACF of the residuals show no significant coefficients. The Ljung-Box and Box-Pierce tests also have large p-values, so we cannot reject the null of no autocorrelation and can treat the residual sequence as white noise.
# Repeat the tests on the squared residuals to check for ARCH effects
sm.stats.acorr_ljungbox(results_aic.resid ** 2, lags=10, boxpierce=True, return_df=True)
| lag | lb_stat     | lb_pvalue     | bp_stat     | bp_pvalue     |
|-----|-------------|---------------|-------------|---------------|
| 1   | 281.246936  | 4.016698e-63  | 281.060639  | 4.410279e-63  |
| 2   | 603.699672  | 8.096210e-132 | 603.228586  | 1.024653e-131 |
| 3   | 762.998708  | 4.578316e-165 | 762.351756  | 6.324205e-165 |
| 4   | 899.407470  | 2.238930e-193 | 898.579804  | 3.383507e-193 |
| 5   | 1008.370434 | 9.261532e-216 | 1007.374355 | 1.521724e-215 |
| 6   | 1123.685389 | 1.565177e-239 | 1122.485618 | 2.845536e-239 |
| 7   | 1201.343367 | 3.617657e-255 | 1199.989275 | 7.099737e-255 |
| 8   | 1304.572639 | 2.414423e-276 | 1302.990617 | 5.305990e-276 |
| 9   | 1354.352501 | 5.625355e-286 | 1352.649575 | 1.312279e-285 |
| 10  | 1484.584041 | 0.000000e+00  | 1482.536054 | 0.000000e+00  |
# Engle's ARCH-LM test for conditional heteroskedasticity in the residuals
etest = sm.stats.diagnostic.het_arch(results_aic.resid, nlags=10)
print('pval Lagrange: ' + str(np.round(etest[1], 4)))
print('pval F-test: ' + str(np.round(etest[3], 4)))
etest
pval Lagrange: 0.0
pval F-test: 0.0
(622.7549183324542, 2.3389558452228067e-127, 72.05847611430839, 2.766030135813962e-137)
The Ljung-Box test on the squared residuals suggests they are strongly autocorrelated, and the Engle test confirms that the residuals exhibit conditional heteroskedasticity. There are ARCH effects, so a volatility model is needed.
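For reference (this equation is not stated in the original notebook; it is the standard textbook definition of the model fitted below), a constant-mean GARCH(2, 2) assumes

\varepsilon_t = \sigma_t z_t, \quad z_t \sim N(0, 1)

\sigma_t^2 = \omega + \alpha_1 \varepsilon_{t-1}^2 + \alpha_2 \varepsilon_{t-2}^2 + \beta_1 \sigma_{t-1}^2 + \beta_2 \sigma_{t-2}^2

where \varepsilon_t are the demeaned (here, scaled ARIMA residual) innovations and \sigma_t^2 is the conditional variance.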
Note: we fit the model on residuals scaled by 100; GARCH optimizers tend to converge more reliably when the data are not too close to zero.
# GARCH(2, 2) with a constant mean, fitted to the scaled ARIMA residuals
gm = arch_model(results_aic.resid * 100, p=2, q=2)
res_scaled = gm.fit()
Optimization terminated successfully    (Exit mode 0)
            Current function value: 6785.9600735354
            Iterations: 18
            Function evaluations: 142
            Gradient evaluations: 18
print(res_scaled.summary())
                     Constant Mean - GARCH Model Results
==============================================================================
Dep. Variable:                   None   R-squared:                       0.000
Mean Model:             Constant Mean   Adj. R-squared:                  0.000
Vol Model:                      GARCH   Log-Likelihood:               -6785.96
Distribution:                  Normal   AIC:                           13583.9
Method:            Maximum Likelihood   BIC:                           13622.4
                                        No. Observations:                 4527
Date:                Thu, Apr 28 2022   Df Residuals:                     4526
Time:                        18:03:35   Df Model:                            1
                                 Mean Model
============================================================================
                 coef    std err          t      P>|t|      95.0% Conf. Int.
----------------------------------------------------------------------------
mu             0.0301  1.483e-02      2.030  4.233e-02 [1.043e-03,5.918e-02]
                              Volatility Model
=============================================================================
                 coef    std err          t      P>|t|       95.0% Conf. Int.
-----------------------------------------------------------------------------
omega          0.0153  6.743e-03      2.264  2.360e-02  [2.047e-03,2.848e-02]
alpha[1]       0.0724  2.899e-02      2.497  1.252e-02    [1.557e-02,  0.129]
alpha[2]   5.3297e-12  1.778e-02  2.998e-10      1.000 [-3.484e-02,3.484e-02]
beta[1]        0.7434      0.303      2.452  1.421e-02      [  0.149,  1.338]
beta[2]        0.1746      0.304      0.575      0.565      [ -0.420,  0.770]
=============================================================================

Covariance estimator: robust
# Standardize the GARCH residuals by the fitted conditional volatility
garch_resid = res_scaled.resid
st_garch_resid = garch_resid / res_scaled.conditional_volatility
fig = res_scaled.plot()
# Ljung-Box / Box-Pierce tests on the squared standardized residuals
sm.stats.acorr_ljungbox(st_garch_resid ** 2, lags=10, boxpierce=True, return_df=True)
| lag | lb_stat   | lb_pvalue | bp_stat   | bp_pvalue |
|-----|-----------|-----------|-----------|-----------|
| 1   | 0.807515  | 0.368856  | 0.806981  | 0.369014  |
| 2   | 7.344085  | 0.025424  | 7.337777  | 0.025505  |
| 3   | 7.415846  | 0.059761  | 7.409458  | 0.059931  |
| 4   | 8.148070  | 0.086302  | 8.140712  | 0.086558  |
| 5   | 8.151305  | 0.148092  | 8.143943  | 0.148479  |
| 6   | 8.511495  | 0.202972  | 8.503497  | 0.203486  |
| 7   | 8.743913  | 0.271569  | 8.735453  | 0.272212  |
| 8   | 9.002884  | 0.342053  | 8.993852  | 0.342815  |
| 9   | 9.747039  | 0.371349  | 9.736200  | 0.372260  |
| 10  | 10.212696 | 0.422035  | 10.200623 | 0.423072  |
etest_garch = sm.stats.diagnostic.het_arch(st_garch_resid, nlags=10)
print('pval Lagrange: ' + str(np.round(etest_garch[1], 4)))
print('pval F-test: ' + str(np.round(etest_garch[3], 4)))
etest_garch
pval Lagrange: 0.4263
pval F-test: 0.4267
(10.162580243357679, 0.4263473083946894, 1.0160691924636058, 0.4267201886626978)
Apart from a marginal rejection at lag 2, the Ljung-Box statistics on the squared standardized residuals are insignificant, and the Engle test no longer rejects the null of homoskedasticity: the GARCH(2, 2) model has captured the ARCH effects in the residuals.
log_returns = pd.concat([train, test])
n_train = len(train)
n_test = len(test)
train_start, train_end = train.index[0].strftime("%Y-%m-%d"), train.index[-1].strftime("%Y-%m-%d")
test_start, test_end = test.index[0].strftime("%Y-%m-%d"), test.index[-1].strftime("%Y-%m-%d")
# Multi-step mean forecast from the ARIMA(0,0,2); beyond two steps the MA terms
# drop out, so the forecast settles at the estimated constant.
arima_predictions = pd.Series(
    data=results_aic.forecast(n_test).to_numpy(),
    index=test.index,
)
PLOT_STYLE = "bmh"
with plt.style.context(PLOT_STYLE):
fig, ax = plt.subplots(1, 1, figsize=(12, 5))
log_returns.iloc[-100:].plot(ax=ax, label="Actual Log Returns")
arima_predictions.plot(ax=ax, label="ARIMA(0,0,2) Predictions")
ax.axvline(train_end, color="gray", alpha=0.9, linestyle="dashed")
ax.set(
title="Daily Log-Returns Forecast",
ylabel="Log(Return)",
ylim=(-0.05, 0.05),
)
ax.legend()
None
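As a quick sanity check (not part of the original analysis), we can compare the ARIMA mean forecast with the realised test returns using simple error metrics:

# RMSE and MAE of the ARIMA(0,0,2) mean forecast over the test window
errors = test - arima_predictions
rmse = np.sqrt(np.mean(errors ** 2))
mae = np.mean(np.abs(errors))
print(f"RMSE: {rmse:.6f}, MAE: {mae:.6f}")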
# Fixed-window (analytic) volatility forecast for the whole test horizon
forecasts_vol_scaled = res_scaled.forecast(horizon=len(test), reindex=False)
forecasts_df = pd.DataFrame()
forecasts_df['ActualReturn'] = test.values
forecasts_df['PredictedVolScaled'] = np.sqrt(forecasts_vol_scaled.variance.values[-1,:])
forecasts_df['Date'] = test.index
# previous returns and volatilities
pre_df = pd.DataFrame()
pre_df['ActualReturn'] = train.values
pre_df['ConditionalVolScaled'] = res_scaled.conditional_volatility.values
pre_df['Date'] = train.index
fig = plt.figure(figsize=(10, 8))
return_ax = fig.add_subplot(2, 1, 1)
return_ax.set_title("Returns and Volatility using fixed window (Scaled)")
plt.plot(train * 100, label="Actual Returns (2000 - 2017)")
plt.plot(test * 100, label="Actual Returns (Jan 2018)", color='orange')
plt.legend();
vol_ax = fig.add_subplot(2, 1, 2)
vol_ax.set_title("Volatility (Scaled)")
plt.plot(pre_df['Date'], pre_df['ConditionalVolScaled'], label="Conditional Vol (2000 - 2017)")
plt.plot(forecasts_df['Date'], forecasts_df['PredictedVolScaled'], label="Predicted Vol (Jan 2018)", color='orange')
plt.legend();
To zoom in on the predictions for the test window, we re-plot only January 2018:
fig = plt.figure(figsize=(10, 8))
return_ax = fig.add_subplot(2, 1, 1)
return_ax.set_title("Returns and Volatility using fixed window (Scaled)")
# plt.plot(train * 100, label="Actual Returns (2000 - 2017)")
plt.plot(test * 100, label="Actual Returns (Jan 2018)")
plt.legend();
vol_ax = fig.add_subplot(2, 1, 2)
vol_ax.set_title("Volatility (Scaled)")
# plt.plot(pre_df['Date'], pre_df['ConditionalVolScaled'], label="Conditional Vol (2000 - 2017)")
plt.plot(forecasts_df['Date'], forecasts_df['PredictedVolScaled'], label="Predicted Vol (Jan 2018)")
plt.legend();
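The volatility forecast above comes from a single fixed estimation window. As a hedged alternative sketch (not the approach used above), one could refit the GARCH each day on an expanding window and collect one-step-ahead volatility forecasts. For simplicity this sketch fits the GARCH directly to the scaled log returns rather than to re-estimated ARIMA residuals, which is only an approximation, albeit a mild one here since the estimated mean dynamics are very small:

# Expanding-window, one-step-ahead GARCH(2, 2) volatility forecasts
rolling_vol = []
for i in range(n_test):
    window = log_returns.iloc[: n_train + i] * 100   # all data up to day i of the test set
    res_i = arch_model(window, p=2, q=2).fit(disp="off")
    fc = res_i.forecast(horizon=1, reindex=False)
    rolling_vol.append(np.sqrt(fc.variance.values[-1, 0]))
rolling_vol = pd.Series(rolling_vol, index=test.index, name="RollingVolScaled")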