Python_Lecture_6_TimeSeries3_GARCH
December 5, 2023
Statistics and Financial Data Science
1 GARCH examples
[1]: %reset -f
[2]: #import pandas_datareader as pd_data
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
import seaborn as sns
import math as m
import scipy as sp
from statsmodels.stats.anova import anova_lm
import pandas_datareader as pd_data
[3]: # Importing the Time Series Analysis module for simulating data
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima_model import ARMAResults
from statsmodels.tsa.stattools import arma_order_select_ic
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.tsa.stattools as sm_tools
[50]: # ! pip install pmdarima
# ! pip install arch
[5]: from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import pmdarima as pm
from datetime import datetime
1
# For splines
import patsy
from patsy import dmatrix
from arch import arch_model
from arch.univariate import ARX
from arch.univariate import GARCH
import pandas_datareader.data as web
import datetime as dt
import scipy.stats as st
[6]: # Will fix figure size for this notebook
plt.rcParams["figure.figsize"] = (8,6)
# Suppress warnings
import warnings
warnings.filterwarnings('ignore')
[19]: def plot_diagnosticsTight3(data,df=0):
fig, ((ax1,ax2),(ax3,ax4), (ax5,ax6)) = plt.subplots(3,2,figsize=(10,10))
ax1.hist(data, bins =40,color='m', density = True)
mu = data.mean()
sigma = data.std()
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 2419)
ax1.plot(x, stats.norm.pdf(x, mu, sigma), linewidth= 5)
ax1.grid()
ax1.set_title("Hist Data")
ax2.plot(data)
ax2.grid()
ax2.set_title("Data")
fig = plot_acf(data,lags=40,zero=False, ax = ax3,use_vlines =␣
↪True,auto_ylims=True)
ax3.grid()
#logData = np.floor(np.log(len(data)))
ljbox_test = sm.stats.acorr_ljungbox(data,model_df=df)#package version
ax4.plot(ljbox_test['lb_pvalue'])
ax4.axhline(y=0.05, color='r',ls='--')
ax4.set_title("LB test")
ax4.grid()
fig = sm.qqplot(data, line='q', ax = ax5)
ax5.grid()
2
ax6.plot(data*data)
ax6.grid()
ax6.set_title("Data Sqr")
plt.tight_layout()
[8]: import yfinance as yfin
yfin.pdr_override()
start_date = '2007-01-01'
end_date = '2022-01-01'
AAPL = pd_data.data.get_data_yahoo('AAPL', start_date,end_date)
AAPL.tail()
[*********************100%%**********************] 1 of 1 completed
[8]: Open High Low Close Adj Close \
Date
2021-12-27 177.089996 180.419998 177.070007 180.330002 178.292892
2021-12-28 180.160004 181.330002 178.529999 179.289993 177.264603
2021-12-29 179.330002 180.630005 178.139999 179.380005 177.353638
2021-12-30 179.470001 180.570007 178.089996 178.199997 176.186920
2021-12-31 178.089996 179.229996 177.259995 177.570007 175.564056
Volume
Date
2021-12-27 74919600
2021-12-28 79144300
2021-12-29 62348900
2021-12-30 59773000
2021-12-31 64062300
[9]: # Let's look at the returns
AAPLRet = np.log(AAPL['Adj Close']).diff().dropna()
AAPLRet.head()
[9]: Date
2007-01-04 0.021953
2007-01-05 -0.007147
2007-01-08 0.004927
2007-01-09 0.079799
2007-01-10 0.046746
Name: Adj Close, dtype: float64
1) Plot ACF/PACF for first impression on autoregressive features
2) Test for stationarity (otherwise transform)
3
[179]: plot_acf(AAPL['Adj Close'],zero = False, lags = 40);
plot_pacf(AAPL['Adj Close'], zero = False, lags =40);
plt.show()
4
[15]: # Existence of Unit Root
test = adfuller(AAPL['Adj Close'])
print('ADF Statistic: %f' % test[0])
print('p-value: %f' % test[1])
print('Critical Values:i/')
for item, value in test[4].items():
print('\t%s: %.2f' % (item, value))
ADF Statistic: 4.099029
p-value: 1.000000
Critical Values:i/
1%: -3.43
5%: -2.86
10%: -2.57
[16]: # Kpss test rejects stationarity
test = kpss(AAPL['Adj Close'])
print('KPSS Statistic: %f' % test[0])
print('p-value: %f' % test[1])
print('Critical Values:')
5
for item, value in test[3].items():
print('\t%s: %.2f' % (item, value))
KPSS Statistic: 6.719110
p-value: 0.010000
Critical Values:
10%: 0.35
5%: 0.46
2.5%: 0.57
1%: 0.74
[11]: plot_acf(AAPLRet,zero = False, lags = 40,auto_ylims=True);
plot_pacf(AAPLRet, zero = False, lags =40,auto_ylims=True);
plt.show()
6
2 Data passes the stationarity tests - I can consider modelling as
an ARMA process
[12]: # Reject Unit Root
test = adfuller(AAPLRet)
print('ADF Statistic: %f' % test[0])
print('p-value: %f' % test[1])
print('Critical Values:i/')
for item, value in test[4].items():
print('\t%s: %.2f' % (item, value))
ADF Statistic: -13.815719
p-value: 0.000000
Critical Values:i/
1%: -3.43
5%: -2.86
10%: -2.57
7
[13]: # Kpss test supports stationarity
test = kpss(AAPLRet)
print('KPSS Statistic: %f' % test[0])
print('p-value: %f' % test[1])
print('Critical Values:')
for item, value in test[3].items():
print('\t%s: %.2f' % (item, value))
KPSS Statistic: 0.063946
p-value: 0.100000
Critical Values:
10%: 0.35
5%: 0.46
2.5%: 0.57
1%: 0.74
3) Find / fit best ARMA model (already differenced once).
[20]: # Just plotting graphs on the actual data and examining the results fo the␣
↪Ljung-Box test...
# I see that there is auto-correlation
plot_diagnosticsTight3(AAPLRet)
8
[18]: # So can fit an ARMA type model
results = pm.auto_arima(AAPLRet, trace = True, seasonal=False,
stationary = True, stepwise = False,with_intercept=True,
information_criterion='aic', start_q=0, max_p = 6,␣
↪max_q = 1,d=0)
# Order selected is given in summary
print(results.summary())
# This chooses AR 2 model for the mean
ARIMA(0,0,0)(0,0,0)[0] intercept : AIC=-18725.242, Time=0.48 sec
ARIMA(0,0,1)(0,0,0)[0] intercept : AIC=-18728.327, Time=1.39 sec
ARIMA(1,0,0)(0,0,0)[0] intercept : AIC=-18728.142, Time=0.72 sec
ARIMA(1,0,1)(0,0,0)[0] intercept : AIC=-18725.750, Time=3.08 sec
9
ARIMA(2,0,0)(0,0,0)[0] intercept : AIC=-18727.496, Time=2.15 sec
ARIMA(2,0,1)(0,0,0)[0] intercept : AIC=-18725.132, Time=0.84 sec
ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=-18725.559, Time=1.42 sec
ARIMA(3,0,1)(0,0,0)[0] intercept : AIC=-18723.537, Time=1.10 sec
ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=-18728.875, Time=1.10 sec
ARIMA(4,0,1)(0,0,0)[0] intercept : AIC=-18726.957, Time=2.23 sec
ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=-18727.929, Time=3.21 sec
Best model: ARIMA(4,0,0)(0,0,0)[0] intercept
Total fit time: 17.742 seconds
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 3776
Model: SARIMAX(4, 0, 0) Log Likelihood 9370.438
Date: Thu, 16 Nov 2023 AIC -18728.875
Time: 02:38:49 BIC -18691.457
Sample: 0 HQIC -18715.573
- 3776
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 0.0011 0.000 3.328 0.001 0.000 0.002
ar.L1 -0.0367 0.010 -3.720 0.000 -0.056 -0.017
ar.L2 -0.0180 0.011 -1.701 0.089 -0.039 0.003
ar.L3 -0.0038 0.011 -0.358 0.720 -0.024 0.017
ar.L4 0.0375 0.011 3.360 0.001 0.016 0.059
sigma2 0.0004 4.53e-06 90.407 0.000 0.000 0.000
================================================================================
===
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB):
7812.44
Prob(Q): 0.97 Prob(JB):
0.00
Heteroskedasticity (H): 0.62 Skew:
-0.43
Prob(H) (two-sided): 0.00 Kurtosis:
9.99
================================================================================
===
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-
step).
[ ]: ?pm.auto_arima
10
[22]: model = sm.tsa.SARIMAX(AAPLRet, trend='c',order=(4, 0, 0))
fit = model.fit(disp = False)
print(fit.summary());
SARIMAX Results
==============================================================================
Dep. Variable: Adj Close No. Observations: 3776
Model: SARIMAX(4, 0, 0) Log Likelihood 9370.438
Date: Thu, 16 Nov 2023 AIC -18728.875
Time: 02:43:59 BIC -18691.457
Sample: 0 HQIC -18715.573
- 3776
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
intercept 0.0011 0.000 3.328 0.001 0.000 0.002
ar.L1 -0.0367 0.010 -3.720 0.000 -0.056 -0.017
ar.L2 -0.0180 0.011 -1.701 0.089 -0.039 0.003
ar.L3 -0.0038 0.011 -0.358 0.720 -0.024 0.017
ar.L4 0.0375 0.011 3.360 0.001 0.016 0.059
sigma2 0.0004 4.53e-06 90.407 0.000 0.000 0.000
================================================================================
===
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB):
7812.44
Prob(Q): 0.97 Prob(JB):
0.00
Heteroskedasticity (H): 0.62 Skew:
-0.43
Prob(H) (two-sided): 0.00 Kurtosis:
9.99
================================================================================
===
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-
step).
4) Plot Diagnostics on residuals of the model
[23]: plot_diagnosticsTight3(fit.resid,4)
11
There is no auto-correlation and residuals do not pass the Ljung-Box test, there also seem to be
clusters in the data and also the QQ plot does not fit with normality. There could be due to
heteroskedasticity - so we examine the squared residulas
5) Check squared residuals ACF/PACF - these indicate some AR with order maybe 5 - 9,
depending on how complex we wish model to be.
[25]: plot_acf(fit.resid**2, zero = False,auto_ylims=True);
plot_pacf(fit.resid**2, zero = False,auto_ylims=True);
plt.show()
12
13
[ ]: # Above were examples with SARIMAX which are familiar with. In arch_model we␣
↪need to
# specify a mean model using a different method - this is restricted to AR type␣
↪only, with
# potential external regressors.
6) Combine the models by specifying the mean and vol processes into a GARCH model:
[28]: # The model allows us to choose specific lags - this is very similar to the␣
↪SARIMAX function from statsmodels:
# Diff are possibly due to numerical approx / methods/ tolerance
from arch.univariate import ARX
arx = ARX(AAPLRet, lags= 4,rescale = False) #this is eq. to AR(4) as per above
arx.volatility = GARCH(4,0,0)
res = arx.fit(update_freq=0)#, tol=0.000001
print(res.summary())
Optimization terminated successfully (Exit mode 0)
Current function value: -9583.298788829215
Iterations: 15
14
Function evaluations: 202
Gradient evaluations: 11
AR - ARCH Model Results
==============================================================================
Dep. Variable: Adj Close R-squared: -0.002
Mean Model: AR Adj. R-squared: -0.003
Vol Model: ARCH Log-Likelihood: 9583.30
Distribution: Normal AIC: -19146.6
Method: Maximum Likelihood BIC: -19084.2
No. Observations: 3772
Date: Thu, Nov 16 2023 Df Residuals: 3767
Time: 02:52:50 Df Model: 5
Mean Model
================================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------------
Const 1.6720e-03 3.423e-04 4.885 1.037e-06 [1.001e-03,2.343e-03]
Adj Close[1] 8.1657e-03 2.320e-02 0.352 0.725 [-3.731e-02,5.365e-02]
Adj Close[2] -6.9062e-04 2.755e-02 -2.507e-02 0.980 [-5.468e-02,5.330e-02]
Adj Close[3] -0.0410 2.618e-02 -1.566 0.117 [-9.230e-02,1.032e-02]
Adj Close[4] 8.1371e-03 3.788e-02 0.215 0.830 [-6.610e-02,8.238e-02]
Volatility Model
============================================================================
coef std err t P>|t| 95.0% Conf. Int.
----------------------------------------------------------------------------
omega 2.0393e-04 1.264e-05 16.139 1.355e-58 [1.792e-04,2.287e-04]
alpha[1] 0.0500 1.264e-02 3.955 7.643e-05 [2.522e-02,7.476e-02]
alpha[2] 0.0500 1.336e-02 3.742 1.825e-04 [2.381e-02,7.617e-02]
alpha[3] 0.0500 1.215e-02 4.114 3.885e-05 [2.617e-02,7.380e-02]
alpha[4] 0.0500 1.708e-02 2.927 3.423e-03 [1.651e-02,8.346e-02]
============================================================================
Covariance estimator: robust
• I’m suspiscious on the convergence of the model as all alphas are the same.
• We can try using the rescale function to see if we obtain an improvement or
• We can try to scale the returns (by x 100)… let’s try both and watch the effect on coefficients
[29]: arx = ARX(AAPLRet, lags= 4,rescale = True) #this is eq. to AR(4) as per above
arx.volatility = GARCH(4,0,0)
res = arx.fit(update_freq=0)#, tol=0.000001
print(res.summary())
Optimization terminated successfully (Exit mode 0)
Current function value: 7638.755566561721
Iterations: 22
Function evaluations: 271
Gradient evaluations: 22
AR - ARCH Model Results
15
==============================================================================
Dep. Variable: Adj Close R-squared: -0.009
Mean Model: AR Adj. R-squared: -0.010
Vol Model: ARCH Log-Likelihood: -7638.76
Distribution: Normal AIC: 15297.5
Method: Maximum Likelihood BIC: 15359.9
No. Observations: 3772
Date: Thu, Nov 16 2023 Df Residuals: 3767
Time: 02:55:14 Df Model: 5
Mean Model
================================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------------
Const 0.2004 3.334e-02 6.009 1.865e-09 [ 0.135, 0.266]
Adj Close[1] 0.0240 2.410e-02 0.995 0.320 [-2.326e-02,7.120e-02]
Adj Close[2] 0.0130 2.721e-02 0.479 0.632 [-4.029e-02,6.637e-02]
Adj Close[3] -0.0636 3.166e-02 -2.010 4.448e-02 [ -0.126,-1.569e-03]
Adj Close[4] -5.0302e-03 4.418e-02 -0.114 0.909 [-9.162e-02,8.156e-02]
Volatility Model
==========================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------
omega 1.5967 0.164 9.714 2.624e-22 [ 1.275, 1.919]
alpha[1] 0.1987 4.975e-02 3.995 6.482e-05 [ 0.101, 0.296]
alpha[2] 0.1600 3.829e-02 4.178 2.946e-05 [8.492e-02, 0.235]
alpha[3] 0.1531 3.390e-02 4.517 6.274e-06 [8.668e-02, 0.220]
alpha[4] 0.1451 4.692e-02 3.092 1.990e-03 [5.310e-02, 0.237]
==========================================================================
Covariance estimator: robust
[30]: newAAPLRet =AAPLRet*100
arx = ARX(newAAPLRet, lags=4,rescale = False) #this is eq. to AR(4) as per above
arx.volatility = GARCH(4,0,0)
res = arx.fit(update_freq=0)#, tol=0.000001
print(res.summary())
Optimization terminated successfully (Exit mode 0)
Current function value: 7638.755566561721
Iterations: 22
Function evaluations: 271
Gradient evaluations: 22
AR - ARCH Model Results
==============================================================================
Dep. Variable: Adj Close R-squared: -0.009
Mean Model: AR Adj. R-squared: -0.010
Vol Model: ARCH Log-Likelihood: -7638.76
Distribution: Normal AIC: 15297.5
16
Method: Maximum Likelihood BIC: 15359.9
No. Observations: 3772
Date: Thu, Nov 16 2023 Df Residuals: 3767
Time: 02:55:56 Df Model: 5
Mean Model
================================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------------
Const 0.2004 3.334e-02 6.009 1.865e-09 [ 0.135, 0.266]
Adj Close[1] 0.0240 2.410e-02 0.995 0.320 [-2.326e-02,7.120e-02]
Adj Close[2] 0.0130 2.721e-02 0.479 0.632 [-4.029e-02,6.637e-02]
Adj Close[3] -0.0636 3.166e-02 -2.010 4.448e-02 [ -0.126,-1.569e-03]
Adj Close[4] -5.0302e-03 4.418e-02 -0.114 0.909 [-9.162e-02,8.156e-02]
Volatility Model
==========================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------
omega 1.5967 0.164 9.714 2.624e-22 [ 1.275, 1.919]
alpha[1] 0.1987 4.975e-02 3.995 6.482e-05 [ 0.101, 0.296]
alpha[2] 0.1600 3.829e-02 4.178 2.946e-05 [8.492e-02, 0.235]
alpha[3] 0.1531 3.390e-02 4.517 6.274e-06 [8.668e-02, 0.220]
alpha[4] 0.1451 4.692e-02 3.092 1.990e-03 [5.310e-02, 0.237]
==========================================================================
Covariance estimator: robust
Try different GARCH models, GARCH(1,1) gives the lowest AIC.
[32]: arx = ARX(AAPLRet, lags= 4,rescale = True) #this is eq. to AR(4) as per above
arx.volatility = GARCH(1,0,1)
res = arx.fit(update_freq=0)#, tol=0.000001
print(res.summary())
Optimization terminated successfully (Exit mode 0)
Current function value: 7523.744870927528
Iterations: 14
Function evaluations: 153
Gradient evaluations: 14
AR - GARCH Model Results
==============================================================================
Dep. Variable: Adj Close R-squared: -0.002
Mean Model: AR Adj. R-squared: -0.003
Vol Model: GARCH Log-Likelihood: -7523.74
Distribution: Normal AIC: 15063.5
Method: Maximum Likelihood BIC: 15113.4
No. Observations: 3772
Date: Thu, Nov 16 2023 Df Residuals: 3767
Time: 03:02:20 Df Model: 5
Mean Model
17
================================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------------
Const 0.1860 2.996e-02 6.206 5.431e-10 [ 0.127, 0.245]
Adj Close[1] 0.0124 1.868e-02 0.663 0.507 [-2.422e-02,4.899e-02]
Adj Close[2] 1.2484e-03 1.844e-02 6.768e-02 0.946 [-3.490e-02,3.740e-02]
Adj Close[3] -0.0379 1.825e-02 -2.077 3.781e-02 [-7.367e-02,-2.135e-03]
Adj Close[4] 0.0326 1.809e-02 1.802 7.156e-02 [-2.859e-03,6.803e-02]
Volatility Model
==========================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------
omega 0.1450 3.723e-02 3.895 9.832e-05 [7.202e-02, 0.218]
alpha[1] 0.1163 1.958e-02 5.938 2.889e-09 [7.790e-02, 0.155]
beta[1] 0.8481 2.338e-02 36.272 4.523e-288 [ 0.802, 0.894]
==========================================================================
Covariance estimator: robust
7) Analyse outputs and consider diagnostics of model
[37]: # Exploring the outputs
res.plot()
plt.grid()
plt.show()
18
[40]: # Plot conditional vol vs abs(ret)
plt.figure(1, figsize=(8,4))
plt.plot(res.conditional_volatility/100) # just a rescale
plt.plot(abs(AAPLRet), color = 'grey', alpha = 0.5)
plt.grid()
plt.show()
19
[57]:
7) Consider Diagnostics of StdResiduals: Ljung-Box tests to verify no autocorrelation and nor-
mality tests
[36]: std_resid = res.resid/res.conditional_volatility
std_resid = std_resid.dropna()
plot_diagnosticsTight3(std_resid)
20
• The standardized residuals have passed the Ljung-Box test, supporting the case that they are
white noise
• However, the QQ plot still does not seem to fit a normal, potentially a different distribution
shoudl be considered.
• We should verify by formal normality tests: both reject hypothesis of normality.
[41]: JB_test = stats.jarque_bera(std_resid)
print('Jarque-Bera statitiscs: ', JB_test[0])
print('Jarque-Bera p-value: ', JB_test[1])
Jarque-Bera statitiscs: 1322.4110228965922
Jarque-Bera p-value: 6.951763260803393e-288
21
[42]: shap_test = stats.shapiro(std_resid)
print('Shapiro statitiscs: ', shap_test[0])
print('Shapiro p-value: ', shap_test[1])
Shapiro statitiscs: 0.9734292030334473
Shapiro p-value: 6.251361523493347e-26
[ ]:
[ ]:
22