Time Series Models 2 Pynb
Time Series Models 2 Pynb
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Wrap the raw values in a single-column DataFrame named 'close'.
# NOTE(review): data_ is not defined in the visible cells — confirm where it is loaded.
df = pd.DataFrame(np.array(data_),columns=['close'])
df
# Line plot of the raw series with a grid, titled and shown immediately
df.plot(grid=True,label='close_price')
plt.title('Original data plotting')
plt.show()
# Non-stationarity of the data can be observed from visual inspection of the plot
train_data
test_data
close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732
# Deseasonalising: take a lag-12 difference of df_diff and drop the NaN rows
# that the differencing leaves behind.
# NOTE(review): df_diff is defined outside the visible cells — presumably the
# first-differenced (de-trended) series; confirm.
df_diff_diff = df_diff.diff(12).dropna()  # Deseasonalising
df_diff_diff.plot(grid=True,title='Deseasonalised plot')
print(df_diff_diff.head(14))
# Element 1 of the adfuller result is printed as the p-value
print('p-value : ',adfuller(df_diff_diff.dropna())[1])
close
13 -2.598987
14 -0.303058
15 2.929620
16 1.280896
17 0.656418
18 -3.733996
19 2.262514
20 0.561302
21 4.249559
22 -0.915679
23 0.296673
24 -0.516127
25 -0.151564
26 -2.089896
p-value : 3.488078105631728e-05
# PACF of training data for finding order (p) of AR model
plot_pacf(df_diff_diff.dropna(),lags=20);
# Model Fitting (AR model of order (4))
# diff().dropna() leaves an integer index that does not start at 0, which
# statsmodels reports as unsupported (and will reject when forecasting in a
# future version). Fit on a 0-based index instead; the estimates are unchanged.
model = ARIMA(df_diff_diff.reset_index(drop=True),order=(4,0,0))
model_fit = model.fit()
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:473: ValueWarning: An unsupported index was provided. As
a result, forecasts cannot be generated. To use the model for
forecasting, use one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
model_fit.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
========
Dep. Variable: close No. Observations:
147
Model: ARIMA(4, 0, 0) Log Likelihood
-259.835
Date: Sat, 21 Jun 2025 AIC
531.670
Time: 02:43:18 BIC
549.613
Sample: 0 HQIC
538.961
- 147
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
const 0.0154 0.071 0.218 0.828 -0.123
0.154
ar.L1 -0.1794 0.088 -2.047 0.041 -0.351
-0.008
ar.L2 0.1247 0.092 1.349 0.177 -0.056
0.306
ar.L3 -0.4136 0.074 -5.578 0.000 -0.559
-0.268
ar.L4 -0.2460 0.091 -2.710 0.007 -0.424
-0.068
sigma2 1.9970 0.251 7.945 0.000 1.504
2.490
======================================================================
=============
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB):
0.41
Prob(Q): 0.99 Prob(JB):
0.82
Heteroskedasticity (H): 1.20 Skew:
0.13
Prob(H) (two-sided): 0.52 Kurtosis:
2.96
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Forecast one step for every row of test_data
predicted = model_fit.forecast(steps=len(test_data))
# Keep a plain NumPy copy of the forecast values for plotting / arithmetic
pr = np.array(predicted)
print(pr)
print(len(pr))
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:837: ValueWarning: No supported index is available.
Prediction results will be given with an integer index beginning at
`start`.
return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:837: FutureWarning: No supported index is available. In the next
version, calling this method in a model without a supported index will
result in an exception.
return get_prediction_index(
# Draw df_diff_diff and the forecast array `pr` on one set of axes;
# `pr` is placed on the test_data index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.grid()
ax.plot(df_diff_diff.index, df_diff_diff, label='train_data')
ax.plot(
    test_data.index,
    pr,
    color='tab:red',
    linestyle='--',
    label='predicted',
)
ax.set_title('predicted from training data')
ax.legend()
plt.show()
def add_seasonality(diff_preds, diff_ar, k=1):
    """Undo a lag-``k`` seasonal difference on a run of forecasts.

    Parameters
    ----------
    diff_preds : array-like
        Forecasts of the seasonally differenced series.
    diff_ar : array-like
        The series before seasonal differencing; its last ``k`` values
        seed the reconstruction.
    k : int, default 1
        Lag that was used to de-seasonalise the data.

    Returns
    -------
    numpy.ndarray
        Re-integrated forecasts, one value per element of ``diff_preds``.

    Raises
    ------
    ValueError
        If ``k`` is not positive or ``diff_ar`` has fewer than ``k`` values.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Work on flat float arrays so the additions are positional, never
    # aligned on pandas index labels.
    preds = np.asarray(diff_preds, dtype=float).ravel()
    history = np.asarray(diff_ar, dtype=float).ravel()
    if len(history) < k:
        raise ValueError("diff_ar must contain at least k observations")

    # The horizon comes from the forecasts themselves rather than from a
    # module-level variable.
    horizon = len(preds)
    reintegrated_seasonal = np.zeros(horizon)

    # The first k steps are anchored on the last k observed values.
    head = min(k, horizon)
    reintegrated_seasonal[:head] = history[-k:][:head] + preds[:head]

    # Later steps build on the forecast made k steps earlier.
    for i in range(k, horizon):
        reintegrated_seasonal[i] = reintegrated_seasonal[i - k] + preds[i]

    print(reintegrated_seasonal)
    return reintegrated_seasonal
# Value in row (train_size - 1) of the first column of df, used as the base level
yt_1 = df.iloc[(train_size)-1,0]
# Running sum of the forecast shifted by that base level.
# NOTE(review): `forecast` and `train_size` are assigned outside the visible cells.
forecast = forecast.cumsum() + yt_1
print(forecast)
# After adding trend
# NOTE(review): MSE / MAE are not imported in the visible cells — presumably
# sklearn's mean_squared_error / mean_absolute_error; confirm.
print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))
RMSE : 4.651021027785146
MAE : 3.7473731516718236
# Overlay test_data and the forecast on the same x-axis (the test_data index)
plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# MA model (Moving Average model): a regression of the current value of the
# time series on the current and previous white-noise terms:
# Yt = b0 + e_t + b1*e_(t-1) + b2*e_(t-2) + ... + bq*e_(t-q)
close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732
[159 rows x 1 columns]
p-value : 1.2982777528844904e-09
# Deseasonalising: take a lag-12 difference of df_diff and drop the NaN rows
# that the differencing leaves behind.
# NOTE(review): df_diff is defined outside the visible cells — confirm its origin.
df_diff_diff = df_diff.diff(12).dropna()  # Deseasonalising
df_diff_diff.plot(grid=True,title='Deseasonalised plot')
print(df_diff_diff.head(14))
# Element 1 of the adfuller result is printed as the p-value
print('p-value : ',adfuller(df_diff_diff.dropna())[1])
close
13 -2.598987
14 -0.303058
15 2.929620
16 1.280896
17 0.656418
18 -3.733996
19 2.262514
20 0.561302
21 4.249559
22 -0.915679
23 0.296673
24 -0.516127
25 -0.151564
26 -2.089896
p-value : 3.488078105631728e-05
# ACF of training data for finding order (q) of MA model
# (the cut-off of an MA(q) process shows in the ACF, not the PACF)
plot_acf(df_diff_diff.dropna(),lags=20);
# Model Fitting (MA model of order (3))
# Fit on a 0-based index: the index left by diff().dropna() is reported by
# statsmodels as unsupported for forecasting. The estimates are unchanged.
model = ARIMA(df_diff_diff.reset_index(drop=True),order=(0,0,3))
model_fit = model.fit()
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:473: ValueWarning: An unsupported index was provided. As
a result, forecasts cannot be generated. To use the model for
forecasting, use one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607:
ConvergenceWarning: Maximum Likelihood optimization failed to
converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
model_fit.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
========
Dep. Variable: close No. Observations:
147
Model: ARIMA(0, 0, 3) Log Likelihood
-245.689
Date: Sat, 21 Jun 2025 AIC
501.379
Time: 02:43:24 BIC
516.331
Sample: 0 HQIC
507.454
- 147
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
const 0.0004 0.007 0.053 0.958 -0.013
0.013
ma.L1 -0.1466 0.411 -0.357 0.721 -0.952
0.659
ma.L2 0.0565 0.375 0.150 0.880 -0.679
0.792
ma.L3 -0.9070 0.391 -2.319 0.020 -1.674
-0.140
sigma2 1.5848 0.674 2.351 0.019 0.264
2.906
======================================================================
=============
Ljung-Box (L1) (Q): 1.82 Jarque-Bera (JB):
2.15
Prob(Q): 0.18 Prob(JB):
0.34
Heteroskedasticity (H): 1.71 Skew:
0.22
Prob(H) (two-sided): 0.06 Kurtosis:
3.40
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Forecast one step for every row of test_data
prediction = model_fit.forecast(len(test_data))
# Keep a plain NumPy copy of the forecast values for plotting / arithmetic
pr = np.array(prediction)
print(pr)
print(len(pr))
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:837: ValueWarning: No supported index is available.
Prediction results will be given with an integer index beginning at
`start`.
return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:837: FutureWarning: No supported index is available. In the next
version, calling this method in a model without a supported index will
result in an exception.
return get_prediction_index(
# Draw df_diff_diff and the forecast array `pr` on one set of axes;
# `pr` is placed on the test_data index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.grid()
ax.plot(df_diff_diff.index, df_diff_diff, label='train_data')
ax.plot(
    test_data.index,
    pr,
    color='tab:red',
    linestyle='--',
    label='predicted',
)
ax.set_title('predicted from training data')
ax.legend()
plt.show()
def add_seasonality(diff_preds, diff_ar, k=1):
    """Undo a lag-``k`` seasonal difference on a run of forecasts.

    Parameters
    ----------
    diff_preds : array-like
        Forecasts of the seasonally differenced series.
    diff_ar : array-like
        The series before seasonal differencing; its last ``k`` values
        seed the reconstruction.
    k : int, default 1
        Lag that was used to de-seasonalise the data.

    Returns
    -------
    numpy.ndarray
        Re-integrated forecasts, one value per element of ``diff_preds``.

    Raises
    ------
    ValueError
        If ``k`` is not positive or ``diff_ar`` has fewer than ``k`` values.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Work on flat float arrays so the additions are positional, never
    # aligned on pandas index labels.
    preds = np.asarray(diff_preds, dtype=float).ravel()
    history = np.asarray(diff_ar, dtype=float).ravel()
    if len(history) < k:
        raise ValueError("diff_ar must contain at least k observations")

    # The horizon comes from the forecasts themselves rather than from a
    # module-level variable.
    horizon = len(preds)
    reintegrated_seasonal = np.zeros(horizon)

    # The first k steps are anchored on the last k observed values.
    head = min(k, horizon)
    reintegrated_seasonal[:head] = history[-k:][:head] + preds[:head]

    # Later steps build on the forecast made k steps earlier.
    for i in range(k, horizon):
        reintegrated_seasonal[i] = reintegrated_seasonal[i - k] + preds[i]

    print(reintegrated_seasonal)
    return reintegrated_seasonal
# Value in row (train_size - 1) of the first column of df, used as the base level
yt_1 = df.iloc[(train_size)-1,0]
# Running sum of the forecast shifted by that base level.
# NOTE(review): `forecast` and `train_size` are assigned outside the visible cells.
forecast = forecast.cumsum() + yt_1
print(forecast)
# After adding trend
# NOTE(review): MSE / MAE are not imported in the visible cells — presumably
# sklearn's mean_squared_error / mean_absolute_error; confirm.
print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))
RMSE : 3.913253243425683
MAE : 3.197834646125524
# Overlay test_data and the forecast on the same x-axis (the test_data index)
plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
ARMA Model
# ARMA Model
# We will see De-trending and then Deseasonalising of the df Dataframe
close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732
# Deseasonalising: take a lag-12 difference of df_diff and drop the NaN rows
# that the differencing leaves behind.
# NOTE(review): df_diff is defined outside the visible cells — confirm its origin.
df_diff_diff = df_diff.diff(12).dropna()  # Deseasonalising
df_diff_diff.plot(grid=True,title='Deseasonalised plot')
print(df_diff_diff.head(14))
# Element 1 of the adfuller result is printed as the p-value
print('p-value : ',adfuller(df_diff_diff.dropna())[1])
close
13 -2.598987
14 -0.303058
15 2.929620
16 1.280896
17 0.656418
18 -3.733996
19 2.262514
20 0.561302
21 4.249559
22 -0.915679
23 0.296673
24 -0.516127
25 -0.151564
26 -2.089896
p-value : 3.488078105631728e-05
# PACF and ACF plots of training data for AR and MA model order (p,q)
plot_acf(df_diff_diff.dropna(),lags=20);
plot_pacf(df_diff_diff.dropna(),lags=20);
# Model Fitting (ARMA model of order (4,3))
# Fit on a 0-based index: the index left by diff().dropna() is reported by
# statsmodels as unsupported for forecasting. The estimates are unchanged.
model = ARIMA(df_diff_diff.reset_index(drop=True),order=(4,0,3))
model_fit = model.fit()
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:473: ValueWarning: An unsupported index was provided. As
a result, forecasts cannot be generated. To use the model for
forecasting, use one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/
sarimax.py:978: UserWarning: Non-invertible starting MA parameters
found. Using zeros as starting parameters.
warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607:
ConvergenceWarning: Maximum Likelihood optimization failed to
converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
The Ljung-Box test is a statistical test used to check for autocorrelation in a time series. It helps
determine if a series of data points are independently distributed or if there's a pattern where
one data point is related to another at a different point in time. This test is crucial for ensuring
the validity of time series models and forecasts, as it helps identify if there's any autocorrelation
remaining in the residuals after a model is fitted.
Purpose: The Ljung-Box test assesses whether a time series is "white noise," meaning the data
points are random and independent of each other. It is most often applied to the residuals of a
fitted time series model (such as ARIMA) to check whether the model has captured all the
underlying patterns in the data. If the test indicates autocorrelation in the residuals, the model
is not adequate and there is room for improvement.
How it works: The test calculates a statistic (Q) from the squared sample autocorrelations of the
series at lags 1 through h. Under the null hypothesis of no autocorrelation, Q approximately
follows a chi-squared distribution with h degrees of freedom (h - p - q when the series is the
residuals of an ARMA(p, q) model). A small p-value (typically less than 0.05) indicates that at
least one of these autocorrelations is significantly different from zero, suggesting the series is
not white noise.
The null hypothesis of the Jarque-Bera test is that the data follows a normal distribution. In
other words, the test assesses whether the sample data's skewness and kurtosis match those of
a normal distribution. A rejection of the null hypothesis suggests that the data is not normally
distributed.
model_fit.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
========
Dep. Variable: close No. Observations:
147
Model: ARIMA(4, 0, 3) Log Likelihood
-238.966
Date: Sat, 21 Jun 2025 AIC
495.932
Time: 02:43:34 BIC
522.846
Sample: 0 HQIC
506.867
- 147
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
const -0.0002 0.007 -0.029 0.977 -0.013
0.013
ar.L1 -0.2164 0.107 -2.020 0.043 -0.426
-0.006
ar.L2 0.1019 0.103 0.992 0.321 -0.099
0.303
ar.L3 0.1603 0.093 1.716 0.086 -0.023
0.343
ar.L4 -0.2386 0.102 -2.340 0.019 -0.438
-0.039
ma.L1 0.0161 233.822 6.88e-05 1.000 -458.267
458.299
ma.L2 -0.0180 237.572 -7.58e-05 1.000 -465.651
465.615
ma.L3 -0.9981 233.214 -0.004 0.997 -458.088
456.092
sigma2 1.3986 326.794 0.004 0.997 -639.107
641.904
======================================================================
=============
Ljung-Box (L1) (Q): 0.08 Jarque-Bera (JB):
0.91
Prob(Q): 0.77 Prob(JB):
0.64
Heteroskedasticity (H): 1.26 Skew:
0.18
Prob(H) (two-sided): 0.42 Kurtosis:
3.14
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Forecast one step for every row of test_data
predicted = model_fit.forecast(steps=len(test_data))
# Keep a plain NumPy copy of the forecast values for plotting / arithmetic
pr = np.array(predicted)
print(pr)
print(len(pr))
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:837: ValueWarning: No supported index is available.
Prediction results will be given with an integer index beginning at
`start`.
return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:837: FutureWarning: No supported index is available. In the next
version, calling this method in a model without a supported index will
result in an exception.
return get_prediction_index(
# Draw df_diff_diff and the forecast array `pr` on one set of axes;
# `pr` is placed on the test_data index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.grid()
ax.plot(df_diff_diff.index, df_diff_diff, label='train_data')
ax.plot(
    test_data.index,
    pr,
    color='tab:red',
    linestyle='--',
    label='predicted',
)
ax.set_title('predicted from training data')
ax.legend()
plt.show()
def add_seasonality(diff_preds, diff_ar, k=1):
    """Undo a lag-``k`` seasonal difference on a run of forecasts.

    Parameters
    ----------
    diff_preds : array-like
        Forecasts of the seasonally differenced series.
    diff_ar : array-like
        The series before seasonal differencing; its last ``k`` values
        seed the reconstruction.
    k : int, default 1
        Lag that was used to de-seasonalise the data.

    Returns
    -------
    numpy.ndarray
        Re-integrated forecasts, one value per element of ``diff_preds``.

    Raises
    ------
    ValueError
        If ``k`` is not positive or ``diff_ar`` has fewer than ``k`` values.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Work on flat float arrays so the additions are positional, never
    # aligned on pandas index labels.
    preds = np.asarray(diff_preds, dtype=float).ravel()
    history = np.asarray(diff_ar, dtype=float).ravel()
    if len(history) < k:
        raise ValueError("diff_ar must contain at least k observations")

    # The horizon comes from the forecasts themselves rather than from a
    # module-level variable.
    horizon = len(preds)
    reintegrated_seasonal = np.zeros(horizon)

    # The first k steps are anchored on the last k observed values.
    head = min(k, horizon)
    reintegrated_seasonal[:head] = history[-k:][:head] + preds[:head]

    # Later steps build on the forecast made k steps earlier.
    for i in range(k, horizon):
        reintegrated_seasonal[i] = reintegrated_seasonal[i - k] + preds[i]

    print(reintegrated_seasonal)
    return reintegrated_seasonal
# Value in row (train_size - 1) of the first column of df, used as the base level
yt_1 = df.iloc[(train_size)-1,0]
# Running sum of the forecast shifted by that base level.
# NOTE(review): `forecast` and `train_size` are assigned outside the visible cells.
forecast = forecast.cumsum() + yt_1
print(forecast)
# After adding trend
# NOTE(review): MSE / MAE are not imported in the visible cells — presumably
# sklearn's mean_squared_error / mean_absolute_error; confirm.
print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))
RMSE : 3.947252317111391
MAE : 3.2236045905484514
# Overlay test_data and the forecast on the same x-axis (the test_data index)
plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# ARIMA Model Creation and forecasting using the same data
# Deseasonalising: take a lag-12 difference of train_data directly (no prior
# first difference here) and drop the NaN rows the differencing leaves behind.
df_diff_diff = train_data.diff(12).dropna()  # Deseasonalising
df_diff_diff.plot(grid=True,title='Deseasonalised plot ')
print(df_diff_diff.head(14))
# Element 1 of the adfuller result is printed as the p-value
print('p-value : ',adfuller(df_diff_diff.dropna())[1])
close
12 0.605552
13 -1.993435
14 -2.296493
15 0.633127
16 1.914023
17 2.570441
18 -1.163555
19 1.098959
20 1.660261
21 5.909820
22 4.994141
23 5.290815
24 4.774688
25 4.623124
p-value : 0.00010159156279840511
# PACF and ACF plots of training data for AR and MA model lags
plot_acf(df_diff_diff.dropna(),lags=20);
plot_pacf(df_diff_diff.dropna(),lags=20);
# Model Fitting ARIMA of (4,1,3) : Equivalent to AR_MA model of order
# (4,3) after Detrending the data
# Fit on a 0-based index: the index left by diff().dropna() is reported by
# statsmodels as unsupported for forecasting. The estimates are unchanged.
model = ARIMA(df_diff_diff.reset_index(drop=True),order=(4,1,3))
model_fit = model.fit()
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:473: ValueWarning: An unsupported index was provided. As
a result, forecasts cannot be generated. To use the model for
forecasting, use one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:473: ValueWarning: An unsupported index was provided. As a result,
forecasts cannot be generated. To use the model for forecasting, use
one of the supported classes of index.
self._init_dates(dates, freq)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/
sarimax.py:978: UserWarning: Non-invertible starting MA parameters
found. Using zeros as starting parameters.
warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607:
ConvergenceWarning: Maximum Likelihood optimization failed to
converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
model_fit.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
========
Dep. Variable: close No. Observations:
148
Model: ARIMA(4, 1, 3) Log Likelihood
-238.964
Date: Sat, 21 Jun 2025 AIC
493.929
Time: 02:43:39 BIC
517.852
Sample: 0 HQIC
503.649
- 148
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
ar.L1 -0.2154 0.103 -2.096 0.036 -0.417
-0.014
ar.L2 0.1024 0.102 1.002 0.316 -0.098
0.303
ar.L3 0.1582 0.093 1.698 0.090 -0.024
0.341
ar.L4 -0.2384 0.100 -2.380 0.017 -0.435
-0.042
ma.L1 0.0168 773.255 2.17e-05 1.000 -1515.535
1515.568
ma.L2 -0.0172 786.202 -2.19e-05 1.000 -1540.945
1540.911
ma.L3 -0.9995 772.311 -0.001 0.999 -1514.701
1512.701
sigma2 1.3971 1079.464 0.001 0.999 -2114.314
2117.108
======================================================================
=============
Ljung-Box (L1) (Q): 0.09 Jarque-Bera (JB):
0.92
Prob(Q): 0.77 Prob(JB):
0.63
Heteroskedasticity (H): 1.27 Skew:
0.18
Prob(H) (two-sided): 0.41 Kurtosis:
3.14
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Forecast one step for every row of test_data
predicted = model_fit.forecast(steps=len(test_data))
# Keep a plain NumPy copy of the forecast values for plotting / arithmetic
pr = np.array(predicted)
print(pr)
print(len(pr))
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/
tsa_model.py:837: ValueWarning: No supported index is available.
Prediction results will be given with an integer index beginning at
`start`.
return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model
.py:837: FutureWarning: No supported index is available. In the next
version, calling this method in a model without a supported index will
result in an exception.
return get_prediction_index(
# Draw df_diff_diff and the forecast array `pr` on one set of axes;
# `pr` is placed on the test_data index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.grid()
ax.plot(df_diff_diff.index, df_diff_diff, label='train_data')
ax.plot(
    test_data.index,
    pr,
    color='tab:red',
    linestyle='--',
    label='predicted',
)
ax.set_title('predicted from training data')
ax.legend()
plt.show()
print(reintegrated_seasonal)
return reintegrated_seasonal
40
# Error of `forecast` against test_data: root of MSE, then MAE.
# NOTE(review): MSE / MAE and `forecast` are defined outside the visible cells — confirm.
print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))
RMSE : 3.9549642531611253
MAE : 3.2295192758152393
# Overlay test_data and the forecast on the same x-axis (the test_data index)
plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# SARIMA model creation and forecasting of data
# SARIMA is Seasonal + ARIMA
# here we can pass the data without detrending and de-seasonalising
# here we need to mention orders (p,d,q) and (P,D,Q,S)
# we de-trend the data to get the (Yt - T) component, then plot the ACF and
# PACF and choose the values of (p,d,q) accordingly
# Deseasonalising: take a lag-12 difference of train_data and drop the NaN
# rows that the differencing leaves behind.
df_diff_diff = train_data.diff(12).dropna()  # Deseasonalising
df_diff_diff.plot(grid=True,title='Original Data Deseasonalised plot')
print(df_diff_diff.head(14))
# Element 1 of the adfuller result is printed as the p-value
print('p-value : ',adfuller(df_diff_diff.dropna())[1])
close
12 0.605552
13 -1.993435
14 -2.296493
15 0.633127
16 1.914023
17 2.570441
18 -1.163555
19 1.098959
20 1.660261
21 5.909820
22 4.994141
23 5.290815
24 4.774688
25 4.623124
p-value : 0.00010159156279840511
# ACF and PACF of df_diff_diff, 20 lags each (trailing ';' suppresses the
# notebook's echo of the returned figure)
plot_acf(df_diff_diff,lags=20,title='ACF of De-seasonalised data');
plot_pacf(df_diff_diff,lags=20,title='PACF of De-seasonalised data');
# Seasonal part of the SARIMA specification.
P = 1   # seasonal AR order
D = 1   # seasonal differencing order
Q = 2   # seasonal MA order
S = 12  # seasonal period
# statsmodels expects seasonal_order as (P, D, Q, s). The tuple was previously
# built as (P, Q, D, S), which silently swapped D and Q and fitted
# x(1, 2, 1, 12) instead of the intended x(1, 1, 2, 12).
S_order = (P,D,Q,S)
close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/
sarimax.py:978: UserWarning: Non-invertible starting MA parameters
found. Using zeros as starting parameters.
warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607:
ConvergenceWarning: Maximum Likelihood optimization failed to
converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
model_fit.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
======================
Dep. Variable: close No.
Observations: 160
Model: SARIMAX(3, 1, 3)x(1, 2, [1], 12) Log Likelihood
-240.138
Date: Sat, 21 Jun 2025 AIC
498.277
Time: 02:43:48 BIC
524.424
Sample: 0 HQIC
508.902
- 160
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
ar.L1 -0.1093 0.133 -0.824 0.410 -0.370
0.151
ar.L2 0.0671 0.138 0.486 0.627 -0.203
0.338
ar.L3 0.0080 0.127 0.063 0.950 -0.242
0.258
ma.L1 -0.1400 1.148 -0.122 0.903 -2.389
2.109
ma.L2 0.0265 1.028 0.026 0.979 -1.989
2.042
ma.L3 -0.8832 1.042 -0.848 0.397 -2.925
1.158
ar.S.L12 -0.3636 0.109 -3.333 0.001 -0.577
-0.150
ma.S.L12 -0.9968 10.353 -0.096 0.923 -21.288
19.295
sigma2 1.4220 15.330 0.093 0.926 -28.625
31.469
======================================================================
=============
Ljung-Box (L1) (Q): 0.12 Jarque-Bera (JB):
2.13
Prob(Q): 0.73 Prob(JB):
0.34
Heteroskedasticity (H): 1.06 Skew:
0.26
Prob(H) (two-sided): 0.84 Kurtosis:
3.34
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Forecast one step for every row of test_data
predicted = model_fit.forecast(steps=len(test_data))
# Keep a plain NumPy copy of the forecast values for plotting / scoring
pr = np.array(predicted)
print(pr)
print(len(pr))
# Draw train_data and the forecast array `pr` on one set of axes;
# `pr` is placed on the test_data index.
fig, ax = plt.subplots(figsize=(10, 6))
ax.grid()
ax.plot(train_data.index, train_data, label='train_data')
ax.plot(
    test_data.index,
    pr,
    color='tab:red',
    linestyle='--',
    label='predicted',
)
ax.set_title('predicted from training data')
ax.legend()
plt.show()
# Error of the raw forecast `pr` against test_data: root of MSE, then MAE.
# NOTE(review): MSE / MAE are not imported in the visible cells — confirm their source.
print('RMSE : ',np.sqrt(MSE(np.array(test_data),pr)))
print('MAE : ',MAE(np.array(test_data),pr))
RMSE : 3.811545125830629
MAE : 3.120911611139277
# The forecast array is plotted as-is; no re-integration step is applied in this cell
forecast = pr
# Overlay test_data and the forecast on the same x-axis (the test_data index)
plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# create all combinations of differencing orders, applying seasonal
differencing first and then general differencing
def differencing(timeseries, s, D_max=2, d_max=2):
for i in diff_series:
# unpack the results
a, b, c, d, e, f = adfuller(diff_series[i].dropna())
g, h, i = e.values()
results = [a, b, c, d, g, h, i]
summary.append(results)
return summary
# Plot PACF
# NOTE(review): PACF and PACF_ci are computed outside the visible cells.
plt.figure(figsize=(10,3))
plt.plot(PACF, color='k', label='PACF')
# One label per plotted column of PACF_ci; the empty string keeps the second
# line out of the legend. The label literal must stay on a single line.
plt.plot(PACF_ci, color='tab:blue', linestyle='--',
         label=['95% Confidence Interval', ''])
plt.legend()
plt.tight_layout();
# Collect, for every series in passed_series, the ACF values of the lags whose
# autocorrelation falls outside the 95% confidence band.
# NOTE(review): passed_series is built outside the visible cells.
from statsmodels.tsa.stattools import acf  # not imported at the top of the file

df_sp_q = pd.DataFrame()
for i in passed_series:
    # unpack the results into ACF and their CI
    ACF, ACF_ci = acf(passed_series[i].dropna(), alpha=0.05)
    # subtract the upper and lower limits of CI by ACF to centre CI at zero
    ACF_ci_ll = ACF_ci[:,0] - ACF
    ACF_ci_ul = ACF_ci[:,1] - ACF
    # masks of significant spikes representing possible value of q & Q
    below = ACF < ACF_ci_ll
    above = ACF > ACF_ci_ul
    sp1 = np.where(below)[0]
    sp2 = np.where(above)[0]
    # ACF values of the significant spikes (negative spikes as magnitudes)
    sp1_value = abs(ACF[below])
    sp2_value = ACF[above]
    # store values to dataframe, indexed by lag
    sp1_series = pd.Series(sp1_value, index=sp1)
    sp2_series = pd.Series(sp2_value, index=sp2)
    df_sp_q = pd.concat((df_sp_q, sp1_series, sp2_series), axis=1)
df_sp_q = df_sp_q.sort_index() # Sort the dataframe by index
# visualize sums of values of significant spikes in ACF plots ordered by lags
# (the first row of df_sp_q is skipped via iloc[1:])
df_sp_q.iloc[1:].T.sum().plot(kind='bar', title='Candidate MA Terms',
                              xlabel='nth lag', ylabel='Sum of ACF', figsize=(8,3));
# Candidate values for each SARIMA hyper-parameter
# non-seasonal (p, d, q)
p, d, q = [1, 2, 3], [0, 1], [1, 2]
# seasonal (P, D, Q) and the seasonal period s
P, D, Q = [0, 1], [0, 1, 2], [0, 1]
s = [12]
import warnings
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error as mse
warnings.simplefilter("ignore")
def SARIMA_grid(endog, order, seasonal_order):
train_data
import time
# Wall-clock timing of whatever runs between start and end.
# NOTE(review): no statement is visible between the two time.time() calls here —
# presumably the grid-search call belongs between them; confirm against the notebook.
start = time.time()
end = time.time()
print(f'time required: {end - start :.2f}')
{"model_id":"c11e696caa0b4a37b08f6d01926d65f1","version_major":2,"vers
ion_minor":0}
# Discard the MAPE column, then display the remaining table
model_info = model_info.drop(columns=['MAPE'])
model_info
# Rows attaining the lowest AIC and the lowest BIC respectively
L1 = model_info.loc[model_info['AIC'] == model_info['AIC'].min()]
L2 = model_info.loc[model_info['BIC'] == model_info['BIC'].min()]
# Stack both selections (AIC winners first) and display them
best_models = pd.concat([L1, L2])
best_models
model_fit.summary()
<class 'statsmodels.iolib.summary.Summary'>
"""
SARIMAX Results
======================================================================
====================
Dep. Variable: close No. Observations:
160
Model: SARIMAX(3, 1, 1)x(0, 1, 1, 12) Log Likelihood
-235.017
Date: Sat, 21 Jun 2025 AIC
482.034
Time: 02:47:06 BIC
499.977
Sample: 0 HQIC
489.325
- 160
======================================================================
========
coef std err z P>|z| [0.025
0.975]
----------------------------------------------------------------------
--------
ar.L1 0.5453 0.096 5.679 0.000 0.357
0.734
ar.L2 0.2763 0.097 2.835 0.005 0.085
0.467
ar.L3 -0.4616 0.089 -5.206 0.000 -0.635
-0.288
ma.L1 -0.9138 0.052 -17.728 0.000 -1.015
-0.813
ma.S.L12 -0.9061 0.142 -6.365 0.000 -1.185
-0.627
sigma2 1.2307 0.161 7.660 0.000 0.916
1.546
======================================================================
=============
Ljung-Box (L1) (Q): 0.65 Jarque-Bera (JB):
4.16
Prob(Q): 0.42 Prob(JB):
0.13
Heteroskedasticity (H): 1.07 Skew:
0.38
Prob(H) (two-sided): 0.82 Kurtosis:
3.29
======================================================================
=============
Warnings:
[1] Covariance matrix calculated using the outer product of gradients
(complex-step).
"""
# Forecast one step for every row of test_data
prediction = model_fit.forecast(steps=len(test_data))
# Keep a plain NumPy copy of the forecast values for plotting / scoring
pr = np.array(prediction)
print(pr)
print(len(pr))
# Draw train_data and the forecast array `pr` in one figure;
# `pr` is placed on the test_data index.
plt.figure(figsize=(10,6))
plt.grid()
plt.plot(train_data.index,train_data,label='train_data')
plt.plot(test_data.index,pr,label='predicted',linestyle='--',
color='tab:red')
plt.title('predicted from training data')
plt.legend()
plt.show()
# Error of the raw forecast `pr` against test_data: root of MSE, then MAE.
# NOTE(review): MSE / MAE are not imported in the visible cells — confirm their source.
print('RMSE : ',np.sqrt(MSE(np.array(test_data),pr)))
print('MAE : ',MAE(np.array(test_data),pr))
RMSE : 3.936924793032358
MAE : 3.219658364456228
# The forecast array is plotted as-is; no re-integration step is applied in this cell
forecast = pr
# Overlay test_data and the forecast on the same x-axis (the test_data index)
plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()