Time Series Models 2

#!pip install --upgrade numpy pandas scikit-learn statsmodels yfinance   # or other relevant libraries

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Simulate 200 observations: MA(2) noise plus a linear trend and seasonality
ma_data = arma_generate_sample(ar=np.array([1.0]), ma=np.array([1.0, 0.7, 0.8]),
                               nsample=200, scale=1, burnin=1000)
time = np.arange(200)
trend = time * 0.2
seasonality = 2*np.sin(2*np.pi*time/12)   # seasonal period of 12
data_ = trend + seasonality + ma_data

df = pd.DataFrame(np.array(data_),columns=['close'])
df

{"summary":"{\n \"name\": \"df\",\n \"rows\": 200,\n \"fields\": [\


n {\n \"column\": \"close\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 11.9834479732621,\n
\"min\": -3.6445210775896832,\n \"max\": 40.455921833410194,\n
\"num_unique_values\": 200,\n \"samples\": [\n
17.59394884297853,\n 6.2673986021411885,\n
4.096005031238933\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n }\n ]\
n}","type":"dataframe","variable_name":"df"}

df.plot(grid=True,label='close_price')
plt.title('Original data plotting')
plt.show()
# Non-stationary data can be observed from visual inspection of the plot

print('p value of the data : ',adfuller(df['close'])[1])

p value of the data : 0.9067678556529257

# Train and test splitting of the data

k = 0.8
train_size = int(len(df)*k)
train_data = df[:train_size]
test_data = df[train_size:]
print('Train_data shape : ',train_data.shape,' test_data shape : ',test_data.shape)

Train_data shape : (160, 1) test_data shape : (40, 1)

train_data

{"summary":"{\n \"name\": \"train_data\",\n \"rows\": 160,\n


\"fields\": [\n {\n \"column\": \"close\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
9.71712908669858,\n \"min\": -3.6445210775896832,\n
\"max\": 34.20894246053696,\n \"num_unique_values\": 160,\n
\"samples\": [\n 18.68097495712577,\n
21.76625072474885,\n 25.727888287444575\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"train_data"}

test_data

{"summary":"{\n \"name\": \"test_data\",\n \"rows\": 40,\n


\"fields\": [\n {\n \"column\": \"close\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
2.6482908606836437,\n \"min\": 30.306172594639495,\n
\"max\": 40.455921833410194,\n \"num_unique_values\": 40,\n
\"samples\": [\n 37.23189179949507,\n
34.62804134499462,\n 33.41869736828887\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"test_data"}

# AR model creation - an autoregressive model has the form:
# Y_t = a_0 + a_1*Y_{t-1} + a_2*Y_{t-2} + ... + a_p*Y_{t-p} + e_t
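As a minimal sketch of this recursion (the coefficients a1 = 0.5, a2 = -0.2 below are made up for illustration, not taken from any fitted model), an AR(2) series can be simulated directly:

# illustrative AR(2) simulation via the recursion above (hypothetical coefficients)
rng = np.random.default_rng(0)
a1, a2 = 0.5, -0.2            # assumed values, for illustration only
e = rng.normal(size=100)      # white noise e_t
y = np.zeros(100)
for t in range(2, 100):
    y[t] = 0.1 + a1*y[t-1] + a2*y[t-2] + e[t]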

# Differencing the data to remove trend (detrending)
df_diff = train_data.diff().dropna()
df_diff.plot(grid=True, title='De-trended plot')
print(df_diff)
print('p-value : ',adfuller(df_diff.dropna())[1])   # a p-value near 0 means the data is stationary

close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732

[159 rows x 1 columns]

p-value :  1.2982777528844904e-09

# Deseasonalising

df_diff_diff = df_diff.diff(12).dropna()   # seasonal differencing with lag 12
df_diff_diff.plot(grid=True,title='Deseasonalised plot')
print(df_diff_diff.head(14))
print('p-value : ',adfuller(df_diff_diff.dropna())[1])

close
13 -2.598987
14 -0.303058
15 2.929620
16 1.280896
17 0.656418
18 -3.733996
19 2.262514
20 0.561302
21 4.249559
22 -0.915679
23 0.296673
24 -0.516127
25 -0.151564
26 -2.089896
p-value : 3.488078105631728e-05
# PACF of training data for finding the order (p) of the AR model

plot_pacf(df_diff_diff.dropna(),lags=20);

# Model fitting (AR model of order 4)
model = ARIMA(df_diff_diff,order=(4,0,0))
model_fit = model.fit()

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.
  self._init_dates(dates, freq)
(the same ValueWarning is emitted three times)

model_fit.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
                               SARIMAX Results
==============================================================================
Dep. Variable:                  close   No. Observations:                  147
Model:                 ARIMA(4, 0, 0)   Log Likelihood                -259.835
Date:                Sat, 21 Jun 2025   AIC                            531.670
Time:                        02:43:18   BIC                            549.613
Sample:                             0   HQIC                           538.961
                                - 147
Covariance Type:                  opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0154      0.071      0.218      0.828      -0.123       0.154
ar.L1         -0.1794      0.088     -2.047      0.041      -0.351      -0.008
ar.L2          0.1247      0.092      1.349      0.177      -0.056       0.306
ar.L3         -0.4136      0.074     -5.578      0.000      -0.559      -0.268
ar.L4         -0.2460      0.091     -2.710      0.007      -0.424      -0.068
sigma2         1.9970      0.251      7.945      0.000       1.504       2.490
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):                 0.41
Prob(Q):                              0.99   Prob(JB):                         0.82
Heteroskedasticity (H):               1.20   Skew:                             0.13
Prob(H) (two-sided):                  0.52   Kurtosis:                         2.96
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
"""

predicted = model_fit.forecast(steps=len(test_data))
pr = np.array(predicted)
print(pr)
print(len(pr))

[-0.24395575  0.99398945 -1.21530272 -0.20950739 -0.43875788  0.33712729
  0.29676158  0.2481745  -0.01268371 -0.14610412 -0.12467083 -0.02530243
  0.07889835  0.09654973  0.06000274  0.0012162  -0.02572793 -0.01744982
  0.01101127  0.03254325  0.03543328  0.0237911   0.01033274  0.00480342
  0.0082218   0.01534968  0.02009488  0.02007861  0.01688406  0.01373901
  0.01274435  0.01385597  0.01561923  0.01662658  0.01645061  0.01560501
  0.01488435  0.01473319  0.0150635   0.01549148]
40

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(

fig , ax = plt.subplots(1,1,figsize=(10,6))
plt.grid()
plt.plot(df_diff_diff.index,df_diff_diff,label='train_data')
plt.plot(test_data.index,pr,label='predicted',linestyle='--', color='tab:red')
plt.title('predicted from training data')
plt.legend()
plt.show()

def add_seasonality(diff_preds, diff_ar, k=1):
    # diff_preds: predictions on the de-seasonalised scale
    # diff_ar: the series before de-seasonalising; k: the lag used to de-seasonalise
    reintegrated_seasonal = np.zeros(len(test_data))
    # seed the first k values with the last k observed values plus the predictions
    reintegrated_seasonal[:k] = diff_ar[-k:] + diff_preds[:k]
    for i in range(k, len(test_data)):
        reintegrated_seasonal[i] = reintegrated_seasonal[i-k] + diff_preds[i]
    print(reintegrated_seasonal)
    return reintegrated_seasonal

forecast = add_seasonality(pr, df_diff['close'], 12)

# After adding seasonality

[-2.00034298 -0.05838706  2.73189784 -0.72203109 -0.86732663 -1.94310309
  0.07459317  0.79792634  0.80535861  1.26097233  0.32503058  2.63342989
 -1.92144462  0.03816267  2.79190058 -0.72081489 -0.89305456 -1.96055291
  0.08560444  0.83046959  0.84079189  1.28476343  0.33536332  2.63823331
 -1.91322282  0.05351235  2.81199546 -0.70073628 -0.8761705  -1.9468139
  0.09834879  0.84432557  0.85641112  1.30139002  0.35181393  2.65383832
 -1.89833846  0.06824553  2.82705896 -0.6852448 ]

# Adding Trend to predicted data

yt_1 = df.iloc[(train_size)-1,0]
forecast = forecast.cumsum() + yt_1

print(forecast)
# After adding trend

[32.20859948 32.15021242 34.88211026 34.16007918 33.29275255 31.34964946
 31.42424263 32.22216897 33.02752758 34.28849991 34.6135305  37.24696039
 35.32551577 35.36367844 38.15557902 37.43476413 36.54170957 34.58115666
 34.6667611  35.4972307  36.33802259 37.62278603 37.95814935 40.59638266
 38.68315984 38.73667219 41.54866765 40.84793137 39.97176087 38.02494697
 38.12329576 38.96762133 39.82403245 41.12542246 41.47723639 44.13107471
 42.23273624 42.30098178 45.12804074 44.44279594]

# Mean Squared Error
def MSE(y_true, y_pred):
    mse = np.mean(np.power(y_true-y_pred, 2))
    return mse

# Mean Absolute Error
def MAE(y_true, y_pred):
    mae = np.mean(np.abs(y_true-y_pred))
    return mae
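As a quick sanity check (a sketch assuming scikit-learn is available, as in the pip install at the top), these hand-rolled metrics should match the library implementations:

from sklearn.metrics import mean_squared_error, mean_absolute_error

# illustrative values only
y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.1, 1.9, 3.2])
assert np.isclose(MSE(y_true, y_pred), mean_squared_error(y_true, y_pred))
assert np.isclose(MAE(y_true, y_pred), mean_absolute_error(y_true, y_pred))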

print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))

RMSE : 4.651021027785146
MAE : 3.7473731516718236

plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# MA model (Moving Average model): a regression of the current value on previous
# white-noise terms: Y_t = b_0 + e_t + b_1*e_{t-1} + b_2*e_{t-2} + ... + b_q*e_{t-q}
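For contrast with the AR recursion above, here is a minimal sketch of an MA(2) series (b1 = 0.7, b2 = 0.8 mirror the coefficients used to generate ma_data earlier); note that Y_t depends only on current and past noise terms, never on past values of Y:

# illustrative MA(2) simulation (coefficients chosen to mirror the data-generation step)
rng = np.random.default_rng(0)
b1, b2 = 0.7, 0.8
e = rng.normal(size=100)      # white noise e_t
y = np.zeros(100)
for t in range(2, 100):
    y[t] = e[t] + b1*e[t-1] + b2*e[t-2]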

# Differencing the data to remove trend (detrending)
df_diff = train_data.diff().dropna()
df_diff.plot(grid=True, title='De-trended plot')
print(df_diff)
print('p-value : ',adfuller(df_diff.dropna())[1])   # a p-value near 0 means the data is stationary

close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732
[159 rows x 1 columns]
p-value :  1.2982777528844904e-09

# Deseasonalising

df_diff_diff = df_diff.diff(12).dropna()   # seasonal differencing with lag 12
df_diff_diff.plot(grid=True,title='Deseasonalised plot')
print(df_diff_diff.head(14))
print('p-value : ',adfuller(df_diff_diff.dropna())[1])

close
13 -2.598987
14 -0.303058
15 2.929620
16 1.280896
17 0.656418
18 -3.733996
19 2.262514
20 0.561302
21 4.249559
22 -0.915679
23 0.296673
24 -0.516127
25 -0.151564
26 -2.089896
p-value : 3.488078105631728e-05

# ACF of training data for finding the order (q) of the MA model

plot_acf(df_diff_diff.dropna(),lags=20);

# Model fitting (MA model of order 3)
model = ARIMA(df_diff_diff,order=(0,0,3))
model_fit = model.fit()

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.
  self._init_dates(dates, freq)
(the same ValueWarning is emitted three times)
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
model_fit.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
                               SARIMAX Results
==============================================================================
Dep. Variable:                  close   No. Observations:                  147
Model:                 ARIMA(0, 0, 3)   Log Likelihood                -245.689
Date:                Sat, 21 Jun 2025   AIC                            501.379
Time:                        02:43:24   BIC                            516.331
Sample:                             0   HQIC                           507.454
                                - 147
Covariance Type:                  opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0004      0.007      0.053      0.958      -0.013       0.013
ma.L1         -0.1466      0.411     -0.357      0.721      -0.952       0.659
ma.L2          0.0565      0.375      0.150      0.880      -0.679       0.792
ma.L3         -0.9070      0.391     -2.319      0.020      -1.674      -0.140
sigma2         1.5848      0.674      2.351      0.019       0.264       2.906
===================================================================================
Ljung-Box (L1) (Q):                   1.82   Jarque-Bera (JB):                 2.15
Prob(Q):                              0.18   Prob(JB):                         0.34
Heteroskedasticity (H):               1.71   Skew:                             0.22
Prob(H) (two-sided):                  0.06   Kurtosis:                         3.40
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
"""

prediction = model_fit.forecast(len(test_data))
pr = np.array(prediction)
print(pr)
print(len(pr))

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(

[ 7.67915941e-01  1.46132981e+00 -3.39254224e+00  3.51326221e-04
  3.51326221e-04  3.51326221e-04  3.51326221e-04  3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04
3.51326221e-04 3.51326221e-04 3.51326221e-04 3.51326221e-04]
40

fig , ax = plt.subplots(1,1,figsize=(10,6))
plt.grid()
plt.plot(df_diff_diff.index,df_diff_diff,label='train_data')
plt.plot(test_data.index,pr,label='predicted',linestyle='--', color='tab:red')
plt.title('predicted from training data')
plt.legend()
plt.show()

def add_seasonality(diff_preds, diff_ar, k=1):
    # diff_preds: predictions on the de-seasonalised scale
    # diff_ar: the series before de-seasonalising; k: the lag used to de-seasonalise
    reintegrated_seasonal = np.zeros(len(test_data))
    # seed the first k values with the last k observed values plus the predictions
    reintegrated_seasonal[:k] = diff_ar[-k:] + diff_preds[:k]
    for i in range(k, len(test_data)):
        reintegrated_seasonal[i] = reintegrated_seasonal[i-k] + diff_preds[i]
    print(reintegrated_seasonal)
    return reintegrated_seasonal

forecast = add_seasonality(pr, df_diff['close'], 12)

# After adding seasonality

[-0.98847128  0.40895329  0.55465833 -0.51217237 -0.42821742 -2.27987906
 -0.22181708  0.55010317  0.81839365  1.40742777  0.45005274  2.65908365
 -0.98811996  0.40930462  0.55500966 -0.51182104 -0.42786609 -2.27952773
 -0.22146576  0.55045449  0.81874498  1.4077791   0.45040407  2.65943498
 -0.98776863  0.40965595  0.55536098 -0.51146972 -0.42751476 -2.2791764
 -0.22111443  0.55080582  0.8190963   1.40813043  0.45075539  2.6597863
 -0.9874173   0.41000727  0.55571231 -0.51111839]

# Adding Trend to predicted data

yt_1 = df.iloc[(train_size)-1,0]
forecast = forecast.cumsum() + yt_1

print(forecast)
# After adding trend

[33.22047118 33.62942447 34.1840828  33.67191043 33.24369302 30.96381396
 30.74199688 31.29210005 32.11049369 33.51792147 33.96797421 36.62705786
 35.6389379  36.04824252 36.60325218 36.09143114 35.66356504 33.38403732
 33.16257156 33.71302605 34.53177103 35.93955013 36.38995419 39.04938917
 38.06162054 38.47127649 39.02663747 38.51516775 38.08765299 35.80847659
 35.58736216 36.13816798 36.95726428 38.3653947  38.8161501  41.4759364
 40.48851909 40.89852637 41.45423868 40.94312029]

print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))

RMSE : 3.913253243425683
MAE : 3.197834646125524

plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
ARMA Model

# ARMA Model
# We will de-trend and then de-seasonalise the df DataFrame

# Differencing the data to remove trend (detrending)
df_diff = train_data.diff().dropna()
df_diff.plot(grid=True, title='De-trended plot')
print(df_diff)
print('p-value : ',adfuller(df_diff.dropna())[1])   # a p-value near 0 means the data is stationary

close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732

[159 rows x 1 columns]

p-value :  1.2982777528844904e-09

# Deseasonalising

df_diff_diff = df_diff.diff(12).dropna()   # seasonal differencing with lag 12
df_diff_diff.plot(grid=True,title='Deseasonalised plot')
print(df_diff_diff.head(14))
print('p-value : ',adfuller(df_diff_diff.dropna())[1])

close
13 -2.598987
14 -0.303058
15 2.929620
16 1.280896
17 0.656418
18 -3.733996
19 2.262514
20 0.561302
21 4.249559
22 -0.915679
23 0.296673
24 -0.516127
25 -0.151564
26 -2.089896
p-value : 3.488078105631728e-05

# PACF and ACF plots of training data for the AR and MA model orders (p,q)

plot_acf(df_diff_diff.dropna(),lags=20);
plot_pacf(df_diff_diff.dropna(),lags=20);

# Model fitting (ARMA model of order (4,3))
model = ARIMA(df_diff_diff,order=(4,0,3))
model_fit = model.fit()

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.
  self._init_dates(dates, freq)
(the same ValueWarning is emitted three times)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "

The Ljung-Box test is a statistical test used to check for autocorrelation in a time series. It helps
determine whether a series of data points is independently distributed or whether one data point is
related to another at a different point in time. This test is crucial for ensuring the validity of time
series models and forecasts, as it identifies any autocorrelation remaining in the residuals after a
model is fitted.

Purpose: The Ljung-Box test assesses whether a time series is "white noise," meaning the data
points are random and independent of each other. It is often used to check the residuals of a time
series model (like ARIMA) to see if the model has captured all the underlying patterns in the
data. If the test indicates autocorrelation in the residuals, the model is not adequate and there is
room for improvement.

How it works: The test calculates a statistic (Q) based on the autocorrelations of the time series
at different lags (time differences). It then compares this statistic to a chi-squared distribution
with degrees of freedom related to the number of lags considered. A small p-value (typically less
than 0.05) indicates that the autocorrelations are significantly different from zero, suggesting the
time series is not white noise.

The null hypothesis of the Jarque-Bera test is that the data follows a normal distribution. In
other words, the test assesses whether the sample data's skewness and kurtosis match those of
a normal distribution. A rejection of the null hypothesis suggests that the data is not normally
distributed.
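As a sketch of how to run both tests directly on the fitted model's residuals (using the model_fit object from the cell above; lags=[10] is an arbitrary illustrative choice):

from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.stattools import jarque_bera

resid = model_fit.resid

# Ljung-Box: H0 = no autocorrelation in the residuals up to the given lag
print(acorr_ljungbox(resid, lags=[10], return_df=True))   # columns: lb_stat, lb_pvalue

# Jarque-Bera: H0 = residuals are normally distributed
jb_stat, jb_pvalue, skew, kurt = jarque_bera(resid)
print(f'JB = {jb_stat:.2f}, p = {jb_pvalue:.2f}, skew = {skew:.2f}, kurtosis = {kurt:.2f}')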

model_fit.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
                               SARIMAX Results
==============================================================================
Dep. Variable:                  close   No. Observations:                  147
Model:                 ARIMA(4, 0, 3)   Log Likelihood                -238.966
Date:                Sat, 21 Jun 2025   AIC                            495.932
Time:                        02:43:34   BIC                            522.846
Sample:                             0   HQIC                           506.867
                                - 147
Covariance Type:                  opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0002      0.007     -0.029      0.977      -0.013       0.013
ar.L1         -0.2164      0.107     -2.020      0.043      -0.426      -0.006
ar.L2          0.1019      0.103      0.992      0.321      -0.099       0.303
ar.L3          0.1603      0.093      1.716      0.086      -0.023       0.343
ar.L4         -0.2386      0.102     -2.340      0.019      -0.438      -0.039
ma.L1          0.0161    233.822   6.88e-05      1.000    -458.267     458.299
ma.L2         -0.0180    237.572  -7.58e-05      1.000    -465.651     465.615
ma.L3         -0.9981    233.214     -0.004      0.997    -458.088     456.092
sigma2         1.3986    326.794      0.004      0.997    -639.107     641.904
===================================================================================
Ljung-Box (L1) (Q):                   0.08   Jarque-Bera (JB):                 0.91
Prob(Q):                              0.77   Prob(JB):                         0.64
Heteroskedasticity (H):               1.26   Skew:                             0.18
Prob(H) (two-sided):                  0.42   Kurtosis:                         3.14
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
"""

predicted = model_fit.forecast(steps=len(test_data))
pr = np.array(predicted)
print(pr)
print(len(pr))

[ 1.86382784e-01  2.02218718e+00 -3.44262223e+00  3.22246561e-01
 -1.41312378e-01 -9.71086529e-01  1.06865496e+00 -4.30006207e-01
7.98564441e-02 3.41657174e-01 -3.89941900e-01 2.34392384e-01
-5.49975315e-02 -1.08453742e-01 1.48252167e-01 -1.08108527e-01
3.40237143e-02 3.10311531e-02 -5.61750941e-02 4.63440920e-02
-1.91263039e-02 -7.77024730e-03 2.03383969e-02 -1.95429606e-02
9.39545326e-03 8.62877570e-04 -7.43984451e-03 7.64159141e-03
-4.74126340e-03 1.81157896e-04 2.25196290e-03 -3.27773520e-03
1.87372619e-03 -6.47452894e-04 -9.57118743e-04 9.98018769e-04
-1.08996170e-03 1.13161034e-04 2.72013731e-05 -6.32731231e-04]
40

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(

fig , ax = plt.subplots(1,1,figsize=(10,6))
plt.grid()
plt.plot(df_diff_diff.index,df_diff_diff,label='train_data')
plt.plot(test_data.index,pr,label='predicted',linestyle='--', color='tab:red')
plt.title('predicted from training data')
plt.legend()
plt.show()

def add_seasonality(diff_preds, diff_ar, k=1):
    # diff_preds: predictions on the de-seasonalised scale
    # diff_ar: the series before de-seasonalising; k: the lag used to de-seasonalise
    reintegrated_seasonal = np.zeros(len(test_data))
    # seed the first k values with the last k observed values plus the predictions
    reintegrated_seasonal[:k] = diff_ar[-k:] + diff_preds[:k]
    for i in range(k, len(test_data)):
        reintegrated_seasonal[i] = reintegrated_seasonal[i-k] + diff_preds[i]
    print(reintegrated_seasonal)
    return reintegrated_seasonal

forecast = add_seasonality(pr, df_diff['close'], 12)

# After adding seasonality

[-1.57000444  0.96981067  0.50457834 -0.19027713 -0.56988112 -3.25131691
  0.84648656  0.11974563  0.89789877  1.74873362  0.05975951  2.89312471
 -1.62500197  0.86135693  0.65283051 -0.29838566 -0.53585741 -3.22028576
  0.79031146  0.16608973  0.87877246  1.74096337  0.08009791  2.87358175
 -1.61560652  0.8622198   0.64539066 -0.29074407 -0.54059867 -3.2201046
  0.79256343  0.16281199  0.88064619  1.74031592  0.07914079  2.87457977
 -1.61669648  0.86233296  0.64541786 -0.2913768 ]

# Adding Trend to predicted data

yt_1 = df.iloc[(train_size)-1,0]
forecast = forecast.cumsum() + yt_1

print(forecast)
# After adding trend

[32.63893802 33.60874869 34.11332703 33.92304989 33.35316877 30.10185186
 30.94833842 31.06808405 31.96598282 33.71471644 33.77447595 36.66760066
 35.04259869 35.90395562 36.55678612 36.25840046 35.72254306 32.5022573
 33.29256876 33.45865849 34.33743095 36.07839432 36.15849224 39.03207398
 37.41646747 38.27868727 38.92407793 38.63333386 38.09273519 34.87263059
 35.66519402 35.82800601 36.7086522  38.44896812 38.52810891 41.40268868
 39.7859922  40.64832516 41.29374302 41.00236622]

print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))

RMSE : 3.947252317111391
MAE : 3.2236045905484514

plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# ARIMA model creation and forecasting using the same data

# In this model the order has the form (p,d,q); d represents differencing,
# while p and q are the AR and MA orders
# so in this model we pass the de-seasonalised data
# Y_t = T + S + e  (T is trend, S is seasonality)
# Y_t - S = T + e is passed into the ARIMA model; the data is passed without
# de-trending, so the parameter d in (p,d,q) is the differencing required to
# de-trend the (Y_t - S) data
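A quick way to see this equivalence in code (a hedged sketch, not part of the original run): letting ARIMA handle the de-trending with d=1 should give AR/MA coefficients close to de-trending manually and fitting an ARMA.

# sketch: d=1 inside ARIMA vs. manual differencing followed by ARMA(4,3)
m1 = ARIMA(train_data.diff(12).dropna(), order=(4, 1, 3)).fit()
m2 = ARIMA(train_data.diff(12).diff().dropna(), order=(4, 0, 3)).fit()
print(m1.params['ar.L1'], m2.params['ar.L1'])   # expected to be close, not identical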

# Deseasonalising

df_diff_diff = train_data.diff(12).dropna()   # seasonal differencing with lag 12
df_diff_diff.plot(grid=True,title='Deseasonalised plot')
print(df_diff_diff.head(14))
print('p-value : ',adfuller(df_diff_diff.dropna())[1])

close
12 0.605552
13 -1.993435
14 -2.296493
15 0.633127
16 1.914023
17 2.570441
18 -1.163555
19 1.098959
20 1.660261
21 5.909820
22 4.994141
23 5.290815
24 4.774688
25 4.623124
p-value : 0.00010159156279840511

# PACF and ACF plots of training data for the AR and MA model lags
# the PACF plot suggests the p order of the AR part
# the ACF plot suggests the q order of the MA part

plot_acf(df_diff_diff.dropna(),lags=20);
plot_pacf(df_diff_diff.dropna(),lags=20);

# Model fitting ARIMA(4,1,3): equivalent to an ARMA model of order (4,3) after de-trending the data
# here we assume a linear trend, so one round of differencing suffices: d = 1

model = ARIMA(df_diff_diff,order=(4,1,3))
model_fit = model.fit()

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.
  self._init_dates(dates, freq)
(the same ValueWarning is emitted three times)
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "

model_fit.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
                               SARIMAX Results
==============================================================================
Dep. Variable:                  close   No. Observations:                  148
Model:                 ARIMA(4, 1, 3)   Log Likelihood                -238.964
Date:                Sat, 21 Jun 2025   AIC                            493.929
Time:                        02:43:39   BIC                            517.852
Sample:                             0   HQIC                           503.649
                                - 148
Covariance Type:                  opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2154      0.103     -2.096      0.036      -0.417      -0.014
ar.L2          0.1024      0.102      1.002      0.316      -0.098       0.303
ar.L3          0.1582      0.093      1.698      0.090      -0.024       0.341
ar.L4         -0.2384      0.100     -2.380      0.017      -0.435      -0.042
ma.L1          0.0168    773.255   2.17e-05      1.000   -1515.535    1515.568
ma.L2         -0.0172    786.202  -2.19e-05      1.000   -1540.945    1540.911
ma.L3         -0.9995    772.311     -0.001      0.999   -1514.701    1512.701
sigma2         1.3971   1079.464      0.001      0.999   -2114.314    2117.108
===================================================================================
Ljung-Box (L1) (Q):                   0.09   Jarque-Bera (JB):                 0.92
Prob(Q):                              0.77   Prob(JB):                         0.63
Heteroskedasticity (H):               1.27   Skew:                             0.18
Prob(H) (two-sided):                  0.41   Kurtosis:                         3.14
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
"""

predicted = model_fit.forecast(steps=len(test_data))
pr = np.array(predicted)
print(pr)
print(len(pr))

[3.7777179  5.80533623 2.35994991 2.68350129 2.5342638  1.57093169
 2.6358463  2.20702306 2.29160485 2.62765963 2.24216658 2.47525347
2.41857254 2.31352987 2.45914896 2.35247848 2.38726334 2.41693302
2.36250502 2.40820596 2.38918809 2.38227866 2.40202818 2.3831604
2.39268857 2.39347622 2.38658792 2.39415879 2.38967527 2.39013868
2.39242004 2.38946148 2.39147478 2.39098859 2.39028742 2.39141265
2.39054148 2.39084936 2.39103905 2.39062358]
40

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.
  return get_prediction_index(
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.
  return get_prediction_index(

fig , ax = plt.subplots(1,1,figsize=(10,6))
plt.grid()
plt.plot(df_diff_diff.index,df_diff_diff,label='train_data')
plt.plot(test_data.index,pr,label='predicted',linestyle='--', color='tab:red')
plt.title('predicted from training data')
plt.legend()
plt.show()

def add_seasonality(diff_preds, diff_ar, k=1):
    # diff_preds: predictions on the de-seasonalised scale
    # diff_ar: the series before de-seasonalising; k: the lag used to de-seasonalise
    reintegrated_seasonal = np.zeros(len(test_data))
    # seed the first k values with the last k observed values plus the predictions
    reintegrated_seasonal[:k] = diff_ar[-k:] + diff_preds[:k]
    for i in range(k, len(test_data)):
        reintegrated_seasonal[i] = reintegrated_seasonal[i-k] + diff_preds[i]
    print(reintegrated_seasonal)
    return reintegrated_seasonal

forecast = add_seasonality(pr, train_data['close'], 12)

len(forecast)

[32.65202319 33.627265   34.12907925 33.94010693 33.3623007  30.11873821
 30.96148441 31.08241301 31.98503713 33.72816835 33.79237671 36.68419593
 35.07059573 35.94079487 36.58822821 36.29258541 35.74956404 32.53567123
 33.32398943 33.49061897 34.37422521 36.11044701 36.19440489 39.06735633
 37.4632843  38.33427109 38.97481613 38.6867442  38.13923931 34.92580991
 35.71640947 35.88008045 36.76569999 38.5014356  38.58469231 41.45876898
 39.85382578 40.72512045 41.36585518 41.07736778]

40

print('RMSE : ',np.sqrt(MSE(np.array(test_data),forecast)))
print('MAE : ',MAE(np.array(test_data),forecast))

RMSE : 3.9549642531611253
MAE : 3.2295192758152393

plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# SARIMA model creation and forecasting of the data
# SARIMA is Seasonal + ARIMA
# here we can pass the data without de-trending and de-seasonalising
# here we need to specify the orders (p,d,q) and (P,D,Q,S)

# How to determine (p,d,q) and (P,D,Q,S)?

# (p,d,q) is the order of the non-seasonal component, i.e. (Yt - S)
# (P,D,Q,S) is the order of the seasonal component, i.e. (Yt - T)
# so we first de-seasonalise the data to get the (Yt - S) component, plot its
# ACF and PACF, and determine the values of (P,D,Q,S) accordingly
# S is the number of periods, e.g. S = 12 for annually periodic monthly data

# we de-trend the data to get the (Yt - T) component, plot its ACF and
# PACF, and determine the values of (p,d,q) accordingly
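As a minimal sketch of how the two tuples plug into the model (the orders below are placeholders, not the values selected later in this section); note that statsmodels expects seasonal_order as (P, D, Q, s):

# sketch only: placeholder orders, not the tuned values
sketch_model = SARIMAX(train_data, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
sketch_fit = sketch_model.fit(disp=False)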

# Deseasonalising

df_diff_diff = train_data.diff(12).dropna()   # seasonal differencing with lag 12
df_diff_diff.plot(grid=True,title='Original Data Deseasonalised plot')
print(df_diff_diff.head(14))
print('p-value : ',adfuller(df_diff_diff.dropna())[1])

close
12 0.605552
13 -1.993435
14 -2.296493
15 0.633127
16 1.914023
17 2.570441
18 -1.163555
19 1.098959
20 1.660261
21 5.909820
22 4.994141
23 5.290815
24 4.774688
25 4.623124
p-value : 0.00010159156279840511
plot_acf(df_diff_diff,lags=20,title='ACF of De-seasonalised data');
plot_pacf(df_diff_diff,lags=20,title='PACF of De-seasonalised data');
P = 1
D = 1
Q = 2
S = 12
S_order = (P, D, Q, S)   # statsmodels expects seasonal_order as (P, D, Q, s)

# Differencing the data to remove trend (detrending)
df_diff = train_data.diff().dropna()
df_diff.plot(grid=True, title='Original data De-trended plot')
print(df_diff)
print('p-value : ',adfuller(df_diff.dropna())[1])   # a p-value near 0 means the data is stationary

close
1 3.346162
2 1.747919
3 0.611830
4 -1.559499
5 -1.034193
.. ...
155 0.549752
156 0.818042
157 1.407076
158 0.449701
159 2.658732

[159 rows x 1 columns]

p-value :  1.2982777528844904e-09

plot_acf(df_diff,lags=20,title='ACF of De-trended data');
plot_pacf(df_diff,lags=20,title='PACF of De-trended data');
p = 3
d = 1
q = 3
order = (p,d,q)

# Model fitting (SARIMA model): (p,d,q) x (P,D,Q,S)

model = SARIMAX(train_data, order = order, seasonal_order = S_order)
model_fit = model.fit()

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.11/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "

model_fit.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
                                      SARIMAX Results
============================================================================================
Dep. Variable:                                close   No. Observations:                  160
Model:             SARIMAX(3, 1, 3)x(1, 2, [1], 12)   Log Likelihood                -240.138
Date:                              Sat, 21 Jun 2025   AIC                            498.277
Time:                                      02:43:48   BIC                            524.424
Sample:                                           0   HQIC                           508.902
                                              - 160
Covariance Type:                                opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.1093      0.133     -0.824      0.410      -0.370       0.151
ar.L2          0.0671      0.138      0.486      0.627      -0.203       0.338
ar.L3          0.0080      0.127      0.063      0.950      -0.242       0.258
ma.L1         -0.1400      1.148     -0.122      0.903      -2.389       2.109
ma.L2          0.0265      1.028      0.026      0.979      -1.989       2.042
ma.L3         -0.8832      1.042     -0.848      0.397      -2.925       1.158
ar.S.L12      -0.3636      0.109     -3.333      0.001      -0.577      -0.150
ma.S.L12      -0.9968     10.353     -0.096      0.923     -21.288      19.295
sigma2         1.4220     15.330      0.093      0.926     -28.625      31.469
===================================================================================
Ljung-Box (L1) (Q):                   0.12   Jarque-Bera (JB):                 2.13
Prob(Q):                              0.73   Prob(JB):                         0.34
Heteroskedasticity (H):               1.06   Skew:                             0.26
Prob(H) (two-sided):                  0.84   Kurtosis:                         3.34
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
"""

predicted = model_fit.forecast(steps=len(test_data))
pr = np.array(predicted)
print(pr)
print(len(pr))

[32.63906969 33.91643287 33.80946257 33.99129674 32.55991399 30.96213667
 30.68841905 31.5926066  31.67118061 33.57657141 34.06616551 35.77973614
 34.09804581 34.69413592 36.35348484 36.50693062 35.50621922 33.49260184
 33.04192511 33.77300554 33.9558969  35.59100502 35.94570993 38.06103785
 36.37600403 37.38559677 38.69498952 39.08283064 37.99158878 35.96146041
 35.37942245 36.12920538 36.10962154 37.75390164 38.03726641 40.06462451
 38.33680706 39.36185224 41.09074671 41.61743747]
40

fig , ax = plt.subplots(1,1,figsize=(10,6))
plt.grid()
plt.plot(train_data.index,train_data,label='train_data')
plt.plot(test_data.index,pr,label='predicted',linestyle='--',
color='tab:red')
plt.title('predicted from training data')
plt.legend()
plt.show()
print('RMSE : ',np.sqrt(MSE(np.array(test_data),pr)))
print('MAE : ',MAE(np.array(test_data),pr))

RMSE : 3.811545125830629
MAE : 3.120911611139277

forecast = pr

plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
# create all combinations of differencing orders, applying seasonal
# differencing first and then general differencing
def differencing(timeseries, s, D_max=2, d_max=2):

    # seasonal differencing from 0 to D_max
    seas_differenced = []
    for i in range(D_max+1):
        timeseries.name = f"d0_D{i}_s{s}"
        seas_differenced.append(timeseries)
        timeseries = timeseries.diff(periods=s)
    seas_df = pd.DataFrame(seas_differenced).T

    # general differencing from 1 to d_max on each seasonally differenced series
    general_differenced = []
    for j, ts in enumerate(seas_differenced):
        for i in range(1,d_max+1):
            ts = ts.diff()
            ts.name = f"d{i}_D{j}_s{s}"
            general_differenced.append(ts)
    gen_df = pd.DataFrame(general_differenced).T

    # concatenate seasonal and general differencing dataframes
    return pd.concat([seas_df, gen_df], axis=1)

# create the differenced series
diff_series = differencing(train_data['close'], s=12, D_max=2, d_max=2)
diff_series

{"summary":"{\n \"name\": \"diff_series\",\n \"rows\": 160,\n


\"fields\": [\n {\n \"column\": \"d0_D0_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
9.71712908669858,\n \"min\": -3.6445210775896832,\n
\"max\": 34.20894246053696,\n \"num_unique_values\": 160,\n
\"samples\": [\n 18.68097495712577,\n
21.76625072474885,\n 25.727888287444575\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d0_D1_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
1.865285047317591,\n \"min\": -2.296493252581687,\n
\"max\": 7.6861951142586165,\n \"num_unique_values\": 148,\n
\"samples\": [\n 4.028122075249183,\n
5.2311878312897555,\n 2.675282672350253\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d0_D2_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
2.9821826702889904,\n \"min\": -9.594362794649061,\n
\"max\": 6.616558996210974,\n \"num_unique_values\": 136,\n
\"samples\": [\n 4.4236640331724,\n -
0.8819432323785268,\n -4.931684738552949\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d1_D0_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
1.5510556840414214,\n \"min\": -4.433690900673383,\n
\"max\": 4.208129077683946,\n \"num_unique_values\": 159,\n
\"samples\": [\n -0.9459135536527441,\n
0.8180423227216203,\n 1.9771944830554418\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d2_D0_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
2.074678968676605,\n \"min\": -5.284559193678842,\n
\"max\": 4.999577078129374,\n \"num_unique_values\": 158,\n
\"samples\": [\n -2.433411407655509,\n
1.8504650165747805,\n -3.0731835710599533\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d1_D1_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
1.6285442428940018,\n \"min\": -3.733996439147243,\n
\"max\": 5.236556818566619,\n \"num_unique_values\": 147,\n
\"samples\": [\n -0.9359430047826329,\n -
2.009318299068969,\n -0.2852930495501589\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d2_D1_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
2.490239945937002,\n \"min\": -5.521849868116778,\n
\"max\": 7.148489852091732,\n \"num_unique_values\": 146,\n
\"samples\": [\n -3.8154482263356595,\n
0.500260156678852,\n -0.3525297123166302\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d1_D2_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
2.472298740231023,\n \"min\": -6.417286387965113,\n
\"max\": 6.172499823349252,\n \"num_unique_values\": 135,\n
\"samples\": [\n -0.608450368645876,\n -
3.6864613624688953,\n -0.7775101909665771\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"d2_D2_s12\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
3.706372590205228,\n \"min\": -7.881638444082373,\n
\"max\": 12.006891768709622,\n \"num_unique_values\": 134,\n
\"samples\": [\n -3.450842990851161,\n -
2.4392711509368343,\n -6.367115571711086\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"diff_series"}

# create a summary of ADF test results for all the series
def adf_summary(diff_series):
    summary = []

    for col in diff_series:
        # unpack the results
        a, b, c, d, e, f = adfuller(diff_series[col].dropna())
        cv1, cv5, cv10 = e.values()
        results = [a, b, c, d, cv1, cv5, cv10]
        summary.append(results)

    columns = ["Test Statistic", "p-value", "#Lags Used", "No. of Obs. Used",
               "Critical Value (1%)", "Critical Value (5%)", "Critical Value (10%)"]
    index = diff_series.columns
    summary = pd.DataFrame(summary, index=index, columns=columns)

    return summary

# create the summary
summary = adf_summary(diff_series)

# filter away results that are not stationary
summary_passed = summary[summary["p-value"] < 0.05]
summary_passed

{"summary":"{\n \"name\": \"summary_passed\",\n \"rows\": 8,\n


\"fields\": [\n {\n \"column\": \"Test Statistic\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
1.287184869625847,\n \"min\": -7.682861737052302,\n
\"max\": -3.901420453565611,\n \"num_unique_values\": 8,\n
\"samples\": [\n -6.497850309622152,\n -
6.138716330797678,\n -4.656282659812482\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"p-value\",\n \"properties\":
{\n \"dtype\": \"number\",\n \"std\":
0.0007087541917717493,\n \"min\": 1.487931091094185e-11,\n
\"max\": 0.0020244181841361265,\n \"num_unique_values\": 8,\n
\"samples\": [\n 1.181983584876745e-08,\n
8.06829047199941e-08,\n 0.00010159156279840511\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"#Lags Used\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
1,\n \"min\": 11,\n \"max\": 14,\n
\"num_unique_values\": 4,\n \"samples\": [\n 11,\n
13,\n 14\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\": \"No.
of Obs. Used\",\n \"properties\": {\n \"dtype\":
\"number\",\n \"std\": 9,\n \"min\": 120,\n
\"max\": 146,\n \"num_unique_values\": 7,\n \"samples\":
[\n 133,\n 124,\n 122\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Critical Value (1%)\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0.0036821415235950656,\n \"min\": -3.486055829282407,\n
\"max\": -3.4759527332353084,\n \"num_unique_values\": 7,\n
\"samples\": [\n -3.480500383888377,\n -
3.484219653271961,\n -3.4851223522012855\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Critical Value (5%)\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0.0016017485140968058,\n \"min\": -2.8859430324074076,\n
\"max\": -2.881548071241103,\n \"num_unique_values\": 7,\n
\"samples\": [\n -2.8835279559405045,\n -
2.885145235641637,\n -2.88553750045158\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"Critical Value (10%)\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
0.0008551149021405202,\n \"min\": -2.5797850694444446,\n
\"max\": -2.577438765246763,\n \"num_unique_values\": 7,\n
\"samples\": [\n -2.578495716547007,\n -
2.579359138917794,\n -2.5795685622144586\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"summary_passed"}

# output indices as a list
index_list = pd.Index.tolist(summary_passed.index)

# use the list as a condition to keep stationary time-series
passed_series = diff_series[index_list].sort_index(axis=1)

from statsmodels.tsa.stattools import acf, pacf

PACF, PACF_ci = pacf(passed_series.iloc[:,0].dropna(), alpha=0.05)

# Plot PACF
plt.figure(figsize=(10,3))
plt.plot(PACF, color='k', label='PACF')
plt.plot(PACF_ci, color='tab:blue', linestyle='--',
         label=['95% Confidence Interval', ''])
plt.legend()
plt.tight_layout();

# subtract the confidence interval from the PACF to center the CI at zero
plt.figure(figsize=(10,3))
plt.fill_between(range(22), PACF_ci[:,0] - PACF, PACF_ci[:,1] - PACF,
                 color='tab:blue', alpha=0.3)
plt.hlines(y=0.0, xmin=0, xmax=29, linewidth=1, color='gray')

# Display the PACF as bars
plt.vlines(range(22), [0], PACF[:22], color='black')
plt.tight_layout();

df_sp_p = pd.DataFrame()   # empty dataframe to store values of significant spikes in PACF plots
for i in passed_series:
    # unpack the results into PACF and their CI
    PACF, PACF_ci = pacf(passed_series[i].dropna(), alpha=0.05, method='ywm')
    # subtract the upper and lower limits of the CI by PACF to centre the CI at zero
    PACF_ci_ll = PACF_ci[:,0] - PACF
    PACF_ci_ul = PACF_ci[:,1] - PACF
    # find positions of significant spikes representing possible values of p & P
    sp1 = np.where(PACF < PACF_ci_ll)[0]
    sp2 = np.where(PACF > PACF_ci_ul)[0]
    # PACF values of the significant spikes
    sp1_value = abs(PACF[PACF < PACF_ci_ll])
    sp2_value = PACF[PACF > PACF_ci_ul]
    # store values to dataframe
    sp1_series = pd.Series(sp1_value, index=sp1)
    sp2_series = pd.Series(sp2_value, index=sp2)
    df_sp_p = pd.concat((df_sp_p, sp1_series, sp2_series), axis=1)
df_sp_p = df_sp_p.sort_index()   # sort the dataframe by index

# visualize sums of values of significant spikes in PACF plots, ordered by lag
df_sp_p.iloc[1:].T.sum().plot(kind='bar', title='Candidate AR Terms',
                              xlabel='nth lag', ylabel='Sum of PACF', figsize=(8,3));

df_sp_q = pd.DataFrame()
for i in passed_series:
    # unpack the results into ACF and their CI
    ACF, ACF_ci = acf(passed_series[i].dropna(), alpha=0.05)
    # subtract the upper and lower limits of the CI by ACF to centre the CI at zero
    ACF_ci_ll = ACF_ci[:,0] - ACF
    ACF_ci_ul = ACF_ci[:,1] - ACF
    # find positions of significant spikes representing possible values of q & Q
    sp1 = np.where(ACF < ACF_ci_ll)[0]
    sp2 = np.where(ACF > ACF_ci_ul)[0]
    # ACF values of the significant spikes
    sp1_value = abs(ACF[ACF < ACF_ci_ll])
    sp2_value = ACF[ACF > ACF_ci_ul]
    # store values to dataframe
    sp1_series = pd.Series(sp1_value, index=sp1)
    sp2_series = pd.Series(sp2_value, index=sp2)
    df_sp_q = pd.concat((df_sp_q, sp1_series, sp2_series), axis=1)
df_sp_q = df_sp_q.sort_index()   # sort the dataframe by index

# visualize sums of values of significant spikes in ACF plots, ordered by lag
df_sp_q.iloc[1:].T.sum().plot(kind='bar', title='Candidate MA Terms',
                              xlabel='nth lag', ylabel='Sum of ACF', figsize=(8,3));

from itertools import product

# possible values
p = [1, 2, 3]
d = [0, 1]
q = [1, 2]
P = [0, 1]
D = [0, 1, 2]
Q = [0, 1]
s = [12]

# create all combinations of possible values
pdq = list(product(p, d, q))
PDQm = list(product(P, D, Q, s))

print(f"Number of total combinations: {len(pdq)*len(PDQm)}")

Number of total combinations: 144

import warnings
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error as mse
warnings.simplefilter("ignore")

def SARIMA_grid(endog, order, seasonal_order):

    # create an empty list to store values
    model_info = []

    # fit the model for every combination of orders
    for i in tqdm(order):
        for j in seasonal_order:
            try:
                model_fit = SARIMAX(endog=endog, order=i,
                                    seasonal_order=j).fit(disp=False)
                predict = model_fit.predict()

                # calculate evaluation metrics: MAPE, MSE, AIC & BIC
                MAPE = (abs((endog-predict)[1:])/(endog[1:])).mean()
                MSE = mse(endog[1:], predict[1:])
                AIC = model_fit.aic
                BIC = model_fit.bic

                # save order, seasonal order & evaluation metrics
                model_info.append([i, j, MAPE, MSE, AIC, BIC])
            except:
                continue

    # create a dataframe to store info of all models
    columns = ["order", "seasonal_order", "MAPE", "MSE", "AIC", "BIC"]
    model_info = pd.DataFrame(data=model_info, columns=columns)
    return model_info

train_data

{"summary":"{\n \"name\": \"train_data\",\n \"rows\": 160,\n


\"fields\": [\n {\n \"column\": \"close\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
9.71712908669858,\n \"min\": -3.6445210775896832,\n
\"max\": 34.20894246053696,\n \"num_unique_values\": 160,\n
\"samples\": [\n 18.68097495712577,\n
21.76625072474885,\n 25.727888287444575\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"train_data"}
test_data

{"summary":"{\n \"name\": \"test_data\",\n \"rows\": 40,\n


\"fields\": [\n {\n \"column\": \"close\",\n
\"properties\": {\n \"dtype\": \"number\",\n \"std\":
2.6482908606836437,\n \"min\": 30.306172594639495,\n
\"max\": 40.455921833410194,\n \"num_unique_values\": 40,\n
\"samples\": [\n 37.23189179949507,\n
34.62804134499462,\n 33.41869736828887\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"test_data"}

import time

start = time.time()

# fit all combinations into the model
model_info = SARIMA_grid(endog=train_data, order=pdq, seasonal_order=PDQm)

end = time.time()
print(f'time required: {end - start :.2f}')

{"model_id":"c11e696caa0b4a37b08f6d01926d65f1","version_major":2,"vers
ion_minor":0}

time required: 193.78

model_info = model_info.drop(columns=['MAPE'])

model_info

{"summary":"{\n \"name\": \"model_info\",\n \"rows\": 144,\n


\"fields\": [\n {\n \"column\": \"order\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 12,\n \"samples\": [\n [\n
3,\n 1,\n 1\n ],\n [\n
3,\n 0,\n 2\n ],\n [\n
1,\n 0,\n 1\n ]\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"seasonal_order\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 12,\n \"samples\": [\n [\n
1,\n 2,\n 0,\n 12\n ],\n
[\n 1,\n 1,\n 1,\n 12\n
],\n [\n 0,\n 0,\n 0,\n
12\n ]\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"MSE\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 1.2623634113933708,\n \"min\": 1.4045519881963537,\n
\"max\": 6.23718964011322,\n \"num_unique_values\": 144,\n
\"samples\": [\n 2.1398555971874003,\n
1.8130468064236145,\n 4.625718874487259\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"AIC\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 33.144742437532784,\n
\"min\": 480.1249028746027,\n \"max\": 627.8013669176464,\n
\"num_unique_values\": 144,\n \"samples\": [\n
492.1415619156642,\n 564.1869904524099,\n
594.061088320967\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BIC\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 31.079544130204955,\n \"min\": 499.9769905363252,\n
\"max\": 639.4224660314002,\n \"num_unique_values\": 144,\n
\"samples\": [\n 516.1192601057771,\n
582.6380333438128,\n 608.5874622131591\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"model_info"}

# 10 models with the least MSE
least_MSE = model_info.nsmallest(10, "MSE")
least_MSE

{"summary":"{\n \"name\": \"least_MSE\",\n \"rows\": 10,\n


\"fields\": [\n {\n \"column\": \"order\",\n
\"properties\": {\n \"dtype\": \"string\",\n
\"num_unique_values\": 6,\n \"samples\": [\n [\n
2,\n 1,\n 2\n ],\n [\n
3,\n 0,\n 2\n ],\n [\n
2,\n 0,\n 2\n ]\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"seasonal_order\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 3,\n \"samples\": [\n [\n
1,\n 0,\n 1,\n 12\n ],\n
[\n 1,\n 0,\n 0,\n 12\n
],\n [\n 0,\n 0,\n 1,\n
12\n ]\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"MSE\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 0.03725254628261546,\n \"min\": 1.4045519881963537,\n
\"max\": 1.5185192360823299,\n \"num_unique_values\": 10,\n
\"samples\": [\n 1.5116306604278449,\n
1.4112266574998145,\n 1.4782586189758766\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"AIC\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 7.093209372430016,\n
\"min\": 514.2602324974321,\n \"max\": 537.4497710722164,\n
\"num_unique_values\": 10,\n \"samples\": [\n
526.5457684689404,\n 525.618218943418,\n
521.007326275158\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BIC\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 7.520991458971778,\n \"min\": 535.7425619129738,\n
\"max\": 558.9759877788532,\n \"num_unique_values\": 10,\n
\"samples\": [\n 544.9591936822618,\n
550.2196094652886,\n 542.4896556906996\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"least_MSE"}

# 10 models with the least AIC
least_AIC = model_info.nsmallest(10, "AIC")
least_AIC

{"summary":"{\n \"name\": \"least_AIC\",\n \"rows\": 10,\n


\"fields\": [\n {\n \"column\": \"order\",\n
\"properties\": {\n \"dtype\": \"string\",\n
\"num_unique_values\": 6,\n \"samples\": [\n [\n
3,\n 1,\n 2\n ],\n [\n
3,\n 1,\n 1\n ],\n [\n
1,\n 1,\n 2\n ]\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"seasonal_order\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 3,\n \"samples\": [\n [\n
1,\n 1,\n 1,\n 12\n ],\n
[\n 0,\n 1,\n 1,\n 12\n
],\n [\n 1,\n 2,\n 1,\n
12\n ]\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"MSE\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 0.9617536029064016,\n \"min\": 1.5814016052863062,\n
\"max\": 3.737747112637574,\n \"num_unique_values\": 10,\n
\"samples\": [\n 2.1715423509242324,\n
1.6207955924417519,\n 3.737687707994945\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"AIC\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 5.75059003973722,\n
\"min\": 480.1249028746027,\n \"max\": 494.5049607208043,\n
\"num_unique_values\": 10,\n \"samples\": [\n
492.48756217311995,\n 480.92212314284495,\n
490.3868415023121\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BIC\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 5.972585249827593,\n \"min\": 499.9769905363252,\n
\"max\": 516.1192601057771,\n \"num_unique_values\": 10,\n
\"samples\": [\n 513.4680480894688,\n
501.8551512502961,\n 510.7754257024645\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"least_AIC"}
# 10 models with the least BIC
least_BIC = model_info.nsmallest(10, "BIC")
least_BIC

{"summary":"{\n \"name\": \"least_BIC\",\n \"rows\": 10,\n


\"fields\": [\n {\n \"column\": \"order\",\n
\"properties\": {\n \"dtype\": \"string\",\n
\"num_unique_values\": 6,\n \"samples\": [\n [\n
3,\n 1,\n 1\n ],\n [\n
3,\n 1,\n 2\n ],\n [\n
3,\n 0,\n 2\n ]\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"seasonal_order\",\n
\"properties\": {\n \"dtype\": \"category\",\n
\"num_unique_values\": 3,\n \"samples\": [\n [\n
0,\n 1,\n 1,\n 12\n ],\n
[\n 1,\n 1,\n 1,\n 12\n
],\n [\n 1,\n 2,\n 1,\n
12\n ]\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"MSE\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 0.9801710401235275,\n \"min\": 1.5814016052863062,\n
\"max\": 3.737747112637574,\n \"num_unique_values\": 10,\n
\"samples\": [\n 1.7496144437366088,\n
1.6207955924417519,\n 1.76090294397524\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"AIC\",\n \"properties\": {\n
\"dtype\": \"number\",\n \"std\": 6.169968295694707,\n
\"min\": 480.1249028746027,\n \"max\": 495.729602652447,\n
\"num_unique_values\": 10,\n \"samples\": [\n
495.729602652447,\n 480.92212314284495,\n
494.5049607208043\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BIC\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 5.642475883348594,\n \"min\": 499.9769905363252,\n
\"max\": 515.6879940210398,\n \"num_unique_values\": 10,\n
\"samples\": [\n 513.6721981731195,\n
501.8551512502961,\n 509.457123654698\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n }\n ]\n}","type":"dataframe","variable_name":"least_BIC"}

set(least_AIC.index) & set(least_BIC.index)

{23, 39, 71, 111, 119, 123, 129, 135, 141}

# the best model by each metric

L1 = model_info[model_info.AIC == model_info.AIC.min()]
L2 = model_info[model_info.BIC == model_info.BIC.min()]
best_models = pd.concat((L1, L2))
best_models

{"summary":"{\n \"name\": \"best_models\",\n \"rows\": 2,\n


\"fields\": [\n {\n \"column\": \"order\",\n
\"properties\": {\n \"dtype\": \"string\",\n
\"num_unique_values\": 2,\n \"samples\": [\n [\n
3,\n 1,\n 1\n ],\n [\n
3,\n 1,\n 2\n ]\n ],\n
\"semantic_type\": \"\",\n \"description\": \"\"\n }\
n },\n {\n \"column\": \"seasonal_order\",\n
\"properties\": {\n \"dtype\": \"string\",\n
\"num_unique_values\": 2,\n \"samples\": [\n [\n
0,\n 1,\n 1,\n 12\n ],\n
[\n 1,\n 1,\n 1,\n 12\n
]\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"MSE\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 0.05707387543402115,\n \"min\": 1.5814016052863062,\n
\"max\": 1.6621162539822916,\n \"num_unique_values\": 2,\n
\"samples\": [\n 1.6621162539822916,\n
1.5814016052863062\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"AIC\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 1.3502148415589614,\n \"min\": 480.1249028746027,\n
\"max\": 482.0343950156528,\n \"num_unique_values\": 2,\n
\"samples\": [\n 482.0343950156528,\n
480.1249028746027\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n },\n {\n \"column\":
\"BIC\",\n \"properties\": {\n \"dtype\": \"number\",\n
\"std\": 2.8788954800260074,\n \"min\": 499.9769905363252,\n
\"max\": 504.0483635688326,\n \"num_unique_values\": 2,\n
\"samples\": [\n 499.9769905363252,\n
504.0483635688326\n ],\n \"semantic_type\": \"\",\n
\"description\": \"\"\n }\n }\n ]\
n}","type":"dataframe","variable_name":"best_models"}

# Fitting the best model to check results

i = 1   # fit the model with the lowest BIC

model = SARIMAX(train_data, order=best_models.iloc[i,0],
                seasonal_order=best_models.iloc[i,1])
model_fit = model.fit()

model_fit.summary()

<class 'statsmodels.iolib.summary.Summary'>
"""
                                     SARIMAX Results
==========================================================================================
Dep. Variable:                              close   No. Observations:                  160
Model:             SARIMAX(3, 1, 1)x(0, 1, 1, 12)   Log Likelihood                -235.017
Date:                            Sat, 21 Jun 2025   AIC                            482.034
Time:                                    02:47:06   BIC                            499.977
Sample:                                         0   HQIC                           489.325
                                            - 160
Covariance Type:                              opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.5453      0.096      5.679      0.000       0.357       0.734
ar.L2          0.2763      0.097      2.835      0.005       0.085       0.467
ar.L3         -0.4616      0.089     -5.206      0.000      -0.635      -0.288
ma.L1         -0.9138      0.052    -17.728      0.000      -1.015      -0.813
ma.S.L12      -0.9061      0.142     -6.365      0.000      -1.185      -0.627
sigma2         1.2307      0.161      7.660      0.000       0.916       1.546
===================================================================================
Ljung-Box (L1) (Q):                   0.65   Jarque-Bera (JB):                 4.16
Prob(Q):                              0.42   Prob(JB):                         0.13
Heteroskedasticity (H):               1.07   Skew:                             0.38
Prob(H) (two-sided):                  0.82   Kurtosis:                         3.29
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
"""

prediction = model_fit.forecast(steps=len(test_data))
pr = np.array(prediction)
print(pr)
print(len(pr))

[33.46180797 33.64885849 32.74907525 32.15401819 30.53200641 30.47409896
 30.866001   31.81087371 32.83458312 34.31641538 35.95984549 36.62909376
 36.03047499 35.19780261 34.70133531 33.9759051  32.86493489 32.86350765
 33.48758737 34.3387776  35.34948119 36.69116514 38.29782237 38.91429922
 38.3414357  37.5252012  37.06917258 36.3584468  35.25908037 35.24937719
 35.86536264 36.70449621 37.71020902 39.04957655 40.65915699 41.27889156
 40.70968139 39.89498983 39.43930823 38.72751163]
40

plt.figure(figsize=(10,6))
plt.grid()
plt.plot(train_data.index,train_data,label='train_data')
plt.plot(test_data.index,pr,label='predicted',linestyle='--',
color='tab:red')
plt.title('predicted from training data')
plt.legend()
plt.show()
print('RMSE : ',np.sqrt(MSE(np.array(test_data),pr)))
print('MAE : ',MAE(np.array(test_data),pr))

RMSE : 3.936924793032358
MAE : 3.219658364456228

forecast = pr

plt.figure(figsize=(10,6))
plt.grid()
plt.plot(test_data.index,test_data,label='actual')
plt.plot(test_data.index,forecast,label='forecast')
plt.title('Forecast VS Actual')
plt.legend()
plt.show()
