import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from arch.unitroot import ADF
from statsmodels.tsa.statespace.sarimax import SARIMAX
# 2. Load data and create proper datetime column
df = pd.read_csv('USA FIRM SALES DATA.csv')
print(df.tail(5))

# Combine Year and Month into a datetime. The format string must mirror the
# single-space separator used in the concatenation ("<year> <month>").
# NOTE(review): '%B' expects full month names (e.g. "January") — confirm the
# 'Month' column is spelled that way in the CSV.
df['Date'] = pd.to_datetime(
    df['Year'].astype(str) + ' ' + df['Month'],
    format='%Y %B',
)

# Set datetime as index
df.set_index('Date', inplace=True)
# Tag the index with a month-start ('MS') frequency
df.index.freq = 'MS'

# Drop original Year/Month, keeping only the business-unit sales columns
df = df[['BU1', 'BU2', 'BU3']]
# 3. Create Time Series and Plot
# One line per business-unit column; the y-axis label is set directly on the
# Axes object returned by DataFrame.plot.
df.plot(title="Monthly Sales for BU1, BU2, BU3").set_ylabel("Sales (in million USD)")
plt.show()
# 4. Stationarity check
def adf_test(series, name, alpha=0.05):
    """Run an Augmented Dickey-Fuller test on ``series`` and print a summary.

    Prints the test statistic, the p-value, and a stationarity verdict.

    Args:
        series: Data passed unchanged to ``ADF``.
        name: Label used in the printed header.
        alpha: Significance level. A p-value strictly below it is reported
            as "Stationary"; anything else as "Non-Stationary". Defaults to
            0.05, the threshold that was previously hard-coded.

    Returns:
        None. Results are only printed.
    """
    result = ADF(series)
    print(f"\nADF Test for {name}")
    print("ADF Statistic:", result.stat)
    print("p-value:", result.pvalue)
    print("=> Stationary" if result.pvalue < alpha else "=> Non-Stationary")
# Run ADF test for each Business Unit (one test per column of df)
for bu in df.columns:
    adf_test(df[bu], bu)
# 5. Fit SARIMAX models
# Fitted results and forecast means, both keyed by business-unit column name
models = {}
predictions = {}
# Forecast horizon: Jan, Feb, Mar 2018
# NOTE(review): the month labels assume the data ends in Dec 2017 — confirm.
forecast_steps = 3
# Fit a SARIMAX model for each BU
for bu in df.columns:
    # Same (1,1,1)x(1,1,1,12) specification for every business unit, with
    # the stationarity/invertibility constraints disabled.
    model = SARIMAX(
        df[bu],
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, 12),
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    model_fit = model.fit(disp=False)
    print(f'AIC : {model_fit.aic}')
    models[bu] = model_fit
    # Forecast the next `forecast_steps` months and keep the point forecasts
    pred = model_fit.get_forecast(steps=forecast_steps)
    pred_mean = pred.predicted_mean
    predictions[bu] = pred_mean
    print(f"\nForecast for {bu} (Jan-Mar 2018):")
    print(pred_mean)
# 6. Final forecast
# Collect the per-BU forecast series into one table (one column per BU).
forecast_df = pd.DataFrame(predictions)
# Relabel the rows as consecutive month starts beginning Jan 2018. The number
# of periods follows the table's own length, so changing the forecast horizon
# cannot cause an index-length mismatch.
forecast_df.index = pd.date_range(
    start='2018-01-01', periods=len(forecast_df), freq='MS'
)
print("\nAll BU Forecasts (Jan to Mar 2018):")
print(forecast_df)
# 7. Plot forecasts
# (matplotlib.pyplot is already imported as plt at the top of the file, so
# the duplicate mid-file import is dropped.)
# Plot observed + forecast for each BU, one figure per business unit
for bu in df.columns:
    plt.figure(figsize=(10, 4))
    # Plot historical sales
    df[bu].plot(label='Historical', color='blue')
    # Plot forecasted sales on the same axes, dashed to set them apart
    forecast_df[bu].plot(
        label='Forecast (Jan–Mar 2018)',
        color='orange',
        linestyle='--',
    )
    plt.title(f"Sales Forecast for {bu}")
    plt.xlabel("Date")
    plt.ylabel("Sales (in million USD)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()