#improting libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import io
#uploading the file
from google.colab import files
uploaded = files.upload()
#loading the dataset
# Assuming the file was uploaded as 'Sample - Superstore (3).csv' based on the output
file_name = list(uploaded.keys())[0]
df = pd.read_csv(io.BytesIO(uploaded[file_name]), encoding='latin-1')
df.head()
# cleaning the data
df.drop(['Postal Code'], axis=1, inplace=True)
df['Order Date'] = pd.to_datetime(df['Order Date'])
df['Month'] = df['Order Date'].dt.to_period('M')
monthly_sales = df.groupby('Month')['Sales'].sum().reset_index()
monthly_sales['Month'] = monthly_sales['Month'].astype(str)
#visualising sales trends
plt.figure(figsize=(12,6))
sns.lineplot(x='Month', y='Sales', data=monthly_sales, marker='o')
plt.xticks(rotation=45)
plt.title('Monthly Sales Trend')
plt.show()
#applying linear regression for forcasting
# first i will convert months to numbers for regression
monthly_sales['Month_Num'] = range(len(monthly_sales))
X = monthly_sales[['Month_Num']]
y = monthly_sales['Sales']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("MSE:", mean_squared_error(y_test, y_pred))
#forecasting next 3 months
future = pd.DataFrame({'Month_Num': [len(monthly_sales), len(monthly_sales)+1, len(monthly_sales)+2]})
future_sales = model.predict(future)
print("Predicted Future Sales:", future_sales)
# ploting correlation matrix
import seaborn as sns
import matplotlib.pyplot as plt
plt.figure(figsize=(10,6))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap="coolwarm")
plt.title("Correlation Matrix")
plt.show()
# Grouping the data by Month and calculating total sales for each month
monthly_sales = df.groupby(df['Order Date'].dt.to_period('M')).sum(numeric_only=True)
monthly_sales.reset_index(inplace=True)
# I am converting the Period data to string format for compatibility with Power BI or plotting
monthly_sales['Order Date'] = monthly_sales['Order Date'].astype(str)
#Export to CSV
monthly_sales.to_csv('Monthly_Sales.csv', index=False)
from google.colab import files
files.download('Monthly_Sales.csv')
Choose Files No file chosen Upload widget is only available when the cell has been executed in the current browser session. Please rerun this cell to
enable.
Saving Sample - Superstore.csv to Sample - Superstore (1).csv
MSE: 579646775.1158388
Predicted Future Sales: [76385.37330839 77375.75565028 78366.13799217]