import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the sales dataset; the relative path is resolved against the working directory.
DATA_PATH = "Datasets/SalesData.csv"
df = pd.read_csv(DATA_PATH)
# Preview the first rows (rendered when this is the last expression of a notebook cell).
df.head()
Temperature Revenue
0 24.566884 534.799028
1 26.005191 625.190122
2 27.790554 660.632289
3 20.595335 487.706960
4 11.503498 316.240194
# Scatter Revenue against Temperature to inspect the relationship before modelling.
sns.scatterplot(data=df, x="Temperature", y="Revenue")
<Axes: xlabel='Temperature', ylabel='Revenue'>
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible. The call is parenthesised so it can span several lines.
X_train, X_test, y_train, y_test = train_test_split(
    df[["Temperature"]],  # one-column frame: estimators expect 2-D features
    df[["Revenue"]],  # kept as a one-column frame, so coef_ comes back 2-D
    test_size=0.2,
    random_state=2,
)

from sklearn.linear_model import LinearRegression

# Least-squares fit of Revenue on Temperature using only the training split.
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
LinearRegression()
# Report the parameters of the fitted line.
slope = lin_reg.coef_
intercept = lin_reg.intercept_
print(f"Coefficient: {slope}")
print(f"Intercept: {intercept}")
Coefficient: [[21.38145125]]
Intercept: [46.72052514]
from sklearn.metrics import mean_squared_error

# Predict on the held-out rows and score the predictions with mean squared error.
y_pred = lin_reg.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {test_mse}")
Mean Squared Error: 636.1533670417468
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the held-out rows.
test_r2 = r2_score(y_test, y_pred)
print(f"R-Squared: {test_r2}")
R-Squared: 0.973546292060864
def _plot_fit(features, targets, subset_label):
    """Scatter one data subset and overlay the fitted regression line.

    Args:
        features: Temperature values to place on the x-axis.
        targets: Revenue values to place on the y-axis.
        subset_label: Name of the subset, shown in parentheses in the title.

    The line is drawn from the training predictions for every subset: it is
    the same fitted line either way, so the test points are judged against
    the model exactly as it was trained.
    """
    plt.scatter(features, targets, color="blue")
    plt.plot(X_train, lin_reg.predict(X_train), color="red")
    # Labels name the columns actually plotted (Temperature and Revenue).
    plt.title(f"Revenue vs Temperature ({subset_label})")
    plt.xlabel("Temperature")
    plt.ylabel("Revenue")
    plt.show()


_plot_fit(X_train, y_train, "Training Set")
_plot_fit(X_test, y_test, "Testing Set")
import statsmodels.api as sm

# sm.OLS fits exactly the design matrix it is given and never adds an
# intercept on its own. Passing the bare Temperature column forces the line
# through the origin, which reports an uncentered R-squared and a slope that
# disagrees with the scikit-learn fit above. add_constant prepends a "const"
# column so the model estimates an intercept as well.
# NOTE: a summary captured from the earlier no-intercept fit no longer
# applies; re-run this cell to refresh it.
X = sm.add_constant(df["Temperature"])
y = df[["Revenue"]]
model = sm.OLS(y, X).fit()
print(model.summary())
OLS Regression Results
======================================================================
=================
Dep. Variable: Revenue R-squared (uncentered):
0.997
Model: OLS Adj. R-squared (uncentered):
0.997
Method: Least Squares F-statistic:
1.756e+05
Date: Thu, 13 Feb 2025 Prob (F-statistic):
0.00
Time: 21:13:20 Log-Likelihood:
-2398.1
No. Observations: 500 AIC:
4798.
Df Residuals: 499 BIC:
4802.
Df Model: 1
Covariance Type: nonrobust
======================================================================
=========
coef std err t P>|t| [0.025
0.975]
----------------------------------------------------------------------
---------
Temperature 23.2244 0.055 419.007 0.000 23.116
23.333
======================================================================
========
Omnibus: 3.228 Durbin-Watson:
2.022
Prob(Omnibus): 0.199 Jarque-Bera (JB):
3.426
Skew: 0.080 Prob(JB):
0.180
Kurtosis: 3.372 Cond. No.
1.00
======================================================================
========
Notes:
[1] R² is computed without centering (uncentered) since the model does
not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is
correctly specified.