Minor Assignment
Minor Assignment
import pandas as pd
# Load the housing dataset from the working directory into a DataFrame.
df = pd.read_csv('Housing.csv')
# Bare expression: echoes the frame when run as the last line of a notebook cell.
df
2 no no 2 yes semi-furnished
4 no yes 2 no furnished
540 no no 2 no unfurnished
541 no no 0 no semi-furnished
542 no no 0 no unfurnished
543 no no 0 no furnished
544 no no 0 no unfurnished
# Peek at the first and last rows (pandas shows 5 rows by default).
# NOTE(review): only the tail output is recorded below, so these two calls
# presumably shared one cell and only the last expression was echoed.
df.head()
df.tail()
540 no no 2 no unfurnished
541 no no 0 no semi-furnished
542 no no 0 no unfurnished
543 no no 0 no furnished
544 no no 0 no unfurnished
# (rows, columns) of the loaded frame.
df.shape
(545, 13)
# Print column names, non-null counts, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 545 non-null int64
1 area 545 non-null int64
2 bedrooms 545 non-null int64
3 bathrooms 545 non-null int64
4 stories 545 non-null int64
5 mainroad 545 non-null object
6 guestroom 545 non-null object
7 basement 545 non-null object
8 hotwaterheating 545 non-null object
9 airconditioning 545 non-null object
10 parking 545 non-null int64
11 prefarea 545 non-null object
12 furnishingstatus 545 non-null object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()
price area bedrooms bathrooms stories
\
count 5.450000e+02 545.000000 545.000000 545.000000 545.000000
parking
count 545.000000
mean 0.693578
std 0.861586
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 3.000000
# Positional indexing: first three rows x first four columns ...
df.iloc[0:3, 0:4]
# ... and the single row at position 3, returned as a Series (this is the
# value shown in the output below).
df.iloc[3]
price 12215000
area 7500
bedrooms 4
bathrooms 2
stories 2
mainroad yes
guestroom no
basement yes
hotwaterheating no
airconditioning yes
parking 3
prefarea yes
furnishingstatus furnished
Name: 3, dtype: object
# Label-based slice: .loc includes BOTH endpoints, so rows 10 through 16 are
# returned. Passing a list of columns keeps the result a DataFrame.
df.loc[10:16, ['area']]
area
10 13200
11 6000
12 6550
13 3500
14 7800
15 6000
16 6600
area bedrooms
10 13200 3
11 6000 4
12 6550 4
13 3500 4
14 7800 3
15 6000 4
16 6600 4
# Number of distinct values in each column.
df.nunique()
price 219
area 284
bedrooms 6
bathrooms 4
stories 4
mainroad 2
guestroom 2
basement 2
hotwaterheating 2
airconditioning 2
parking 4
prefarea 2
furnishingstatus 3
dtype: int64
# Distinct bathroom counts present in the data (value not echoed below).
df['bathrooms'].unique()
# Per-column flag: True if the column contains at least one missing value.
df.isnull().any()
price False
area False
bedrooms False
bathrooms False
stories False
mainroad False
guestroom False
basement False
hotwaterheating False
airconditioning False
parking False
prefarea False
furnishingstatus False
dtype: bool
# Returns a copy with rows containing missing values removed.
# NOTE(review): the result is not assigned and inplace is not set, so `df`
# itself is left unchanged; the null check above reports no missing values.
df.dropna()
1 no yes 3 no furnished
2 no no 2 yes semi-furnished
4 no yes 2 no furnished
.. ... ... ... ... ...
540 no no 2 no unfurnished
541 no no 0 no semi-furnished
542 no no 0 no unfurnished
543 no no 0 no furnished
544 no no 0 no unfurnished
# Count fully duplicated rows (value not echoed below).
df.duplicated().sum()
# Returns a copy without the rows labelled 0 and 1.
# NOTE(review): the result is not assigned, so `df` keeps all rows; `axis=0`
# is redundant once `index=` is given.
df.drop(index=[0,1], axis=0)
2 no no 2 yes semi-furnished
3 no yes 3 yes furnished
4 no yes 2 no furnished
540 no no 2 no unfurnished
541 no no 0 no semi-furnished
542 no no 0 no unfurnished
543 no no 0 no furnished
544 no no 0 no unfurnished
# Echo the frame again to inspect it after the (non-assigned) drop above.
df
2 no no 2 yes semi-furnished
4 no yes 2 no furnished
540 no no 2 no unfurnished
541 no no 0 no semi-furnished
542 no no 0 no unfurnished
543 no no 0 no furnished
544 no no 0 no unfurnished
# Group the rows of `df` by their `area` value; groups are evaluated lazily.
grp = df.groupby('area')
# Echoing a GroupBy object only shows its repr, not the grouped data.
grp
<pandas.core.groupby.generic.DataFrameGroupBy object at
0x00000166E10AD430>
# Second, independent copy of the dataset; later cells mutate `d` (index
# changes) while `df`/`grp` keep the original layout.
d = pd.read_csv('Housing.csv')
# Frequency of each distinct price, most common first.
d['price'].value_counts()
price
3500000 17
4200000 17
4900000 12
3150000 9
5600000 9
..
6580000 1
4319000 1
4375000 1
4382000 1
13300000 1
Name: count, Length: 219, dtype: int64
# Frequency of each distinct area, most common first.
d['area'].value_counts()
area
6000 24
3000 14
4500 13
4000 11
5500 9
..
6862 1
4815 1
9166 1
6321 1
3620 1
Name: count, Length: 284, dtype: int64
# Furnishing status of every house whose area is exactly 9960.
grp.get_group(9960)['furnishingstatus']
2 semi-furnished
Name: furnishingstatus, dtype: object
# Furnishing status of every house whose area is exactly 6000
# (the most frequent area per the value_counts output above).
grp.get_group(6000)['furnishingstatus']
11 semi-furnished
15 semi-furnished
26 semi-furnished
38 unfurnished
39 semi-furnished
43 semi-furnished
44 furnished
45 semi-furnished
46 furnished
52 furnished
54 semi-furnished
55 unfurnished
59 furnished
60 semi-furnished
68 furnished
71 unfurnished
79 furnished
80 semi-furnished
83 semi-furnished
94 semi-furnished
99 unfurnished
135 unfurnished
227 furnished
486 semi-furnished
Name: furnishingstatus, dtype: object
# All columns for the houses whose area is exactly 6000.
grp.get_group(6000)
price area bedrooms bathrooms stories mainroad guestroom
basement \
11 9681000 6000 4 3 2 yes yes
yes
15 9100000 6000 4 1 2 yes no
yes
26 8463000 6000 3 2 4 yes yes
yes
38 7962500 6000 3 1 4 yes yes
no
39 7910000 6000 4 2 4 yes no
no
43 7700000 6000 4 2 4 yes no
no
44 7560000 6000 4 2 4 yes no
no
45 7560000 6000 3 2 3 yes no
no
46 7525000 6000 3 2 4 yes no
no
52 7350000 6000 4 2 4 yes yes
no
54 7350000 6000 3 2 2 yes yes
no
55 7350000 6000 3 1 2 yes no
no
59 7210000 6000 3 2 4 yes yes
no
60 7140000 6000 3 2 2 yes yes
no
68 6860000 6000 3 1 1 yes no
no
71 6755000 6000 4 2 4 yes no
no
79 6650000 6000 3 2 3 yes yes
no
80 6629000 6000 3 1 2 yes no
no
83 6580000 6000 3 2 4 yes no
no
94 6300000 6000 4 2 4 yes no
no
99 6265000 6000 4 1 3 yes yes
yes
135 5775000 6000 3 2 4 yes no
no
227 4690000 6000 2 1 1 yes no
yes
486 2870000 6000 2 1 1 yes no
no
hotwaterheating airconditioning parking prefarea furnishingstatus
11 yes no 2 no semi-furnished
15 no no 2 no semi-furnished
38 no yes 2 no unfurnished
39 no yes 1 no semi-furnished
43 no no 2 no semi-furnished
44 no yes 1 no furnished
45 no yes 0 no semi-furnished
46 no yes 1 no furnished
52 no yes 1 no furnished
54 no yes 1 no semi-furnished
55 no yes 1 no unfurnished
59 no yes 1 no furnished
60 no no 1 no semi-furnished
68 no yes 1 no furnished
71 no yes 0 no unfurnished
79 no yes 0 no furnished
83 no yes 0 no semi-furnished
94 no no 1 no semi-furnished
99 no no 0 yes unfurnished
486 no no 0 no semi-furnished
# Column-wise minimum within the area == 6000 group. Each column is reduced
# independently, so the result is not a single real row; text columns are
# compared as strings (e.g. 'no' < 'yes').
grp.get_group(6000).min()
price 2870000
area 6000
bedrooms 2
bathrooms 1
stories 1
mainroad yes
guestroom no
basement no
hotwaterheating no
airconditioning no
parking 0
prefarea no
furnishingstatus furnished
dtype: object
# Column-wise maximum within the area == 6000 group. Each column is reduced
# independently, so the result is not a single real row; text columns are
# compared as strings (e.g. 'unfurnished' sorts last).
grp.get_group(6000).max()
price 9681000
area 6000
bedrooms 4
bathrooms 3
stories 4
mainroad yes
guestroom yes
basement yes
hotwaterheating yes
airconditioning yes
parking 2
prefarea yes
furnishingstatus unfurnished
dtype: object
# Returns a copy of `d` indexed by `area`.
# NOTE(review): the result is not assigned, so `d` itself is unchanged here.
d.set_index('area')
1 no yes 3 no furnished
2 no no 2 yes semi-furnished
4 no yes 2 no furnished
540 no no 2 no unfurnished
541 no no 0 no semi-furnished
542 no no 0 no unfurnished
543 no no 0 no furnished
544 no no 0 no unfurnished
# Mutates `d` in place: `furnishingstatus` becomes the (non-unique) index and
# is no longer a regular column from this point on.
d.set_index('furnishingstatus', inplace=True)
d
price area bedrooms bathrooms stories
mainroad \
furnishingstatus
furnished no no no yes
2
furnished no no no yes
3
semi-furnished no yes no no
2
furnished no yes no yes
3
furnished yes yes no yes
2
... ... ... ... ...
...
unfurnished no yes no no
2
semi-furnished no no no no
0
unfurnished no no no no
0
furnished no no no no
0
unfurnished no no no no
0
prefarea
furnishingstatus
furnished yes
furnished no
semi-furnished yes
furnished yes
furnished no
... ...
unfurnished no
semi-furnished no
unfurnished no
furnished no
unfurnished no
# Returns a copy with the index moved back to a column.
# NOTE(review): the result is not assigned, so `d` stays indexed by
# `furnishingstatus` - the sorted output below still shows that index.
# Use `d = d.reset_index()` if the reset was meant to persist.
d.reset_index()
# Areas in ascending order (a new Series; `d` is not reordered).
d['area'].sort_values()
furnishingstatus
unfurnished 1650
unfurnished 1700
semi-furnished 1836
semi-furnished 1905
unfurnished 1950
...
unfurnished 12944
furnished 13200
furnished 13200
semi-furnished 15600
unfurnished 16200
Name: area, Length: 545, dtype: int64
import numpy as np
import matplotlib.pyplot as plt
prefarea
furnishingstatus
furnished yes
furnished no
semi-furnished yes
furnished yes
furnished no
... ...
unfurnished no
semi-furnished no
unfurnished no
furnished no
unfurnished no
# Extract the price column as a plain NumPy array for the np.* statistics below.
prices = d['price'].values
prices
# Arithmetic mean of all prices.
mean= np.mean(prices)
mean
4766729.247706422
# Highest price in the dataset.
# Stored as `max_price` rather than `max`: binding the name `max` would shadow
# the builtin max() for every later cell in the session.
max_price = np.max(prices)
max_price
13300000
# Population standard deviation (NumPy's default ddof=0); this is why it is
# slightly smaller than the sample value from statistics.stdev further down.
np.std(prices)
1868722.8281312082
# Lowest price in the dataset.
np.min(prices)
1750000
# 25th percentile (first quartile) of price.
np.percentile(d['price'], 25)
3430000.0
# Sorted copy of the prices (value not echoed below; `d` is not modified).
np.sort(d['price'])
# Population variance (NumPy's default ddof=0).
np.var(d['price'])
3492125008378.707
# Histogram of house areas: 30 bins on an 8x5 inch figure.
plt.figure(figsize=(8, 5))
plt.hist(
    d['area'],
    bins=30,
    edgecolor='black',
    color='skyblue',
)
plt.xlabel('Area')
plt.ylabel('Frequency')
plt.title('Distribution of Area')
plt.grid()
plt.show()
# Line plot of price against area. Rows are ordered by area first so the
# line runs left to right instead of zig-zagging in file order.
sorted_data = d.sort_values('area')
plt.figure(figsize=(40, 10))
plt.plot(
    sorted_data['area'],
    sorted_data['price'],
    color='green',
    linestyle='-',
    marker='o',
    markersize=8,
    markeredgecolor='k',
    markerfacecolor='w',
)
plt.xlabel('Area')
plt.ylabel('Price')
plt.title('Line Plot: Area vs Price')
plt.grid()
plt.show()
# Pie chart of how many houses have each bedroom count.
bedroom_counts = d['bedrooms'].value_counts()
# Pull every wedge slightly away from the centre.
explode = [0.05 for _ in range(len(bedroom_counts))]


def _wedge_label(p):
    """Return the percentage text for a wedge; blank for slices of 5% or less."""
    if p > 5:
        return f'{p:.1f}%'
    return ''


plt.figure(figsize=(10, 8))
plt.pie(
    bedroom_counts,
    labels=bedroom_counts.index,
    explode=explode,
    autopct=_wedge_label,
    startangle=140,
    pctdistance=0.8,
    labeldistance=1.1,
)
plt.title('Distribution of Houses by Bedrooms', fontsize=16)
plt.tight_layout()
plt.show()
# Scatter of price against area; marker colour encodes the bedroom count.
plt.figure(figsize=(10, 7))
scatter = plt.scatter(
    d['area'],
    # The y data must be price: the title and the y-axis label both say
    # "Price", but the parking column was being plotted here.
    d['price'],
    c=d['bedrooms'],
    cmap='viridis',
    alpha=0.7,
    edgecolors='w',
    s=80,
)
plt.colorbar(scatter, label='Number of Bedrooms')
plt.title('Area vs Price Colored by Bedrooms', fontsize=16)
plt.xlabel('Area')
plt.ylabel('Price')
plt.grid()
plt.show()
# 3D line through (area, bedrooms, parking), visited in ascending-area order.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')
x = d['area']
y = d['bedrooms']
z = d['parking']
# Sort positions on the raw array so the result is a plain integer ndarray,
# independent of whatever (possibly non-unique) index `d` currently carries.
sorted_idx = np.argsort(x.to_numpy())
x = x.iloc[sorted_idx]
y = y.iloc[sorted_idx]
z = z.iloc[sorted_idx]
ax.plot(
    x, y, z,
    color='c', marker='o', ms=4, mec='k', mfc='w', ls=':', lw=2,
)
ax.set_xlabel('Area')
ax.set_ylabel('Bedrooms')
ax.set_zlabel('parking')
# The z axis carries parking, not price; the title previously said "Price".
ax.set_title('3D Line Plot: Area, Bedrooms vs Parking')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Box plot: spread of price for each bedroom count.
plt.figure(figsize=(8, 5))
box_ax = sns.boxplot(data=d, x='bedrooms', y='price', palette='Pastel1')
box_ax.set_title('Price Distribution Across Bedroom Counts')
box_ax.set_xlabel('Number of Bedrooms')
box_ax.set_ylabel('Price')
plt.show()
# Histogram of price (30 bins) with a KDE curve overlaid.
plt.figure(figsize=(8, 5))
sns.histplot(
    d['price'],
    bins=30,
    kde=True,
    edgecolor='black',
    color='skyblue',
)
plt.xlabel('Price')
plt.ylabel('Count')
plt.title('Histogram of Price')
plt.grid()
plt.show()
# Kernel density estimate of price, drawn as a filled curve.
plt.figure(figsize=(8, 5))
# `fill=True` replaces the deprecated `shade=True`, which newer seaborn
# releases no longer accept; the rendered plot is the same.
sns.kdeplot(d['price'], fill=True, color='purple', linewidth=2)
plt.title('KDE Plot of Price')
plt.xlabel('Price')
plt.ylabel('Density')
plt.grid()
plt.show()
# Density-normalised histogram of price (30 bins).
plt.figure(figsize=(8, 5))
# stat='density' scales the bars so their total area is 1. Without it
# histplot draws raw counts, which contradicted the 'Density' y-axis label.
sns.histplot(
    d['price'],
    bins=30,
    stat='density',
    color='orange',
    edgecolor='black',
)
plt.title('Distribution Plot of Price with Histogram ')
plt.xlabel('Price')
plt.ylabel('Density')
plt.grid()
plt.show()
import statistics as stats
# Arithmetic mean via the standard library; matches the np.mean result above.
stats.mean(d['price'])
4766729.247706422
# Middle value of the sorted prices.
stats.median(d['price'])
4340000
# Most common bedroom count (value not echoed below).
stats.mode(d['bedrooms'])
# Sample variance (divides by n - 1), hence slightly larger than np.var above.
stats.variance(d['price'])
3498544355820.5728
# Sample standard deviation (n - 1 denominator), hence slightly larger than
# the population value from np.std above.
stats.stdev(d['price'])
1870439.6156573922
# Harmonic mean of area: n divided by the sum of reciprocals.
stats.harmonic_mean(d['area'])
4398.311914424032
# Geometric mean of price: n-th root of the product of all values.
stats.geometric_mean(d['price'])
4443650.939099614
# Quartiles of price, computed with pandas' default linear interpolation.
price_column = d['price']
q1, q2, q3 = (price_column.quantile(level) for level in (0.25, 0.5, 0.75))
print(f"25% Quantile (Q1): {q1}")
print(f"50% Quantile (Median, Q2): {q2}")
print(f"75% Quantile (Q3): {q3}")

# Grouped median from the standard library, using its default class interval.
median_grouped_price = stats.median_grouped(price_column)
print(f"Median Grouped (Price): {median_grouped_price}")