KEMBAR78
Minor Assignment | PDF | Statistics
0% found this document useful (0 votes)
17 views34 pages

Minor Assignment

minor project

Uploaded by

divyanshnayyar55
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views34 pages

Minor Assignment

minor project

Uploaded by

divyanshnayyar55
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 34

!pip install panda

Requirement already satisfied: panda in c:\users\divya\anaconda3\lib\


site-packages (0.3.1)
Requirement already satisfied: setuptools in c:\users\divya\anaconda3\
lib\site-packages (from panda) (69.5.1)
Requirement already satisfied: requests in c:\users\divya\anaconda3\
lib\site-packages (from panda) (2.32.2)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\
divya\anaconda3\lib\site-packages (from requests->panda) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\divya\
anaconda3\lib\site-packages (from requests->panda) (3.7)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\divya\
anaconda3\lib\site-packages (from requests->panda) (2.2.2)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\divya\
anaconda3\lib\site-packages (from requests->panda) (2025.1.31)

import pandas as pd

df = pd.read_csv('Housing.csv')
df

price area bedrooms bathrooms stories mainroad guestroom


basement \
0 13300000 7420 4 2 3 yes no
no
1 12250000 8960 4 4 4 yes no
no
2 12250000 9960 3 2 2 yes no
yes
3 12215000 7500 4 2 2 yes no
yes
4 11410000 7420 4 1 2 yes yes
yes
.. ... ... ... ... ... ... ...
...
540 1820000 3000 2 1 1 yes no
yes
541 1767150 2400 3 1 1 no no
no
542 1750000 3620 2 1 1 yes no
no
543 1750000 2910 3 1 1 no no
no
544 1750000 3850 3 1 2 yes no
no

hotwaterheating airconditioning parking prefarea furnishingstatus

0 no yes 2 yes furnished


1 no yes 3 no furnished

2 no no 2 yes semi-furnished

3 no yes 3 yes furnished

4 no yes 2 no furnished

.. ... ... ... ... ...

540 no no 2 no unfurnished

541 no no 0 no semi-furnished

542 no no 0 no unfurnished

543 no no 0 no furnished

544 no no 0 no unfurnished

[545 rows x 13 columns]

df.head()

price area bedrooms bathrooms stories mainroad guestroom


basement \
0 13300000 7420 4 2 3 yes no
no
1 12250000 8960 4 4 4 yes no
no
2 12250000 9960 3 2 2 yes no
yes
3 12215000 7500 4 2 2 yes no
yes
4 11410000 7420 4 1 2 yes yes
yes

hotwaterheating airconditioning parking prefarea furnishingstatus


0 no yes 2 yes furnished
1 no yes 3 no furnished
2 no no 2 yes semi-furnished
3 no yes 3 yes furnished
4 no yes 2 no furnished

df.tail()

price area bedrooms bathrooms stories mainroad guestroom


basement \
540 1820000 3000 2 1 1 yes no
yes
541 1767150 2400 3 1 1 no no
no
542 1750000 3620 2 1 1 yes no
no
543 1750000 2910 3 1 1 no no
no
544 1750000 3850 3 1 2 yes no
no

hotwaterheating airconditioning parking prefarea furnishingstatus

540 no no 2 no unfurnished

541 no no 0 no semi-furnished

542 no no 0 no unfurnished

543 no no 0 no furnished

544 no no 0 no unfurnished

df.shape

(545, 13)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 545 non-null int64
1 area 545 non-null int64
2 bedrooms 545 non-null int64
3 bathrooms 545 non-null int64
4 stories 545 non-null int64
5 mainroad 545 non-null object
6 guestroom 545 non-null object
7 basement 545 non-null object
8 hotwaterheating 545 non-null object
9 airconditioning 545 non-null object
10 parking 545 non-null int64
11 prefarea 545 non-null object
12 furnishingstatus 545 non-null object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB

df.describe()
price area bedrooms bathrooms stories
\
count 5.450000e+02 545.000000 545.000000 545.000000 545.000000

mean 4.766729e+06 5150.541284 2.965138 1.286239 1.805505

std 1.870440e+06 2170.141023 0.738064 0.502470 0.867492

min 1.750000e+06 1650.000000 1.000000 1.000000 1.000000

25% 3.430000e+06 3600.000000 2.000000 1.000000 1.000000

50% 4.340000e+06 4600.000000 3.000000 1.000000 2.000000

75% 5.740000e+06 6360.000000 3.000000 2.000000 2.000000

max 1.330000e+07 16200.000000 6.000000 4.000000 4.000000

parking
count 545.000000
mean 0.693578
std 0.861586
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 3.000000

df.iloc[0:3, 0:4]

price area bedrooms bathrooms


0 13300000 7420 4 2
1 12250000 8960 4 4
2 12250000 9960 3 2

df.iloc[3]

price 12215000
area 7500
bedrooms 4
bathrooms 2
stories 2
mainroad yes
guestroom no
basement yes
hotwaterheating no
airconditioning yes
parking 3
prefarea yes
furnishingstatus furnished
Name: 3, dtype: object

df.loc[10:16, ['area']]

area
10 13200
11 6000
12 6550
13 3500
14 7800
15 6000
16 6600

df.loc[10:16, ['area', 'bedrooms']]

area bedrooms
10 13200 3
11 6000 4
12 6550 4
13 3500 4
14 7800 3
15 6000 4
16 6600 4

df.nunique()

price 219
area 284
bedrooms 6
bathrooms 4
stories 4
mainroad 2
guestroom 2
basement 2
hotwaterheating 2
airconditioning 2
parking 4
prefarea 2
furnishingstatus 3
dtype: int64

df['bathrooms'].unique()

array([2, 4, 1, 3], dtype=int64)

df.isnull().any()

price False
area False
bedrooms False
bathrooms False
stories False
mainroad False
guestroom False
basement False
hotwaterheating False
airconditioning False
parking False
prefarea False
furnishingstatus False
dtype: bool

df.dropna()

price area bedrooms bathrooms stories mainroad guestroom


basement \
0 13300000 7420 4 2 3 yes no
no
1 12250000 8960 4 4 4 yes no
no
2 12250000 9960 3 2 2 yes no
yes
3 12215000 7500 4 2 2 yes no
yes
4 11410000 7420 4 1 2 yes yes
yes
.. ... ... ... ... ... ... ...
...
540 1820000 3000 2 1 1 yes no
yes
541 1767150 2400 3 1 1 no no
no
542 1750000 3620 2 1 1 yes no
no
543 1750000 2910 3 1 1 no no
no
544 1750000 3850 3 1 2 yes no
no

hotwaterheating airconditioning parking prefarea furnishingstatus

0 no yes 2 yes furnished

1 no yes 3 no furnished

2 no no 2 yes semi-furnished

3 no yes 3 yes furnished

4 no yes 2 no furnished
.. ... ... ... ... ...

540 no no 2 no unfurnished

541 no no 0 no semi-furnished

542 no no 0 no unfurnished

543 no no 0 no furnished

544 no no 0 no unfurnished

[545 rows x 13 columns]

df.duplicated().sum()

df.drop(index=[0,1], axis=0)

price area bedrooms bathrooms stories mainroad guestroom


basement \
2 12250000 9960 3 2 2 yes no
yes
3 12215000 7500 4 2 2 yes no
yes
4 11410000 7420 4 1 2 yes yes
yes
5 10850000 7500 3 3 1 yes no
yes
6 10150000 8580 4 3 4 yes no
no
.. ... ... ... ... ... ... ...
...
540 1820000 3000 2 1 1 yes no
yes
541 1767150 2400 3 1 1 no no
no
542 1750000 3620 2 1 1 yes no
no
543 1750000 2910 3 1 1 no no
no
544 1750000 3850 3 1 2 yes no
no

hotwaterheating airconditioning parking prefarea furnishingstatus

2 no no 2 yes semi-furnished
3 no yes 3 yes furnished

4 no yes 2 no furnished

5 no yes 2 yes semi-furnished

6 no yes 2 yes semi-furnished

.. ... ... ... ... ...

540 no no 2 no unfurnished

541 no no 0 no semi-furnished

542 no no 0 no unfurnished

543 no no 0 no furnished

544 no no 0 no unfurnished

[543 rows x 13 columns]

df.drop(index=[0,1], axis=0, inplace=True)

df

price area bedrooms bathrooms stories mainroad guestroom


basement \
2 12250000 9960 3 2 2 yes no
yes
3 12215000 7500 4 2 2 yes no
yes
4 11410000 7420 4 1 2 yes yes
yes
5 10850000 7500 3 3 1 yes no
yes
6 10150000 8580 4 3 4 yes no
no
.. ... ... ... ... ... ... ...
...
540 1820000 3000 2 1 1 yes no
yes
541 1767150 2400 3 1 1 no no
no
542 1750000 3620 2 1 1 yes no
no
543 1750000 2910 3 1 1 no no
no
544 1750000 3850 3 1 2 yes no
no
hotwaterheating airconditioning parking prefarea furnishingstatus

2 no no 2 yes semi-furnished

3 no yes 3 yes furnished

4 no yes 2 no furnished

5 no yes 2 yes semi-furnished

6 no yes 2 yes semi-furnished

.. ... ... ... ... ...

540 no no 2 no unfurnished

541 no no 0 no semi-furnished

542 no no 0 no unfurnished

543 no no 0 no furnished

544 no no 0 no unfurnished

[543 rows x 13 columns]

grp = df.groupby('area')
grp

<pandas.core.groupby.generic.DataFrameGroupBy object at
0x00000166E10AD430>

d = pd.read_csv('Housing.csv')

d['price'].value_counts()

price
3500000 17
4200000 17
4900000 12
3150000 9
5600000 9
..
6580000 1
4319000 1
4375000 1
4382000 1
13300000 1
Name: count, Length: 219, dtype: int64
d['area'].value_counts()

area
6000 24
3000 14
4500 13
4000 11
5500 9
..
6862 1
4815 1
9166 1
6321 1
3620 1
Name: count, Length: 284, dtype: int64

grp.get_group(9960)['furnishingstatus']

2 semi-furnished
Name: furnishingstatus, dtype: object

grp.get_group(6000)['furnishingstatus']

11 semi-furnished
15 semi-furnished
26 semi-furnished
38 unfurnished
39 semi-furnished
43 semi-furnished
44 furnished
45 semi-furnished
46 furnished
52 furnished
54 semi-furnished
55 unfurnished
59 furnished
60 semi-furnished
68 furnished
71 unfurnished
79 furnished
80 semi-furnished
83 semi-furnished
94 semi-furnished
99 unfurnished
135 unfurnished
227 furnished
486 semi-furnished
Name: furnishingstatus, dtype: object

grp.get_group(6000)
price area bedrooms bathrooms stories mainroad guestroom
basement \
11 9681000 6000 4 3 2 yes yes
yes
15 9100000 6000 4 1 2 yes no
yes
26 8463000 6000 3 2 4 yes yes
yes
38 7962500 6000 3 1 4 yes yes
no
39 7910000 6000 4 2 4 yes no
no
43 7700000 6000 4 2 4 yes no
no
44 7560000 6000 4 2 4 yes no
no
45 7560000 6000 3 2 3 yes no
no
46 7525000 6000 3 2 4 yes no
no
52 7350000 6000 4 2 4 yes yes
no
54 7350000 6000 3 2 2 yes yes
no
55 7350000 6000 3 1 2 yes no
no
59 7210000 6000 3 2 4 yes yes
no
60 7140000 6000 3 2 2 yes yes
no
68 6860000 6000 3 1 1 yes no
no
71 6755000 6000 4 2 4 yes no
no
79 6650000 6000 3 2 3 yes yes
no
80 6629000 6000 3 1 2 yes no
no
83 6580000 6000 3 2 4 yes no
no
94 6300000 6000 4 2 4 yes no
no
99 6265000 6000 4 1 3 yes yes
yes
135 5775000 6000 3 2 4 yes no
no
227 4690000 6000 2 1 1 yes no
yes
486 2870000 6000 2 1 1 yes no
no
hotwaterheating airconditioning parking prefarea furnishingstatus

11 yes no 2 no semi-furnished

15 no no 2 no semi-furnished

26 no yes 0 yes semi-furnished

38 no yes 2 no unfurnished

39 no yes 1 no semi-furnished

43 no no 2 no semi-furnished

44 no yes 1 no furnished

45 no yes 0 no semi-furnished

46 no yes 1 no furnished

52 no yes 1 no furnished

54 no yes 1 no semi-furnished

55 no yes 1 no unfurnished

59 no yes 1 no furnished

60 no no 1 no semi-furnished

68 no yes 1 no furnished

71 no yes 0 no unfurnished

79 no yes 0 no furnished

80 yes no 1 yes semi-furnished

83 no yes 0 no semi-furnished

94 no no 1 no semi-furnished

99 no no 0 yes unfurnished

135 no yes 0 no unfurnished

227 no yes 1 no furnished

486 no no 0 no semi-furnished
grp.get_group(6000).min()

price 2870000
area 6000
bedrooms 2
bathrooms 1
stories 1
mainroad yes
guestroom no
basement no
hotwaterheating no
airconditioning no
parking 0
prefarea no
furnishingstatus furnished
dtype: object

grp.get_group(6000).max()

price 9681000
area 6000
bedrooms 4
bathrooms 3
stories 4
mainroad yes
guestroom yes
basement yes
hotwaterheating yes
airconditioning yes
parking 2
prefarea yes
furnishingstatus unfurnished
dtype: object

d.set_index('area')

price bedrooms bathrooms stories mainroad guestroom


basement \
area

7420 13300000 4 2 3 yes no


no
8960 12250000 4 4 4 yes no
no
9960 12250000 3 2 2 yes no
yes
7500 12215000 4 2 2 yes no
yes
7420 11410000 4 1 2 yes yes
yes
... ... ... ... ... ... ... .
..
3000 1820000 2 1 1 yes no
yes
2400 1767150 3 1 1 no no
no
3620 1750000 2 1 1 yes no
no
2910 1750000 3 1 1 no no
no
3850 1750000 3 1 2 yes no
no

hotwaterheating airconditioning parking prefarea


furnishingstatus
area

7420 no yes 2 yes


furnished
8960 no yes 3 no
furnished
9960 no no 2 yes semi-
furnished
7500 no yes 3 yes
furnished
7420 no yes 2 no
furnished
... ... ... ... ... ..
.
3000 no no 2 no
unfurnished
2400 no no 0 no semi-
furnished
3620 no no 0 no
unfurnished
2910 no no 0 no
furnished
3850 no no 0 no
unfurnished

[545 rows x 12 columns]

price area bedrooms bathrooms stories mainroad guestroom


basement \
0 13300000 7420 4 2 3 yes no
no
1 12250000 8960 4 4 4 yes no
no
2 12250000 9960 3 2 2 yes no
yes
3 12215000 7500 4 2 2 yes no
yes
4 11410000 7420 4 1 2 yes yes
yes
.. ... ... ... ... ... ... ...
...
540 1820000 3000 2 1 1 yes no
yes
541 1767150 2400 3 1 1 no no
no
542 1750000 3620 2 1 1 yes no
no
543 1750000 2910 3 1 1 no no
no
544 1750000 3850 3 1 2 yes no
no

hotwaterheating airconditioning parking prefarea furnishingstatus

0 no yes 2 yes furnished

1 no yes 3 no furnished

2 no no 2 yes semi-furnished

3 no yes 3 yes furnished

4 no yes 2 no furnished

.. ... ... ... ... ...

540 no no 2 no unfurnished

541 no no 0 no semi-furnished

542 no no 0 no unfurnished

543 no no 0 no furnished

544 no no 0 no unfurnished

[545 rows x 13 columns]

d.set_index('furnishingstatus', inplace=True)

d
price area bedrooms bathrooms stories
mainroad \
furnishingstatus

furnished 13300000 7420 4 2 3


yes
furnished 12250000 8960 4 4 4
yes
semi-furnished 12250000 9960 3 2 2
yes
furnished 12215000 7500 4 2 2
yes
furnished 11410000 7420 4 1 2
yes
... ... ... ... ... ... ..
.
unfurnished 1820000 3000 2 1 1
yes
semi-furnished 1767150 2400 3 1 1
no
unfurnished 1750000 3620 2 1 1
yes
furnished 1750000 2910 3 1 1
no
unfurnished 1750000 3850 3 1 2
yes

guestroom basement hotwaterheating airconditioning


parking \
furnishingstatus

furnished no no no yes
2
furnished no no no yes
3
semi-furnished no yes no no
2
furnished no yes no yes
3
furnished yes yes no yes
2
... ... ... ... ...
...
unfurnished no yes no no
2
semi-furnished no no no no
0
unfurnished no no no no
0
furnished no no no no
0
unfurnished no no no no
0

prefarea
furnishingstatus
furnished yes
furnished no
semi-furnished yes
furnished yes
furnished no
... ...
unfurnished no
semi-furnished no
unfurnished no
furnished no
unfurnished no

[545 rows x 12 columns]

d.reset_index()

furnishingstatus price area bedrooms bathrooms stories


mainroad \
0 furnished 13300000 7420 4 2 3
yes
1 furnished 12250000 8960 4 4 4
yes
2 semi-furnished 12250000 9960 3 2 2
yes
3 furnished 12215000 7500 4 2 2
yes
4 furnished 11410000 7420 4 1 2
yes
.. ... ... ... ... ... ...
...
540 unfurnished 1820000 3000 2 1 1
yes
541 semi-furnished 1767150 2400 3 1 1
no
542 unfurnished 1750000 3620 2 1 1
yes
543 furnished 1750000 2910 3 1 1
no
544 unfurnished 1750000 3850 3 1 2
yes

guestroom basement hotwaterheating airconditioning parking


prefarea
0 no no no yes 2
yes
1 no no no yes 3
no
2 no yes no no 2
yes
3 no yes no yes 3
yes
4 yes yes no yes 2
no
.. ... ... ... ... ... .
..
540 no yes no no 2
no
541 no no no no 0
no
542 no no no no 0
no
543 no no no no 0
no
544 no no no no 0
no

[545 rows x 13 columns]

d['area'].sort_values()

furnishingstatus
unfurnished 1650
unfurnished 1700
semi-furnished 1836
semi-furnished 1905
unfurnished 1950
...
unfurnished 12944
furnished 13200
furnished 13200
semi-furnished 15600
unfurnished 16200
Name: area, Length: 545, dtype: int64

!pip install matplotlib

Requirement already satisfied: matplotlib in c:\users\divya\anaconda3\


lib\site-packages (3.8.4)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (4.51.0)
Requirement already satisfied: kiwisolver>=1.3.1 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: numpy>=1.21 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (1.26.4)
Requirement already satisfied: packaging>=20.0 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (23.2)
Requirement already satisfied: pillow>=8 in c:\users\divya\anaconda3\
lib\site-packages (from matplotlib) (10.3.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\divya\
anaconda3\lib\site-packages (from matplotlib) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\divya\anaconda3\
lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)

import numpy as np
import matplotlib.pyplot as plt

price area bedrooms bathrooms stories


mainroad \
furnishingstatus

furnished 13300000 7420 4 2 3


yes
furnished 12250000 8960 4 4 4
yes
semi-furnished 12250000 9960 3 2 2
yes
furnished 12215000 7500 4 2 2
yes
furnished 11410000 7420 4 1 2
yes
... ... ... ... ... ... ..
.
unfurnished 1820000 3000 2 1 1
yes
semi-furnished 1767150 2400 3 1 1
no
unfurnished 1750000 3620 2 1 1
yes
furnished 1750000 2910 3 1 1
no
unfurnished 1750000 3850 3 1 2
yes

guestroom basement hotwaterheating airconditioning


parking \
furnishingstatus
furnished no no no yes
2
furnished no no no yes
3
semi-furnished no yes no no
2
furnished no yes no yes
3
furnished yes yes no yes
2
... ... ... ... ...
...
unfurnished no yes no no
2
semi-furnished no no no no
0
unfurnished no no no no
0
furnished no no no no
0
unfurnished no no no no
0

prefarea
furnishingstatus
furnished yes
furnished no
semi-furnished yes
furnished yes
furnished no
... ...
unfurnished no
semi-furnished no
unfurnished no
furnished no
unfurnished no

[545 rows x 12 columns]

prices = d['price'].values

prices

array([13300000, 12250000, 12250000, 12215000, 11410000, 10850000,


10150000, 10150000, 9870000, 9800000, 9800000, 9681000,
9310000, 9240000, 9240000, 9100000, 9100000, 8960000,
8890000, 8855000, 8750000, 8680000, 8645000, 8645000,
8575000, 8540000, 8463000, 8400000, 8400000, 8400000,
8400000, 8400000, 8295000, 8190000, 8120000, 8080940,
8043000, 7980000, 7962500, 7910000, 7875000, 7840000,
7700000, 7700000, 7560000, 7560000, 7525000, 7490000,
7455000, 7420000, 7420000, 7420000, 7350000, 7350000,
7350000, 7350000, 7343000, 7245000, 7210000, 7210000,
7140000, 7070000, 7070000, 7035000, 7000000, 6930000,
6930000, 6895000, 6860000, 6790000, 6790000, 6755000,
6720000, 6685000, 6650000, 6650000, 6650000, 6650000,
6650000, 6650000, 6629000, 6615000, 6615000, 6580000,
6510000, 6510000, 6510000, 6475000, 6475000, 6440000,
6440000, 6419000, 6405000, 6300000, 6300000, 6300000,
6300000, 6300000, 6293000, 6265000, 6230000, 6230000,
6195000, 6195000, 6195000, 6160000, 6160000, 6125000,
6107500, 6090000, 6090000, 6090000, 6083000, 6083000,
6020000, 6020000, 6020000, 5950000, 5950000, 5950000,
5950000, 5950000, 5950000, 5950000, 5950000, 5943000,
5880000, 5880000, 5873000, 5873000, 5866000, 5810000,
5810000, 5810000, 5803000, 5775000, 5740000, 5740000,
5740000, 5740000, 5740000, 5652500, 5600000, 5600000,
5600000, 5600000, 5600000, 5600000, 5600000, 5600000,
5600000, 5565000, 5565000, 5530000, 5530000, 5530000,
5523000, 5495000, 5495000, 5460000, 5460000, 5460000,
5460000, 5425000, 5390000, 5383000, 5320000, 5285000,
5250000, 5250000, 5250000, 5250000, 5250000, 5250000,
5250000, 5250000, 5250000, 5243000, 5229000, 5215000,
5215000, 5215000, 5145000, 5145000, 5110000, 5110000,
5110000, 5110000, 5075000, 5040000, 5040000, 5040000,
5040000, 5033000, 5005000, 4970000, 4970000, 4956000,
4935000, 4907000, 4900000, 4900000, 4900000, 4900000,
4900000, 4900000, 4900000, 4900000, 4900000, 4900000,
4900000, 4900000, 4893000, 4893000, 4865000, 4830000,
4830000, 4830000, 4830000, 4795000, 4795000, 4767000,
4760000, 4760000, 4760000, 4753000, 4690000, 4690000,
4690000, 4690000, 4690000, 4690000, 4655000, 4620000,
4620000, 4620000, 4620000, 4620000, 4613000, 4585000,
4585000, 4550000, 4550000, 4550000, 4550000, 4550000,
4550000, 4550000, 4543000, 4543000, 4515000, 4515000,
4515000, 4515000, 4480000, 4480000, 4480000, 4480000,
4480000, 4473000, 4473000, 4473000, 4445000, 4410000,
4410000, 4403000, 4403000, 4403000, 4382000, 4375000,
4340000, 4340000, 4340000, 4340000, 4340000, 4319000,
4305000, 4305000, 4277000, 4270000, 4270000, 4270000,
4270000, 4270000, 4270000, 4235000, 4235000, 4200000,
4200000, 4200000, 4200000, 4200000, 4200000, 4200000,
4200000, 4200000, 4200000, 4200000, 4200000, 4200000,
4200000, 4200000, 4200000, 4200000, 4193000, 4193000,
4165000, 4165000, 4165000, 4130000, 4130000, 4123000,
4098500, 4095000, 4095000, 4095000, 4060000, 4060000,
4060000, 4060000, 4060000, 4025000, 4025000, 4025000,
4007500, 4007500, 3990000, 3990000, 3990000, 3990000,
3990000, 3920000, 3920000, 3920000, 3920000, 3920000,
3920000, 3920000, 3885000, 3885000, 3850000, 3850000,
3850000, 3850000, 3850000, 3850000, 3850000, 3836000,
3815000, 3780000, 3780000, 3780000, 3780000, 3780000,
3780000, 3773000, 3773000, 3773000, 3745000, 3710000,
3710000, 3710000, 3710000, 3710000, 3703000, 3703000,
3675000, 3675000, 3675000, 3675000, 3640000, 3640000,
3640000, 3640000, 3640000, 3640000, 3640000, 3640000,
3640000, 3633000, 3605000, 3605000, 3570000, 3570000,
3570000, 3570000, 3535000, 3500000, 3500000, 3500000,
3500000, 3500000, 3500000, 3500000, 3500000, 3500000,
3500000, 3500000, 3500000, 3500000, 3500000, 3500000,
3500000, 3500000, 3493000, 3465000, 3465000, 3465000,
3430000, 3430000, 3430000, 3430000, 3430000, 3430000,
3423000, 3395000, 3395000, 3395000, 3360000, 3360000,
3360000, 3360000, 3360000, 3360000, 3360000, 3360000,
3353000, 3332000, 3325000, 3325000, 3290000, 3290000,
3290000, 3290000, 3290000, 3290000, 3290000, 3290000,
3255000, 3255000, 3234000, 3220000, 3220000, 3220000,
3220000, 3150000, 3150000, 3150000, 3150000, 3150000,
3150000, 3150000, 3150000, 3150000, 3143000, 3129000,
3118850, 3115000, 3115000, 3115000, 3087000, 3080000,
3080000, 3080000, 3080000, 3045000, 3010000, 3010000,
3010000, 3010000, 3010000, 3010000, 3010000, 3003000,
2975000, 2961000, 2940000, 2940000, 2940000, 2940000,
2940000, 2940000, 2940000, 2940000, 2870000, 2870000,
2870000, 2870000, 2852500, 2835000, 2835000, 2835000,
2800000, 2800000, 2730000, 2730000, 2695000, 2660000,
2660000, 2660000, 2660000, 2660000, 2660000, 2660000,
2653000, 2653000, 2604000, 2590000, 2590000, 2590000,
2520000, 2520000, 2520000, 2485000, 2485000, 2450000,
2450000, 2450000, 2450000, 2450000, 2450000, 2408000,
2380000, 2380000, 2380000, 2345000, 2310000, 2275000,
2275000, 2275000, 2240000, 2233000, 2135000, 2100000,
2100000, 2100000, 1960000, 1890000, 1890000, 1855000,
1820000, 1767150, 1750000, 1750000, 1750000], dtype=int64)

mean= np.mean(prices)
mean

4766729.247706422

# Bound to max_price rather than `max` so the builtin max() is not shadowed
# for the rest of the session.
max_price = np.max(prices)
max_price

13300000

np.std(prices)

1868722.8281312082
np.min(prices)

1750000

np.percentile(d['price'], 25)

3430000.0

np.sort(d['price'])

array([ 1750000, 1750000, 1750000, 1767150, 1820000, 1855000,


1890000, 1890000, 1960000, 2100000, 2100000, 2100000,
2135000, 2233000, 2240000, 2275000, 2275000, 2275000,
2310000, 2345000, 2380000, 2380000, 2380000, 2408000,
2450000, 2450000, 2450000, 2450000, 2450000, 2450000,
2485000, 2485000, 2520000, 2520000, 2520000, 2590000,
2590000, 2590000, 2604000, 2653000, 2653000, 2660000,
2660000, 2660000, 2660000, 2660000, 2660000, 2660000,
2695000, 2730000, 2730000, 2800000, 2800000, 2835000,
2835000, 2835000, 2852500, 2870000, 2870000, 2870000,
2870000, 2940000, 2940000, 2940000, 2940000, 2940000,
2940000, 2940000, 2940000, 2961000, 2975000, 3003000,
3010000, 3010000, 3010000, 3010000, 3010000, 3010000,
3010000, 3045000, 3080000, 3080000, 3080000, 3080000,
3087000, 3115000, 3115000, 3115000, 3118850, 3129000,
3143000, 3150000, 3150000, 3150000, 3150000, 3150000,
3150000, 3150000, 3150000, 3150000, 3220000, 3220000,
3220000, 3220000, 3234000, 3255000, 3255000, 3290000,
3290000, 3290000, 3290000, 3290000, 3290000, 3290000,
3290000, 3325000, 3325000, 3332000, 3353000, 3360000,
3360000, 3360000, 3360000, 3360000, 3360000, 3360000,
3360000, 3395000, 3395000, 3395000, 3423000, 3430000,
3430000, 3430000, 3430000, 3430000, 3430000, 3465000,
3465000, 3465000, 3493000, 3500000, 3500000, 3500000,
3500000, 3500000, 3500000, 3500000, 3500000, 3500000,
3500000, 3500000, 3500000, 3500000, 3500000, 3500000,
3500000, 3500000, 3535000, 3570000, 3570000, 3570000,
3570000, 3605000, 3605000, 3633000, 3640000, 3640000,
3640000, 3640000, 3640000, 3640000, 3640000, 3640000,
3640000, 3675000, 3675000, 3675000, 3675000, 3703000,
3703000, 3710000, 3710000, 3710000, 3710000, 3710000,
3745000, 3773000, 3773000, 3773000, 3780000, 3780000,
3780000, 3780000, 3780000, 3780000, 3815000, 3836000,
3850000, 3850000, 3850000, 3850000, 3850000, 3850000,
3850000, 3885000, 3885000, 3920000, 3920000, 3920000,
3920000, 3920000, 3920000, 3920000, 3990000, 3990000,
3990000, 3990000, 3990000, 4007500, 4007500, 4025000,
4025000, 4025000, 4060000, 4060000, 4060000, 4060000,
4060000, 4095000, 4095000, 4095000, 4098500, 4123000,
4130000, 4130000, 4165000, 4165000, 4165000, 4193000,
4193000, 4200000, 4200000, 4200000, 4200000, 4200000,
4200000, 4200000, 4200000, 4200000, 4200000, 4200000,
4200000, 4200000, 4200000, 4200000, 4200000, 4200000,
4235000, 4235000, 4270000, 4270000, 4270000, 4270000,
4270000, 4270000, 4277000, 4305000, 4305000, 4319000,
4340000, 4340000, 4340000, 4340000, 4340000, 4375000,
4382000, 4403000, 4403000, 4403000, 4410000, 4410000,
4445000, 4473000, 4473000, 4473000, 4480000, 4480000,
4480000, 4480000, 4480000, 4515000, 4515000, 4515000,
4515000, 4543000, 4543000, 4550000, 4550000, 4550000,
4550000, 4550000, 4550000, 4550000, 4585000, 4585000,
4613000, 4620000, 4620000, 4620000, 4620000, 4620000,
4655000, 4690000, 4690000, 4690000, 4690000, 4690000,
4690000, 4753000, 4760000, 4760000, 4760000, 4767000,
4795000, 4795000, 4830000, 4830000, 4830000, 4830000,
4865000, 4893000, 4893000, 4900000, 4900000, 4900000,
4900000, 4900000, 4900000, 4900000, 4900000, 4900000,
4900000, 4900000, 4900000, 4907000, 4935000, 4956000,
4970000, 4970000, 5005000, 5033000, 5040000, 5040000,
5040000, 5040000, 5075000, 5110000, 5110000, 5110000,
5110000, 5145000, 5145000, 5215000, 5215000, 5215000,
5229000, 5243000, 5250000, 5250000, 5250000, 5250000,
5250000, 5250000, 5250000, 5250000, 5250000, 5285000,
5320000, 5383000, 5390000, 5425000, 5460000, 5460000,
5460000, 5460000, 5495000, 5495000, 5523000, 5530000,
5530000, 5530000, 5565000, 5565000, 5600000, 5600000,
5600000, 5600000, 5600000, 5600000, 5600000, 5600000,
5600000, 5652500, 5740000, 5740000, 5740000, 5740000,
5740000, 5775000, 5803000, 5810000, 5810000, 5810000,
5866000, 5873000, 5873000, 5880000, 5880000, 5943000,
5950000, 5950000, 5950000, 5950000, 5950000, 5950000,
5950000, 5950000, 6020000, 6020000, 6020000, 6083000,
6083000, 6090000, 6090000, 6090000, 6107500, 6125000,
6160000, 6160000, 6195000, 6195000, 6195000, 6230000,
6230000, 6265000, 6293000, 6300000, 6300000, 6300000,
6300000, 6300000, 6405000, 6419000, 6440000, 6440000,
6475000, 6475000, 6510000, 6510000, 6510000, 6580000,
6615000, 6615000, 6629000, 6650000, 6650000, 6650000,
6650000, 6650000, 6650000, 6685000, 6720000, 6755000,
6790000, 6790000, 6860000, 6895000, 6930000, 6930000,
7000000, 7035000, 7070000, 7070000, 7140000, 7210000,
7210000, 7245000, 7343000, 7350000, 7350000, 7350000,
7350000, 7420000, 7420000, 7420000, 7455000, 7490000,
7525000, 7560000, 7560000, 7700000, 7700000, 7840000,
7875000, 7910000, 7962500, 7980000, 8043000, 8080940,
8120000, 8190000, 8295000, 8400000, 8400000, 8400000,
8400000, 8400000, 8463000, 8540000, 8575000, 8645000,
8645000, 8680000, 8750000, 8855000, 8890000, 8960000,
9100000, 9100000, 9240000, 9240000, 9310000, 9681000,
9800000, 9800000, 9870000, 10150000, 10150000, 10850000,
11410000, 12215000, 12250000, 12250000, 13300000], dtype=int64)

np.var(d['price'])

3492125008378.707

# Histogram of the 'area' column split into 30 bins.
area_fig, area_ax = plt.subplots(figsize=(8, 5))
area_ax.hist(d['area'], bins=30, color='skyblue', edgecolor='black')
area_ax.set_title('Distribution of Area')
area_ax.set_xlabel('Area')
area_ax.set_ylabel('Frequency')
area_ax.grid()
plt.show()

# Line plot of price against area. Rows are ordered by ascending area first
# so the line is drawn left to right.
sorted_data = d.sort_values('area')
marker_style = dict(marker='o', ms=8, mec='k', mfc='w')

plt.figure(figsize=(40, 10))
plt.plot(
    sorted_data['area'],
    sorted_data['price'],
    ls='-',
    color='green',
    **marker_style,
)
plt.title('Line Plot: Area vs Price')
plt.xlabel('Area')
plt.ylabel('Price')
plt.grid()
plt.show()
def _percent_label(pct):
    """Return the wedge's percentage text, or '' for wedges of 5% or less."""
    if pct > 5:
        return f'{pct:.1f}%'
    return ''


# Pie chart of how many houses have each bedroom count; every wedge is
# pulled out by the same small offset.
bedroom_counts = d['bedrooms'].value_counts()
explode = [0.05] * len(bedroom_counts)

plt.figure(figsize=(10, 8))
plt.pie(
    bedroom_counts,
    labels=bedroom_counts.index,
    autopct=_percent_label,
    explode=explode,
    startangle=140,
    pctdistance=0.8,
    labeldistance=1.1,
)
plt.title('Distribution of Houses by Bedrooms', fontsize=16)
plt.tight_layout()
plt.show()
# Scatter of price against area; marker colour encodes the bedroom count.
plt.figure(figsize=(10, 7))
scatter = plt.scatter(
    d['area'],
    d['price'],  # y is price so the data agrees with the title and y-label
    c=d['bedrooms'],
    cmap='viridis',
    alpha=0.7,
    edgecolors='w',
    s=80,
)
plt.colorbar(scatter, label='Number of Bedrooms')
plt.title('Area vs Price Colored by Bedrooms', fontsize=16)
plt.xlabel('Area')
plt.ylabel('Price')
plt.grid()
plt.show()
# 3D line through (area, bedrooms, parking), visiting rows in ascending area.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')

# Take the sort order on a plain array so the result is an array of row
# positions, which is what .iloc expects.
sorted_idx = np.argsort(d['area'].to_numpy())
x = d['area'].iloc[sorted_idx]
y = d['bedrooms'].iloc[sorted_idx]
z = d['parking'].iloc[sorted_idx]

ax.plot(x, y, z, color='c', marker='o', ms=4, mec='k', mfc='w', ls=':', lw=2)
ax.set_xlabel('Area')
ax.set_ylabel('Bedrooms')
ax.set_zlabel('parking')
# The z axis carries parking, so the title names parking rather than price.
ax.set_title('3D Line Plot: Area, Bedrooms vs Parking')

plt.show()
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Box plot of price for each bedroom count.
box_fig, box_ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=d, x='bedrooms', y='price', palette='Pastel1', ax=box_ax)
box_ax.set_title('Price Distribution Across Bedroom Counts')
box_ax.set_xlabel('Number of Bedrooms')
box_ax.set_ylabel('Price')
plt.show()
# Count histogram of price (30 bins) with a KDE curve overlaid.
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    d['price'],
    bins=30,
    kde=True,
    color='skyblue',
    edgecolor='black',
    ax=hist_ax,
)
hist_ax.set_title('Histogram of Price')
hist_ax.set_xlabel('Price')
hist_ax.set_ylabel('Count')
hist_ax.grid()
plt.show()
# Filled kernel-density estimate of price.
plt.figure(figsize=(8, 5))
# `fill` replaces the deprecated `shade` keyword; the deprecation warning is
# not visible in this notebook because warnings are globally ignored.
sns.kdeplot(d['price'], fill=True, color='purple', linewidth=2)
plt.title('KDE Plot of Price')
plt.xlabel('Price')
plt.ylabel('Density')
plt.grid()
plt.show()
# Density-normalised histogram of price (30 bins).
plt.figure(figsize=(8, 5))
# stat='density' makes the bar heights match the 'Density' y-axis label;
# histplot's default statistic is the raw count per bin.
sns.histplot(
    d['price'],
    bins=30,
    stat='density',
    color='orange',
    edgecolor='black',
)
plt.title('Distribution Plot of Price with Histogram ')
plt.xlabel('Price')
plt.ylabel('Density')
plt.grid()
plt.show()
import statistics as stats

stats.mean(d['price'])

4766729.247706422

stats.median(d['price'])

4340000

stats.mode(d['bedrooms'])

stats.variance(d['price'])

3498544355820.5728

stats.stdev(d['price'])

1870439.6156573922

stats.harmonic_mean(d['area'])

4398.311914424032

stats.geometric_mean(d['price'])
4443650.939099614

# Quartiles of price from pandas, followed by the grouped median from the
# statistics module.
price_column = d['price']
q1, q2, q3 = (price_column.quantile(level) for level in (0.25, 0.5, 0.75))

print(f"25% Quantile (Q1): {q1}")
print(f"50% Quantile (Median, Q2): {q2}")
print(f"75% Quantile (Q3): {q3}")

median_grouped_price = stats.median_grouped(price_column)
print(f"Median Grouped (Price): {median_grouped_price}")

25% Quantile (Q1): 3430000.0


50% Quantile (Median, Q2): 4340000.0
75% Quantile (Q3): 5740000.0
Median Grouped (Price): 4340000.0

You might also like