03/11/2023 12:58 DataScienceTP1-1.
ipynb - Colaboratory
Aya Ben Amara SR A
import pandas as pd
#lets create the dataframe
housingdf = pd.read_csv("housing.csv")
print(housingdf.head())
longitude latitude housing_median_age total_rooms total_bedrooms \
0 -122.23 37.88 41.0 880.0 129.0
1 -122.22 37.86 21.0 7099.0 1106.0
2 -122.24 37.85 52.0 1467.0 190.0
3 -122.25 37.85 52.0 1274.0 235.0
4 -122.25 37.85 52.0 1627.0 280.0
population households median_income median_house_value ocean_proximity
0 322.0 126.0 8.3252 452600.0 NEAR BAY
1 2401.0 1138.0 8.3014 358500.0 NEAR BAY
2 496.0 177.0 7.2574 352100.0 NEAR BAY
3 558.0 219.0 5.6431 341300.0 NEAR BAY
4 565.0 259.0 3.8462 342200.0 NEAR BAY
#Making a copy of the dataset
housingdfCOPY = housingdf.copy()
print(housingdfCOPY.head())
longitude latitude housing_median_age total_rooms total_bedrooms \
0 -122.23 37.88 41.0 880.0 129.0
1 -122.22 37.86 21.0 7099.0 1106.0
2 -122.24 37.85 52.0 1467.0 190.0
3 -122.25 37.85 52.0 1274.0 235.0
4 -122.25 37.85 52.0 1627.0 280.0
population households median_income median_house_value ocean_proximity
0 322.0 126.0 8.3252 452600.0 NEAR BAY
1 2401.0 1138.0 8.3014 358500.0 NEAR BAY
2 496.0 177.0 7.2574 352100.0 NEAR BAY
3 558.0 219.0 5.6431 341300.0 NEAR BAY
4 565.0 259.0 3.8462 342200.0 NEAR BAY
housingdfCOPY.describe()
longitude latitude housing_median_age total_rooms total_bedrooms
count 20640.000000 20640.000000 20640.000000 20640.000000 20433.000000 2
mean -119.569704 35.631861 28.639486 2635.763081 537.870553
std 2.003532 2.135952 12.585558 2181.615252 421.385070
min -124.350000 32.540000 1.000000 2.000000 1.000000
25% -121.800000 33.930000 18.000000 1447.750000 296.000000
50% -118.490000 34.260000 29.000000 2127.000000 435.000000
75% -118.010000 37.710000 37.000000 3148.000000 647.000000
max -114.310000 41.950000 52.000000 39320.000000 6445.000000 3
#importing numpy
import numpy as np
housingdfCOPY.plot()
https://colab.research.google.com/drive/1uTw3UaM7Z9NY3GDju3aYqhJklJzOspV-?hl=fr#scrollTo=8m9B3JStQInL&printMode=true 1/5
03/11/2023 12:58 DataScienceTP1-1.ipynb - Colaboratory
<Axes: >
import random
#function to add random values to the dataset.
def add_random_bad_values(df,number_of_bad_values = 1000,seed=42):
dataframe = housingdfCOPY.copy()
bad_values = [ np.NaN, None]
columns = dataframe.columns.tolist()
dataframe_size = len(dataframe)
random.seed(seed)
for null_values in range(number_of_bad_values):
random_column = random.choice(columns)
random_row = random.randint(0,dataframe_size)
dataframe.loc[random_row,random_column] = random.choice(bad_values)
return dataframe
housingdfCOPY = add_random_bad_values(housingdfCOPY)
#printing the copy
housingdfCOPY.isnull().sum()
longitude 76
latitude 123
housing_median_age 101
total_rooms 97
total_bedrooms 318
population 103
households 88
median_income 98
median_house_value 103
ocean_proximity 97
dtype: int64
#printing the original
housingdf.isnull().sum()
longitude 0
latitude 0
housing_median_age 0
total_rooms 0
total_bedrooms 207
population 0
households 0
median_income 0
median_house_value 0
ocean_proximity 0
dtype: int64
housingdfCOPY.plot()
<Axes: >
https://colab.research.google.com/drive/1uTw3UaM7Z9NY3GDju3aYqhJklJzOspV-?hl=fr#scrollTo=8m9B3JStQInL&printMode=true 2/5
03/11/2023 12:58 DataScienceTP1-1.ipynb - Colaboratory
#importing seabornn and matplotlib
import seaborn as sns
import matplotlib.pyplot as plot
plot.plot(housingdf.total_rooms,housingdf.population) #x axis is for the total rooms, y axis is the popuulation
sns.pairplot(housingdf[['housing_median_age','total_rooms','population','median_income']])
https://colab.research.google.com/drive/1uTw3UaM7Z9NY3GDju3aYqhJklJzOspV-?hl=fr#scrollTo=8m9B3JStQInL&printMode=true 3/5
03/11/2023 12:58 DataScienceTP1-1.ipynb - Colaboratory
<seaborn.axisgrid.PairGrid at 0x7e3d547c6fe0>
fig = plot.figure()
ax = fig.add_subplot(111,projection='3d')
ax.scatter(housingdf.median_income,housingdf.population, housingdf.housing_median_age)
plot.show()
! pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
Collecting https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
Using cached https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
Preparing metadata (setup.py) ... done
Requirement already satisfied: scipy<1.12,>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (1.11
Requirement already satisfied: pandas!=1.4.0,<2.1,>1.1 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0)
Requirement already satisfied: matplotlib<=3.7.3,>=3.2 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0)
Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (2.4.2)
Requirement already satisfied: PyYAML<6.1,>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (6.0.
Requirement already satisfied: jinja2<3.2,>=2.11.1 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (3.1
Requirement already satisfied: visions[type_image_path]==0.7.5 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.
Requirement already satisfied: numpy<1.26,>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (1.2
Requirement already satisfied: htmlmin==0.1.12 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (0.1.12)
Requirement already satisfied: phik<0.13,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (0.12
Requirement already satisfied: requests<3,>=2.24.0 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (2.3
Requirement already satisfied: tqdm<5,>=4.48.2 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (4.66.1)
Requirement already satisfied: seaborn<0.13,>=0.10.1 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (0
Requirement already satisfied: multimethod<2,>=1.4 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (1.1
Requirement already satisfied: statsmodels<1,>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (
Requirement already satisfied: typeguard<5,>=4.1.2 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (4.1
Requirement already satisfied: imagehash==4.3.1 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (4.3.1)
Requirement already satisfied: wordcloud>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (1.9.2)
Requirement already satisfied: dacite>=1.8 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (1.8.1)
Requirement already satisfied: numba<0.59.0,>=0.56.0 in /usr/local/lib/python3.10/dist-packages (from ydata-profiling==0.0.dev0) (0
Requirement already satisfied: PyWavelets in /usr/local/lib/python3.10/dist-packages (from imagehash==4.3.1->ydata-profiling==0.0.d
Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (from imagehash==4.3.1->ydata-profiling==0.0.dev0)
Requirement already satisfied: attrs>=19.3.0 in /usr/local/lib/python3.10/dist-packages (from visions[type_image_path]==0.7.5->ydat
Requirement already satisfied: networkx>=2.4 in /usr/local/lib/python3.10/dist-packages (from visions[type_image_path]==0.7.5->ydat
Requirement already satisfied: tangled-up-in-unicode>=0.0.4 in /usr/local/lib/python3.10/dist-packages (from visions[type_image_pat
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2<3.2,>=2.11.1->ydata-profilin
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<=3.7.3,>=3.2->ydata-pro
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib<=3.7.3,>=3.2->ydata-profili
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<=3.7.3,>=3.2->ydata-pr
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<=3.7.3,>=3.2->ydata-pr
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib<=3.7.3,>=3.2->ydata-prof
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib<=3.7.3,>=3.2->ydata-pro
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib<=3.7.3,>=3.2->ydata
Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba<0.59.0,>=0.56.0->y
Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from numba<0.59.0,>=0.56.0->ydata-profiling==
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas!=1.4.0,<2.1,>1.1->ydata-profili
Requirement already satisfied: joblib>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from phik<0.13,>=0.11.1->ydata-profiling=
Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->ydata-profiling
Requirement already satisfied: pydantic-core==2.10.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->ydata-profiling=
Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->ydata-profili
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.24.0->ydata
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.24.0->ydata-profiling==
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.24.0->ydata-profi
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.24.0->ydata-profi
Requirement already satisfied: patsy>=0.5.2 in /usr/local/lib/python3.10/dist-packages (from statsmodels<1,>=0.13.2->ydata-profilin
Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from patsy>=0.5.2->statsmodels<1,>=0.13.2->ydata-pro
from ydata_profiling import ProfileReport
https://colab.research.google.com/drive/1uTw3UaM7Z9NY3GDju3aYqhJklJzOspV-?hl=fr#scrollTo=8m9B3JStQInL&printMode=true 4/5
03/11/2023 12:58 DataScienceTP1-1.ipynb - Colaboratory
profile = ProfileReport(housingdf, title="data set", html={'style' : {'full_width':True}})
profile.to_file(output_file="report.html")
Summarize dataset: 100% 100/100 [00:31<00:00, 2.03it/s, Completed]
Generate report structure: 100% 1/1 [00:06<00:00, 6.61s/it]
Render HTML: 100% 1/1 [00:02<00:00, 2.85s/it]
Export report to file: 100% 1/1 [00:00<00:00, 24.96it/s]
pip install -U notebook-as-pdf
Collecting notebook-as-pdf
Downloading notebook_as_pdf-0.5.0-py3-none-any.whl (6.5 kB)
Requirement already satisfied: nbconvert in /usr/local/lib/python3.10/dist-packages (from notebook-as-pdf) (6.5.4)
Collecting pyppeteer (from notebook-as-pdf)
Downloading pyppeteer-1.0.2-py3-none-any.whl (83 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 83.4/83.4 kB 2.0 MB/s eta 0:00:00
Collecting PyPDF2 (from notebook-as-pdf)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 232.6/232.6 kB 4.9 MB/s eta 0:00:00
Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (4.9.3)
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (4.11
Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (6.1.0)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (0.7.1)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (
Requirement already satisfied: jinja2>=3.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (3.1.2)
Requirement already satisfied: jupyter-core>=4.7 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (5
Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (2.1
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (0
Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (0.8
Requirement already satisfied: nbformat>=5.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (5.9.2
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (23.2)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf)
Requirement already satisfied: pygments>=2.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (2.1
Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (1.2.1)
Requirement already satisfied: traitlets>=5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->notebook-as-pdf) (5.7.
Requirement already satisfied: appdirs<2.0.0,>=1.4.3 in /usr/local/lib/python3.10/dist-packages (from pyppeteer->notebook-as-pdf
Requirement already satisfied: certifi>=2021 in /usr/local/lib/python3.10/dist-packages (from pyppeteer->notebook-as-pdf) (2023.
Requirement already satisfied: importlib-metadata>=1.4 in /usr/local/lib/python3.10/dist-packages (from pyppeteer->notebook-as-p
Collecting pyee<9.0.0,>=8.1.0 (from pyppeteer->notebook-as-pdf)
Downloading pyee-8.2.2-py2.py3-none-any.whl (12 kB)
Requirement already satisfied: tqdm<5.0.0,>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from pyppeteer->notebook-as-pdf)
Collecting urllib3<2.0.0,>=1.25.8 (from pyppeteer->notebook-as-pdf)
Downloading urllib3-1.26.18-py2.py3-none-any.whl (143 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 143.8/143.8 kB 4.7 MB/s eta 0:00:00
Collecting websockets<11.0,>=10.0 (from pyppeteer->notebook-as-pdf)
Downloading websockets-10.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 106.8/106.8 kB 5.5 MB/s eta 0:00:00
Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata>=1.4->pyppeteer->no
Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.7->nbconvert->
Requirement already satisfied: jupyter-client>=6.1.12 in /usr/local/lib/python3.10/dist-packages (from nbclient>=0.5.0->nbconver
Requirement already satisfied: fastjsonschema in /usr/local/lib/python3.10/dist-packages (from nbformat>=5.1->nbconvert->noteboo
Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat>=5.1->nbconvert->notebo
Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert->noteboo
Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert->notebook-as-pdf) (
Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert->notebook-as-pdf)
Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nb
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>
Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5
Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->n
Requirement already satisfied: pyzmq>=13 in /usr/local/lib/python3.10/dist-packages (from jupyter-client>=6.1.12->nbclient>=0.5.
Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.10/dist-packages (from jupyter-client>=6.1.12->nbc
Requirement already satisfied: tornado>=4.1 in /usr/local/lib/python3.10/dist-packages (from jupyter-client>=6.1.12->nbclient>=0
Installing collected packages: pyee, websockets, urllib3, PyPDF2, pyppeteer, notebook-as-pdf
Attempting uninstall: urllib3
Found existing installation: urllib3 2.0.7
Uninstalling urllib3-2.0.7:
https://colab.research.google.com/drive/1uTw3UaM7Z9NY3GDju3aYqhJklJzOspV-?hl=fr#scrollTo=8m9B3JStQInL&printMode=true 5/5