Q 1- Create a pandas series from a dictionary of values and an ndarray.
SOURCE CODE :
import pandas as pd
import numpy as np
# Inputs: a label -> value mapping and a one-dimensional NumPy array.
dict_values = dict(zip('abcd', range(1, 5)))
arr = np.arange(5, 9)

# A dict supplies both the index (its keys) and the data (its values);
# an ndarray supplies only data, so pandas assigns the default 0..n-1 index.
series_from_dict = pd.Series(dict_values)
series_from_array = pd.Series(arr)

for heading, series in (
    ("Series from dictionary:", series_from_dict),
    ("\nSeries from ndarray:", series_from_array),
):
    print(heading)
    print(series)
OUTPUT
Q 2-Create a Series and print all the elements that are above 75th percentile.
SOURCE CODE :
import pandas as pd
import numpy as np
# Ten random integers; randint's upper bound is exclusive, so values lie in 0..99.
random_values = np.random.randint(0, 100, 10)
data = pd.Series(random_values)
print("Original Series:")
print(data)

# Threshold below which 75% of the observations fall.
percentile_75 = np.percentile(data, q=75)

# Boolean mask selecting only the values strictly greater than the threshold.
above_threshold = data > percentile_75
print("\nElements above the 75th percentile:")
print(data[above_threshold])
OUTPUT
Q 3- Perform sorting on Series data and DataFrames
SOURCE CODE :
import pandas as pd
# --- Sorting a Series -------------------------------------------------------
# Create a Pandas Series
data = pd.Series([3, 1, 4, 1, 5, 9, 2, 6, 5, 3])
# sort_values() returns a new Series ordered by value (ascending by default);
# each value keeps its original index label.
sorted_series = data.sort_values()
print("Sorted Series:")
print(sorted_series)
# The same data from largest to smallest.
sorted_series_desc = data.sort_values(ascending=False)
print("\nSorted Series (descending):")
print(sorted_series_desc)

# --- Sorting a DataFrame ----------------------------------------------------
# The exercise also asks for DataFrame sorting, so a small frame is sorted
# both by column values and by index labels.
df = pd.DataFrame(
    {
        'Name': ['Ravi', 'Anita', 'Kiran', 'Meena'],
        'Marks': [82, 91, 77, 91],
    },
    index=[102, 104, 101, 103],
)
print("\nOriginal DataFrame:")
print(df)
# Highest marks first; ties on Marks are broken alphabetically by Name.
sorted_df = df.sort_values(by=['Marks', 'Name'], ascending=[False, True])
print("\nDataFrame sorted by Marks (descending), then Name (ascending):")
print(sorted_df)
# Order the rows by their index labels instead of by a column.
sorted_by_index_df = df.sort_index()
print("\nDataFrame sorted by index:")
print(sorted_by_index_df)
OUTPUT
Q 4 – Write a program to implement pivot() and pivot_table() on a DataFrame.
SOURCE CODE:
import pandas as pd
# Daily weather readings as (date, city, temperature, humidity) rows.
weather_rows = [
    ('2024-01-01', 'New York', 32, 40),
    ('2024-01-01', 'Los Angeles', 75, 60),
    ('2024-01-02', 'New York', 30, 35),
    ('2024-01-02', 'Los Angeles', 72, 55),
    ('2024-01-03', 'New York', 35, 45),
]
df = pd.DataFrame(weather_rows, columns=['Date', 'City', 'Temperature', 'Humidity'])

# Both reshapes use the same layout: one row per date, one column per city,
# temperature in the cells.
layout = dict(index='Date', columns='City', values='Temperature')

# pivot() only rearranges the rows; date/city pairs missing from the data become NaN.
pivot_df = df.pivot(**layout)
print("DataFrame after pivot() operation:")
print(pivot_df)

# pivot_table() additionally aggregates (here: mean) the values of each cell.
pivot_table_df = df.pivot_table(aggfunc='mean', **layout)
print("\nDataFrame after pivot_table() operation:")
print(pivot_table_df)
OUTPUT
Q 5 - Write a program to find mean absolute deviation on a DataFrame.
SOURCE CODE:
import pandas as pd
# Sample DataFrame
data = {
    'A': [10, 20, 30, 40, 50],
    'B': [15, 25, 35, 45, 55],
    'C': [20, 30, 40, 50, 60]
}
df = pd.DataFrame(data)
# Calculate the mean absolute deviation of every column.
# DataFrame.mad() was removed in pandas 2.0, so the statistic is computed from
# its definition instead: the mean of |x - mean(x)| per column.
mad = (df - df.mean()).abs().mean()
print("Mean Absolute Deviation:")
print(mad)
OUTPUT
Q 6- Two Series object, Population stores the details of four metro cities of India and
another object AvgIncome stores the total average income reported in four years
in these cities. Calculate income per capita for each of these metro cities.
SOURCE CODE:
import pandas as pd
# Both Series share the same city labels, so they are built from one index list.
cities = ['Delhi', 'Mumbai', 'Bangalore', 'Kolkata']
population = pd.Series([20_000_000, 22_000_000, 12_000_000, 15_000_000], index=cities)
avg_income = pd.Series([50_000_000, 60_000_000, 45_000_000, 55_000_000], index=cities)

# Element-wise division aligned on the city labels.
income_per_capita = avg_income.div(population)
print("Income per capita for each metro city:")
print(income_per_capita)
OUTPUT
Q 7- Create a DataFrame based on E-Commerce data and generate mean, mode,
median.
SOURCE CODE:
import pandas as pd
# Sample e-commerce data
data = {
    'Order_ID': [101, 102, 103, 104, 105],
    'Product': ['Laptop', 'Mouse', 'Keyboard', 'Monitor', 'Headphones'],
    'Price': [1200, 20, 50, 300, 100],
    'Quantity': [1, 2, 1, 1, 1]
}
# Create DataFrame
ecommerce_df = pd.DataFrame(data)
# Generate mean, mode, and median
mean_price = ecommerce_df['Price'].mean()
# mode() returns a Series holding every most-frequent value in sorted order,
# so the first entry is taken. Every product in this sample occurs once, which
# makes all of them tie; the first in sorted order is reported.
mode_product = ecommerce_df['Product'].mode().iloc[0]
median_quantity = ecommerce_df['Quantity'].median()
print("Mean Price:", mean_price)
print("Mode Product:", mode_product)
print("Median Quantity:", median_quantity)
OUTPUT
Q 8 - Create a DataFrame based on employee data and generate quartile and variance.
SOURCE CODE:
import pandas as pd
# Employee records as (id, name, age, salary) rows.
employee_rows = [
    (101, 'John', 30, 50000),
    (102, 'Alice', 35, 60000),
    (103, 'Bob', 28, 45000),
    (104, 'Emily', 40, 70000),
    (105, 'David', 45, 80000),
]
employee_df = pd.DataFrame(
    employee_rows, columns=['Employee_ID', 'Name', 'Age', 'Salary']
)

# The three quartile cut points: Q1, median (Q2) and Q3.
quartile_points = [0.25, 0.5, 0.75]
ages = employee_df['Age']
salaries = employee_df['Salary']

age_quartiles = ages.quantile(quartile_points)
salary_quartiles = salaries.quantile(quartile_points)

# Series.var() is the sample variance (divides by n - 1 by default).
age_variance = ages.var()
salary_variance = salaries.var()

print("Age Quartiles:")
print(age_quartiles)
print("\nSalary Quartiles:")
print(salary_quartiles)
print("\nAge Variance:", age_variance)
print("Salary Variance:", salary_variance)
OUTPUT
Q 9- Program to implement Skewness on Random data.
SOURCE CODE:
import pandas as pd
import numpy as np
# Generate random data
np.random.seed(0)  # For reproducibility
# 1000 draws from a normal distribution with mean 0 and standard deviation 1
random_data = np.random.normal(loc=0, scale=1, size=1000)
# Create a Pandas Series from the random data
data_series = pd.Series(random_data)
# Calculate skewness; a symmetric distribution such as the normal gives a
# value close to 0.
skewness = data_series.skew()
print("Skewness of the random data:", skewness)
OUTPUT
Q 10 - Create a DataFrame on any data and compute the statistical function of kurtosis.
SOURCE CODE:
import pandas as pd
import numpy as np
# Standard deviation used for each column's normally distributed sample.
# Dict order is preserved, so the columns are drawn in the order A, B, C.
column_scales = {'A': 1, 'B': 2, 'C': 0.5}
samples = {
    label: np.random.normal(loc=0, scale=spread, size=1000)
    for label, spread in column_scales.items()
}
df = pd.DataFrame(samples)

# One kurtosis value per column (kurt is the same method as kurtosis).
kurtosis_result = df.kurt()
print("Kurtosis for each column:")
print(kurtosis_result)
OUTPUT
Q 11 - Series objects Temp1, Temp2, Temp3, Temp4 store the temperature of days of
week 1, week 2, week 3, week 4. Write a script to:-
a. Print average temperature per week
b. Print average temperature of entire month
SOURCE CODE:
import pandas as pd
# Temperature readings for each of the four weeks (one column per week).
weekly_readings = [
    ('Temp1', [20, 22, 21, 23, 24]),
    ('Temp2', [18, 20, 19, 22, 23]),
    ('Temp3', [21, 23, 24, 22, 20]),
    ('Temp4', [19, 20, 18, 21, 22]),
]
df = pd.DataFrame(dict(weekly_readings))

# Column-wise mean gives one average per week.
average_per_week = df.mean()
# Mean over every cell of the underlying array gives the monthly average.
average_monthly_temp = df.to_numpy().mean()

print("Average temperature per week:")
print(average_per_week)
print("\nAverage temperature of entire month:", average_monthly_temp)
OUTPUT
Q 12 – Write a Program to read a CSV file and create its DataFrame.
SOURCE CODE:
# Python program to illustrate
# creating a data frame using CSV files
# import pandas module
import pandas as pd
# Load the CSV file into a DataFrame and show its first rows (head() defaults to five).
csv_path = "CardioGoodFitness.csv"
df = pd.read_csv(csv_path)
preview = df.head()
print(preview)
OUTPUT
Q 13 - Consider the DataFrame QtrSales where each row contains the item category,
item name and expenditure and group the rows by category, and print the
average expenditure per category.
SOURCE CODE:
import pandas as pd
# Quarterly sales as (category, item, expenditure) rows.
sales_rows = [
    ('Electronics', 'Laptop', 1500),
    ('Clothing', 'Shirt', 30),
    ('Electronics', 'Headphones', 100),
    ('Clothing', 'Jeans', 50),
    ('Electronics', 'Smartphone', 800),
]
QtrSales = pd.DataFrame(sales_rows, columns=['Category', 'Item', 'Expenditure'])

# Split the expenditure values by category, then average each group.
expenditure_by_category = QtrSales.groupby('Category')['Expenditure']
average_expenditure_per_category = expenditure_by_category.mean()
print("Average expenditure per category:")
print(average_expenditure_per_category)
OUTPUT
Q 14 - Create a DataFrame having age, name, weight of five students. Write a program
to display only the weight of first and fourth rows.
SOURCE CODE:
import pandas as pd
# Five students as (age, name, weight) rows.
student_rows = [
    (20, 'Alice', 60),
    (22, 'Bob', 65),
    (21, 'Charlie', 70),
    (23, 'David', 75),
    (19, 'Eve', 80),
]
students_df = pd.DataFrame(student_rows, columns=['Age', 'Name', 'Weight'])

# Pick the Weight column first, then rows by position: 0 is the first row
# and 3 is the fourth.
weight_first_fourth_rows = students_df['Weight'].iloc[[0, 3]]
print("Weight of first and fourth rows:")
print(weight_first_fourth_rows)
OUTPUT
Q 15 - Write a program to create a DataFrame to store weight, age and name of three
people. Print the DataFrame and its transpose.
SOURCE CODE :
import pandas as pd
# Three people as (name, age, weight) rows.
people_rows = [
    ('Alice', 25, 60),
    ('Bob', 30, 70),
    ('Charlie', 35, 80),
]
people_df = pd.DataFrame(people_rows, columns=['Name', 'Age', 'Weight'])

print("DataFrame:")
print(people_df)

# .T swaps rows and columns: the column labels become the index.
transposed_df = people_df.T
print("\nTranspose of the DataFrame:")
print(transposed_df)
OUTPUT
Q 16 Create the following DataFrame Patient with four columns: PatientID, Treatment_Starts, Drug
and Dosage.
Write code to perform the following operations on the above DataFrame.
(i) To check the no. of rows in the DataFrame.
(ii) To show the datatype of every column.
(iii) To access first and third column.
(iv) List no. of unique drug in dataframe.
(v) Diagnose record for Patient PT5 and Cisplatin drug
(vi) Get all rows where dosage is greater than 180.
(vii) Sort actual dataframe in ascending order of PatientID and descending order of
Treatment_Starts.
(viii) To show what are the drugs used and how many patients receive those drugs?
(ix) Create a bar chart in seaborn to compare counts for the two drugs.
(x) Display the average dosage of each drug.
Source code:-
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Creating the DataFrame
data = {
    'PatientID': ['PT1', 'PT20', 'PT2', 'PT5', 'PT8'],
    'Treatment_Starts': ['1/14/16', '1/2/16', '1/10/16', '1/24/16', '2/14/16'],
    'Drug': ['CISPLATIN', 'NIVOLUNAB', 'CISPLATIN', 'CISPLATIN', 'CISPLATIN'],
    'Dosage': [200, 140, 180, 140, 190]
}
df = pd.DataFrame(data)
# (i) Check number of rows in dataframe
print("(i) Number of rows in DataFrame:", len(df))
# (ii) Show datatype of every column
print("\n(ii) Datatype of every column:")
print(df.dtypes)
# (iii) Access first and third column (positions 0 and 2)
print("\n(iii) First and third column:")
print(df.iloc[:, [0, 2]])
# (iv) List number of unique drugs in dataframe
unique_drugs = df['Drug'].unique()
print("\n(iv) Number of unique drugs in DataFrame:", len(unique_drugs))
print("Unique drugs:", unique_drugs)
# (v) Diagnose record for Patient PT5 and Cisplatin drug
diagnosis = df[(df['PatientID'] == 'PT5') & (df['Drug'] == 'CISPLATIN')]
print("\n(v) Diagnose record for Patient PT5 and CISPLATIN drug:")
print(diagnosis)
# (vi) Get all rows where dosage is greater than 180
high_dosage = df[df['Dosage'] > 180]
print("\n(vi) Rows where dosage is greater than 180:")
print(high_dosage)
# (vii) Sort in ascending order of PatientID and descending order of Treatment_Starts.
# Both columns hold strings, so sorting them directly would be lexicographic
# ('PT20' before 'PT5', '1/2/16' after '1/14/16'). Temporary keys are built
# instead: the numeric part of the id and the parsed month/day/year date.
# df itself keeps its original columns and dtypes.
sort_keys = pd.DataFrame({
    'patient_no': df['PatientID'].str.extract(r'(\d+)', expand=False).astype(int),
    'start_date': pd.to_datetime(df['Treatment_Starts'], format='%m/%d/%y'),
})
sorted_labels = sort_keys.sort_values(
    by=['patient_no', 'start_date'], ascending=[True, False]
).index
sorted_df = df.loc[sorted_labels]
print("\n(vii) Sorted DataFrame:")
print(sorted_df)
# (viii) Show what are the drugs used and how many patients receive those drugs
drug_counts = df['Drug'].value_counts()
print("\n(viii) Drugs used and number of patients receiving each drug:")
print(drug_counts)
# (ix) Bar chart of how many rows mention each drug.
# countplot returns the Axes it drew on, so the title is set on it directly.
drug_count_axes = sns.countplot(x='Drug', data=df)
drug_count_axes.set_title('Counts of Drugs')
plt.show()
# (x) Average dosage of each drug: group the dosages by drug, then take the mean.
dosage_by_drug = df.groupby('Drug')['Dosage']
average_dosage = dosage_by_drug.mean()
print("\n(x) Average dosage of each drug:")
print(average_dosage)
Output:-
Q17 Give the code or syntax to perform the following operations on two 2D numpy arrays array1 and
array2 and
1D array array3.
a. Add array1 and array2
b. Find sum of array1 elements over a given axis.
c. Find product of array2 elements over a given axis.
d. Change the dimension of an array3 to 2D.
e. Transpose the array created in part d.
f. Display 2 rows and third column of 2D array array1.
g. Join two 2D array along row.
h. Convert array2 to 1D array.
i. Split array1 into multiple subarrays
Source code:-
import numpy as np
# Define the two 2-D arrays and the 1-D array the exercise works with
array1 = np.array([[1, 2, 3],
                   [4, 5, 6]])
array2 = np.array([[7, 8, 9],
                   [10, 11, 12]])
array3 = np.array([13, 14, 15, 16, 17, 18])
# a. Add array1 and array2 (element-wise)
array3_add = np.add(array1, array2)
print("a. Addition of array1 and array2:")
print(array3_add)
# b. Find sum of array1 elements over a given axis.
# axis=0 collapses the rows (one sum per column); axis=1 gives one sum per row.
sum_axis_0 = np.sum(array1, axis=0)
sum_axis_1 = np.sum(array1, axis=1)
print("\nb. Sum of array1 elements over axis 0:", sum_axis_0)
print("Sum of array1 elements over axis 1:", sum_axis_1)
# c. Find product of array2 elements over a given axis.
product_axis_0 = np.prod(array2, axis=0)
product_axis_1 = np.prod(array2, axis=1)
print("\nc. Product of array2 elements over axis 0:", product_axis_0)
print("Product of array2 elements over axis 1:", product_axis_1)
# d. Change the dimension of array3 to 2D.
# The 1-D array3 is reshaped into 2 rows; -1 lets NumPy infer the column count.
array3_2d = array3.reshape(2, -1)
print("\nd. Reshaped array3 to 2D:")
print(array3_2d)
# e. Transpose the array created in part d.
array3_transpose = array3_2d.T
print("\ne. Transpose of 2D array3:")
print(array3_transpose)
# f. Display 2 rows and third column of 2D array array1.
print("\nf. 2 rows and third column of array1:")
print(array1[:2, 2])
# g. Join two 2D arrays along row (axis=0 stacks array2 below array1).
array_join = np.concatenate((array1, array2), axis=0)
print("\ng. Joined arrays along row:")
print(array_join)
# h. Convert array2 to 1D array.
array2_flatten = array2.flatten()
print("\nh. 1D array from array2:")
print(array2_flatten)
# i. Split array1 into multiple subarrays (two equal parts along axis 0)
array1_split = np.split(array1, 2)
print("\ni. Split array1 into multiple subarrays:")
print(array1_split)
Output:-
Q 18
a) Write python code to create the following series
101 Harsh
102 Arun
103 Ankur
104 Harpal
105 Divya
106 Jeet
b) Show details of 1st 3 employees using head function
c) Show details of last 3 employees using tail function
d) Show details of 1st 3 employees without using head function
e) Show details of last 3 employees without using tail function
f) Show value of index no 102.
g) Show 2nd to 4th records.
h) Show values of index no=101,103,105.
i) Show details of "Arun"
Source code:-
import pandas as pd
# Create the series: employee numbers are the index labels, names are the values
data = {
    101: 'Harsh',
    102: 'Arun',
    103: 'Ankur',
    104: 'Harpal',
    105: 'Divya',
    106: 'Jeet'
}
series = pd.Series(data, name='Employee')
# The index labels are integers, so plain series[...] does not say whether a
# number is a label or a position. Below, .iloc is used for positions and
# .loc for labels.
# a) Create the series
print("(a) Series:")
print(series)
print()
# b) Show details of 1st 3 employees using head function
first_three = series.head(3)
print("(b) Details of first 3 employees using head function:")
print(first_three)
print()
# c) Show details of last 3 employees using tail function
last_three = series.tail(3)
print("(c) Details of last 3 employees using tail function:")
print(last_three)
print()
# d) Show details of 1st 3 employees without using head function
first_three_by_position = series.iloc[:3]
print("(d) Details of first 3 employees without using head function:")
print(first_three_by_position)
print()
# e) Show details of last 3 employees without using tail function
last_three_by_position = series.iloc[-3:]
print("(e) Details of last 3 employees without using tail function:")
print(last_three_by_position)
print()
# f) Show value of index no 102 (label lookup)
value_at_102 = series.loc[102]
print("(f) Value of index no 102:")
print(value_at_102)
print()
# g) Show 2nd to 4th records (positions 1, 2 and 3)
second_to_fourth = series.iloc[1:4]
print("(g) 2nd to 4th records:")
print(second_to_fourth)
print()
# h) Show values of index no=101,103,105
selected_by_label = series.loc[[101, 103, 105]]
print("(h) Values of index no=101,103,105:")
print(selected_by_label)
print()
# i) Show details of "Arun" (boolean mask on the values)
arun_details = series[series == 'Arun']
print('(i) Details of "Arun":')
print(arun_details)
Output:-
Q 19 Create a dataframe for the below given data
Write a code to perform following operations on above dataframe:
i. Print the batsman name along with runs scored in Test and T20 using column names and
dot notation.
ii. Display the Batsman name along with runs scored in ODI using loc
iii. Display the batsman details who scored runs:
More than 2000 in ODI
Less than 2500 in Test
More than 1500 in T20
iv. Display the columns using column index number like 0, 2, 4.
v. Display the alternate rows.
vi. Reindex the dataframe created above with batsman name and delete data of Hardik
Pandya and Shikhar Dhawan by their index from original dataframe.
vii. Insert 2 rows in the dataframe and delete rows whose index is 1 and 4.
viii. Delete a column Test, add one more column total at last (next to T20 column), make
total of ODI and T20 runs in that column.
ix. Rename column T20 with "T20I Runs".
x. Print the dataframe without headers.
Source code:-
import pandas as pd
# Create the DataFrame
data = {
    'SNO': [1, 2, 3, 4, 5],
    'Batsman': ['Virat Kohli', 'Ajinkya Rahane', 'Rohit Sharma', 'Shikhar Dhawan',
                'Hardik Pandya'],
    'Test': [3543, 2578, 2280, 2158, 1879],
    'ODI': [2945, 2165, 2080, 1957, 1856],
    'T20': [1925, 1853, 1522, 1020, 1020]
}
df = pd.DataFrame(data)
# Original column order, used in step vii to restore the layout.
column_order = list(df.columns)
# i. Print the batsman name along with runs scored in Test and T20 using column names
# and dot notation. The three columns are placed side by side so each row
# reads as one record.
test_and_t20 = pd.concat([df.Batsman, df.Test, df.T20], axis=1)
print("i. Batsman name along with runs scored in Test and T20 using column names and dot notation:")
print(test_and_t20)
print()
# ii. Display the Batsman name along with runs scored in ODI using loc
print("ii. Batsman name along with runs scored in ODI using loc:")
print(df.loc[:, ['Batsman', 'ODI']])
print()
# iii. Display the batsman details who scored runs:
# More than 2000 in ODI
odi_over_2000 = df[df['ODI'] > 2000]
print("iii. Batsman who scored more than 2000 runs in ODI:")
print(odi_over_2000)
print()
# Less than 2500 in Test
test_under_2500 = df[df['Test'] < 2500]
print("Batsman who scored less than 2500 runs in Test:")
print(test_under_2500)
print()
# More than 1500 in T20
t20_over_1500 = df[df['T20'] > 1500]
print("Batsman who scored more than 1500 runs in T20:")
print(t20_over_1500)
print()
# iv. Display the columns using column index number like 0, 2, 4.
print("iv. Display the columns using column index number:")
print(df.iloc[:, [0, 2, 4]])
print()
# v. Display the alternated rows (every second row, starting with the first).
print("V. Display the alternated rows:")
print(df.iloc[::2])
print()
# vi. Reindex the dataframe created above with batsman name and delete data of Hardik
# Pandya and Shikhar Dhawan by their index from original dataframe.
df.set_index('Batsman', inplace=True)
df.drop(['Hardik Pandya', 'Shikhar Dhawan'], inplace=True)
print("vi. Reindexed DataFrame with data of Hardik Pandya and Shikhar Dhawan removed:")
print(df)
print()
# vii. Insert 2 rows in the dataframe and delete rows whose index is 1 and 4.
new_rows = pd.DataFrame({'SNO': [6, 7],
                         'Batsman': ['New Player 1', 'New Player 2'],
                         'Test': [1500, 1600],
                         'ODI': [1400, 1500],
                         'T20': [1200, 1300]})
# The batsman names currently live in the index; reset_index() turns them back
# into a column first, otherwise ignore_index=True would discard them and the
# existing rows would end up with NaN in 'Batsman'.
df = pd.concat([df.reset_index(), new_rows], ignore_index=True)
df = df.reindex(columns=column_order)
df = df.drop(index=[1, 4])
print("vii. DataFrame after inserting 2 rows and deleting rows with index 1 and 4:")
print(df)
print()
# viii. Delete a column Test, add one more column total at last (next to T20 column), make
# total of ODI and T20 runs in that column.
df = df.drop(columns=['Test'])
df['Total'] = df['ODI'] + df['T20']
print("viii. DataFrame after deleting Test column and adding Total column:")
print(df)
print()
# ix. Rename column T20 with "T20I Runs".
# NOTE(review): the task text reads "T201 Runs", presumably a misprint of
# "T20I Runs" - confirm. One name is used for both the rename and the message
# so the two cannot disagree.
renamed_t20_label = 'T20I Runs'
df = df.rename(columns={'T20': renamed_t20_label})
print(f"ix. DataFrame after renaming column T20 to {renamed_t20_label}:")
print(df)
print()
# x. Print the dataframe without headers.
print("DataFrame without headers:")
print(df.to_string(header=False))
Output:-
Q 20 Create the following dataframe "Sales" containing year-wise sales figures for five
salespersons in INR. Use the years as column labels and the salesperson names as indexes
1.Display the indexes
2.Display the names of the columns
3.Display the dimensions, shape, size, and values
4.Display the last two rows
5.Display the first two columns
6.Change the dataframe Sales such that it becomes its transpose
7.Add data to Sales for the salesman "Sumeet" where the sales made are [196.2, 37800, 52000,
78438] in the years [2014, 2015, 2016, 2017] respectively
8.Delete the data for the year 2014
9.Update the sale made by Shruti in 2017 to 100000
10.Export the dataframe Sales to a comma separated file "SalesFigures.csv" on the disk. Do not
export the indexes or column names
11.Change the name of the salesperson "Ankit" to "Vivaan" and "Kinshuk" to "Shailesh"
12.Delete the data for the salesman "Madhu"
Source Code:-
import pandas as pd
def display_df(frame, index=True):
    """Print ``frame`` as plain text.

    Args:
        frame: the DataFrame to show.
        index: when False, the row labels are left out of the output.
    """
    print(frame.to_string(index=index))


# Year-wise sales (INR): one column per year, one row per salesperson.
data = {
    '2014': [100.5, 150.8, 200.9, 30000, 40000],
    '2015': [12000, 18000, 22000, 30000, 45000],
    '2016': [2000, 5000, 70000, 1000, 1250],
    '2017': [50000, 60000, 70000, 80000, 90000]
}
Sales = pd.DataFrame(
    data,
    index=['Madhu', 'Kusum', 'Kinshuk', 'Ankit', 'Shruti']
)
# Every step below builds its result from the unmodified Sales frame, so the
# steps do not depend on each other.
# 1) Display the indexes
print('\n1)', Sales.index)
# 2) Display the names of the columns
print('\n2)', Sales.columns)
# 3) Display the dimensions, shape, size, and values
print(f'\n3) Dimensions: {Sales.ndim}\nShape: {Sales.shape}\nSize: {Sales.size}\nValues:\n{Sales.values}')
# 4) Display the last two rows
last_two_rows = Sales.tail(2)
print('\n4)')
display_df(last_two_rows, index=True)
# 5) Display the first two columns
first_two_columns = Sales.iloc[:, :2]
print('\n5)')
display_df(first_two_columns, index=True)
# 6) Change the dataframe Sales such that it becomes its transpose
transposed = Sales.T
print('\n6)')
display_df(transposed, index=True)
# 7) Add data to Sales for the salesman "Sumeet" where the sales made are
# [196.2, 37800, 52000, 78438] in the years [2014, 2015, 2016, 2017] respectively
to_add = pd.DataFrame([[196.2, 37800, 52000, 78438]], columns=Sales.columns, index=['Sumeet'])
with_sumeet = pd.concat([Sales, to_add])
print('\n7)')
display_df(with_sumeet, index=True)
# 8) Delete the data for the year 2014
without_2014 = Sales.drop(columns=['2014'])
print('\n8)')
display_df(without_2014, index=True)
# 9) Update the sale made by Shruti in 2017 to 100000 (on a copy, so Sales is untouched)
shruti_updated = Sales.copy()
shruti_updated.loc['Shruti', '2017'] = 100000
print('\n9)')
display_df(shruti_updated, index=True)
# 10) Export the dataframe Sales to a comma separated file "SalesFigures.csv" on the disk.
# Do not export the indexes or column names
Sales.to_csv('SalesFigures.csv', index=False, header=False)
print('\n10) Successfully exported the Sales dataframe to SalesFigures.csv without indexes and column names')
# 11) Change the name of the salesperson "Ankit" to "Vivaan" and "Kinshuk" to "Shailesh"
to_rename = {'Ankit': 'Vivaan', 'Kinshuk': 'Shailesh'}
renamed = Sales.rename(index=to_rename)
print('\n11)')
display_df(renamed, index=True)
# 12) Delete the data for the salesman "Madhu"
without_madhu = Sales.drop(index='Madhu')
print('\n12)')
display_df(without_madhu, index=True)
Output:-