# In [1]: Python's three built-in numeric types.
x, y, z = 1, 2.8, 1j  # int, float, complex (the "j" suffix marks the imaginary part)

# Show the runtime type of each value, one per line.
print(type(x), type(y), type(z), sep="\n")
<class 'int'>
<class 'float'>
<class 'complex'>
# In [2]: each of these values is a complex number.
x = complex(3, 5)  # same value as the literal 3+5j
y = z = 5j         # purely imaginary: the real part is 0

# Show the runtime type of each value, one per line.
print(type(x), type(y), type(z), sep="\n")
<class 'complex'>
<class 'complex'>
<class 'complex'>
# In [7]: converting between numeric types with the built-in constructors.
x, y, z = 1, 2.8, 1j

# int -> float, float -> int (the fractional part is dropped), int -> complex.
a, b, c = float(x), int(y), complex(x)

# First the converted values, then their types, one per line.
print(a, b, c, sep="\n")
print(type(a), type(b), type(c), sep="\n")
1.0
2
(1+0j)
<class 'float'>
<class 'int'>
<class 'complex'>
27-1-2025 - PYTHON LIBRARY
# Python libraries — NumPy: NumPy is used for numerical operations and for handling arrays.
# In [5]: build a one-dimensional NumPy array from a Python list and print it.
import numpy as np

array1 = np.asarray([1, 2, 3, 4])

print(f"NumPy Array: {array1}")
NumPy Array: [1 2 3 4]
Pandas is used for data manipulation and analysis.
# In [7]: build a small DataFrame from a dict of columns and print it.
import pandas as pd

# Each key becomes a column; the lists are the column values, row by row.
data = {
    "Name": ["Alice", "bob", "Ritika"],
    "Age": [25, 30, 19],
}
df = pd.DataFrame(data)

# Heading line followed by the plain-text rendering of the frame.
print("DataFrame:", df, sep="\n")
DataFrame:
Name Age
0 Alice 25
1 bob 30
2 Ritika 19
Matplotlib is used for data visualization.
# In [8]: minimal line graph.
import matplotlib.pyplot as plt

x = [1, 2, 3, 4]
y = [10, 20, 25, 30]

# Draw on an explicit Axes object instead of the implicit "current" axes.
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title("Simple Line Graph")

plt.show()
# In [10]: line graph with markers, axis labels, a title and a legend.
import matplotlib.pyplot as plt

x = [1, 2, 3, 4, 5]
y = [2, 4, 6, 8, 10]

fig, ax = plt.subplots()

# `label` is the text the legend shows for this line.
ax.plot(x, y, label="Line Plot", color="blue", marker="*")

ax.set_xlabel("X-Axis")
ax.set_ylabel("Y-Axis")
ax.set_title("Simple Line Graph")

ax.legend()

plt.show()
# In [11]: simple bar chart.
# Relies on `plt` (matplotlib.pyplot) having been imported by an earlier cell.

# One bar per category; `values` gives the bar heights in the same order.
categories = ['A', 'B', 'C', 'D']
values = [5, 7, 3, 8]

fig, ax = plt.subplots()
ax.bar(categories, values, color='orange')

# Axis labels and title.
ax.set_xlabel("categories")
ax.set_ylabel("values")
ax.set_title("simple bar chart")

# Render the figure.
plt.show()
Date 28-01-2025 SCATTER PLOT EXAMPLE
# In [6]: simple scatter plot.
import matplotlib.pyplot as plt

# Point coordinates: the i-th point is (x[i], y[i]).
x = [1, 2, 3, 4, 5]
y = [5, 7, 6, 8, 9]

fig, ax = plt.subplots()
ax.scatter(x, y, color='green', marker='x')

# Axis labels and title.
ax.set_xlabel("x-axis")
ax.set_ylabel("y-axis")
ax.set_title("simple scatter plot")

# Render the figure.
plt.show()
Data Cleaning and Handling Missing Values
# In [7]: fill missing ages with the column mean and drop rows that have no grade.
import pandas as pd
import numpy as np

# Create a simple dataset with missing values.
data = {
    'Name': ['John', 'Sarah', 'Mike', 'Emma', 'David'],
    'Age': [15, np.nan, 14, 16, np.nan],  # missing values in the Age column
    'Grade': ['A', 'B', 'A', 'C', 'B']
}
# Convert the data into a DataFrame.
df = pd.DataFrame(data)

# Print the original data.
print("Original Data:\n", df)

# Handling missing values:
# 1. Fill missing Age values with the average (mean) of the 'Age' column.
#    Series.mean() skips NaN by default, so only the known ages are averaged.
mean_age = df['Age'].mean()
# Assign the filled column back to the frame. Calling
# df["Age"].fillna(..., inplace=True) acts on a temporary selection: it emits a
# FutureWarning in pandas 2.x and leaves `df` unchanged under Copy-on-Write.
df['Age'] = df['Age'].fillna(mean_age)

# 2. Drop rows with missing values in 'Grade' (if any).
df = df.dropna(subset=['Grade'])

# Print the cleaned data.
print("\nCleaned Data (after handling missing values):\n", df)
Original Data:
Name Age Grade
0 John 15.0 A
1 Sarah NaN B
2 Mike 14.0 A
3 Emma 16.0 C
4 David NaN B
Cleaned Data (after handling missing values):
Name Age Grade
0 John 15.0 A
1 Sarah 15.0 B
2 Mike 14.0 A
3 Emma 16.0 C
4 David 15.0 B
# In [1]: load a student CSV, fill missing values and remove duplicate rows.
import pandas as pd  # Importing pandas library

# Step 1: Read the CSV file.
# Illustrative layout of the file (the names in the actual file may differ):
#   Name, Age, Grade
#   John, 15, A
#   Sarah, , B
#   Mike, 14, A
#   Emma, 16,
#   David, , C
# NOTE(review): this is an absolute, machine-specific path; the cell fails on
# any machine where the file is not at exactly this location.
df = pd.read_csv('C:\\Users\\ritik\\Downloads\\students1.csv')

# Step 2: Display the original data
print("Original Data:\n", df)

# Step 3: Data Cleaning
# Results are assigned back to the frame. Calling fillna(..., inplace=True) on
# df['col'] acts on a temporary selection: it emits a FutureWarning in
# pandas 2.x and leaves `df` unchanged under Copy-on-Write.

# a. Fill missing Age values with the mean of the Age column
#    (Series.mean() skips NaN by default).
df['Age'] = df['Age'].fillna(df['Age'].mean())

# b. Fill missing Grade values with a placeholder 'Not Graded'
df['Grade'] = df['Grade'].fillna('Not Graded')

# c. Check for and remove duplicate rows (if any)
df = df.drop_duplicates()

# Step 4: Display the cleaned data
print("\nCleaned Data:\n", df)
Original Data:
Name Age Grade
0 John 15.0 A
1 Janvi NaN B
2 Mike 14.0 A
3 Emma 16.0 NaN
4 kamakshr NaN C
Cleaned Data:
Name Age Grade
0 John 15.0 A
1 Janvi 15.0 B
2 Mike 14.0 A
3 Emma 16.0 Not Graded
4 kamakshr 15.0 C
Feature Scaling and Normalization
# In [2]: min-max scaling and standardization of two score columns.
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Step 1: Create sample data
data = {'Student': ['Alice', 'Bob', 'Charlie', 'Diana'],
        'Math_Score': [90, 45, 78, 62],
        'Science_Score': [85, 40, 72, 65]}

# Convert to a DataFrame
df = pd.DataFrame(data)

# Step 2: Display original data
print("Original Data:\n", df)

# Both scalers are fitted on the same two numeric columns.
score_columns = ['Math_Score', 'Science_Score']

# Step 3: Apply Min-Max Scaling
# Each column is rescaled to [0, 1]: (value - column min) / (column max - column min).
min_max_scaler = MinMaxScaler()
df[['Math_Score_Scaled', 'Science_Score_Scaled']] = min_max_scaler.fit_transform(df[score_columns])

# Step 4: Apply Standardization
# Each column is rescaled to zero mean and unit variance.
standard_scaler = StandardScaler()
df[['Math_Score_Standardized', 'Science_Score_Standardized']] = standard_scaler.fit_transform(df[score_columns])

# Step 5: Display scaled and standardized data
print("\nData After Scaling and Standardization:\n", df)
Original Data:
Student Math_Score Science_Score
0 Alice 90 85
1 Bob 45 40
2 Charlie 78 72
3 Diana 62 65
Data After Scaling and Standardization:
Student Math_Score Science_Score Math_Score_Scaled \
0 Alice 90 85 1.000000
1 Bob 45 40 0.000000
2 Charlie 78 72 0.733333
3 Diana 62 65 0.377778
Science_Score_Scaled Math_Score_Standardized Science_Score_Standardiz
ed
0 1.000000 1.255031 1.1905
97
1 0.000000 -1.402682 -1.5569
34
2 0.711111 0.546308 0.3968
66
3 0.555556 -0.398657 -0.0305
28
Encoding categorical variables is a way to convert text-based data into a numerical format
that a computer can understand. For example, if we have a column for "Colors" with values
like "Red," "Blue," and "Green," we need to turn those words into numbers for machine
learning models to work with them.
# In [4]: encode a text column as integer category codes.
# Importing the library
import pandas as pd

# Sample data (a small table)
data = {
    'Student': ['Alice', 'Bob', 'Charlie'],
    'Favorite Subject': ['Math', 'Science', 'Math']
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Printing the original DataFrame
print("Original Data:")
print(df)

# Encoding the categorical variable (Favorite Subject).
# Converting to the 'category' dtype and reading `.cat.codes` yields one integer
# per distinct value; categories inferred from strings are sorted, so here
# 'Math' -> 0 and 'Science' -> 1.
df['Favorite Subject Encoded'] = df['Favorite Subject'].astype('category').cat.codes

# Printing the updated DataFrame
print("\nData after encoding:")
print(df)
Original Data:
Student Favorite Subject
0 Alice Math
1 Bob Science
2 Charlie Math
Data after encoding:
Student Favorite Subject Favorite Subject Encoded
0 Alice Math 0
1 Bob Science 1
2 Charlie Math 0
In [ ]: 1