Module 6 NumPy and Pandas
Creating NumPy arrays, Indexing and slicing in NumPy, creating multidimensional arrays,
NumPy Data types, Array Attributes, Indexing and Slicing, Creating array views and copies,
Manipulating array shapes, I/O
Basics of Pandas, Using multilevel series, Series and Data Frames,
Grouping, aggregating, Merge Data Frames
NumPy
# Demonstrates the basic attributes exposed by a NumPy array.
import numpy as np

# Build a two-dimensional array from a nested list (2 rows, 3 columns).
arr = np.array([[1, 2, 3], [4, 2, 5]])

# Each entry pairs a report label with the attribute it describes, in the
# order they are printed: Python type, number of axes, shape, total
# element count, and element dtype.
characteristics = (
    ("Array is of type: ", type(arr)),
    ("No. of dimensions: ", arr.ndim),
    ("Shape of array: ", arr.shape),
    ("Size of array: ", arr.size),
    ("Array stores elements of type: ", arr.dtype),
)
for label, value in characteristics:
    print(label, value)
Creating an array
# Python program to demonstrate
# array creation techniques
import numpy as np

# Creating array from a nested list with element type float
a = np.array([[1, 2, 4], [5, 8, 7]], dtype='float')
print("Array created using passed list:\n", a)

# Creating array from a tuple
b = np.array((1, 3, 2))
print("\nArray created using passed tuple:\n", b)

# Creating a 3x4 array with all zeros
c = np.zeros((3, 4))
print("\nAn array initialized with all zeros:\n", c)

# Create a constant-value array of complex type
# (a space follows the full stop so the two sentences do not run together)
d = np.full((3, 3), 6, dtype='complex')
print("\nAn array initialized with all 6s. "
      "Array type is complex:\n", d)

# Create an array with random values
e = np.random.random((2, 2))
print("\nA random array:\n", e)

# Create a sequence of integers
# from 0 up to (but excluding) 30 with steps of 5
f = np.arange(0, 30, 5)
print("\nA sequential array with steps of 5:\n", f)

# Create a sequence of 10 evenly spaced values in the range 0 to 5
# (the trailing space keeps "between" and "0" apart when the adjacent
# literals are joined)
g = np.linspace(0, 5, 10)
print("\nA sequential array with 10 values between "
      "0 and 5:\n", g)

# Reshaping a 3x4 array to a 2x2x3 array (12 elements either way)
arr = np.array([[1, 2, 3, 4],
                [5, 2, 4, 2],
                [1, 2, 0, 1]])
newarr = arr.reshape(2, 2, 3)
print("\nOriginal array:\n", arr)
print("Reshaped array:\n", newarr)

# Flatten a 2-D array into a 1-D copy
arr = np.array([[1, 2, 3], [4, 5, 6]])
flarr = arr.flatten()
print("\nOriginal array:\n", arr)
print("Flattened array:\n", flarr)
Array Indexing:
# Python program to demonstrate
# array creation techniques, followed by indexing and slicing
import numpy as np

# Creating array from a nested list with element type float
a = np.array([[1, 2, 4], [5, 8, 7]], dtype='float')
print("Array created using passed list:\n", a)

# Creating array from a tuple
b = np.array((1, 3, 2))
print("\nArray created using passed tuple:\n", b)

# Creating a 3x4 array with all zeros
c = np.zeros((3, 4))
print("\nAn array initialized with all zeros:\n", c)

# Create a constant-value array of complex type
# (a space follows the full stop so the two sentences do not run together)
d = np.full((3, 3), 6, dtype='complex')
print("\nAn array initialized with all 6s. "
      "Array type is complex:\n", d)

# Create an array with random values
e = np.random.random((2, 2))
print("\nA random array:\n", e)

# Create a sequence of integers
# from 0 up to (but excluding) 30 with steps of 5
f = np.arange(0, 30, 5)
print("\nA sequential array with steps of 5:\n", f)

# Create a sequence of 10 evenly spaced values in the range 0 to 5
# (the trailing space keeps "between" and "0" apart when the adjacent
# literals are joined)
g = np.linspace(0, 5, 10)
print("\nA sequential array with 10 values between "
      "0 and 5:\n", g)

# Reshaping a 3x4 array to a 2x2x3 array (12 elements either way)
arr = np.array([[1, 2, 3, 4],
                [5, 2, 4, 2],
                [1, 2, 0, 1]])
newarr = arr.reshape(2, 2, 3)
print("\nOriginal array:\n", arr)
print("Reshaped array:\n", newarr)

# Flatten a 2-D array into a 1-D copy
arr = np.array([[1, 2, 3], [4, 5, 6]])
flarr = arr.flatten()
print("\nOriginal array:\n", arr)
print("Flattened array:\n", flarr)

# Indexing and slicing: a separate array is used so the names defined
# earlier in this snippet keep their values.
idx_arr = np.array([[-1, 2, 0, 4],
                    [4, -0.5, 6, 0],
                    [2.6, 0, 7, 8],
                    [3, -7, 4, 2.0]])

# Slicing: rows 0-1, and every second column starting at column 0
sliced = idx_arr[:2, ::2]
print("\nFirst 2 rows and alternate columns (0 and 2):\n", sliced)

# Integer array indexing: row and column lists are paired element-wise,
# selecting positions (0, 3), (1, 2), (2, 1) and (3, 0)
picked = idx_arr[[0, 1, 2, 3], [3, 2, 1, 0]]
print("\nElements at indices (0, 3), (1, 2), (2, 1), (3, 0):\n", picked)

# Boolean array indexing: keep only the elements satisfying the condition
positive = idx_arr[idx_arr > 0]
print("\nElements greater than 0:\n", positive)
Basic operations:
# Demonstrates element-wise arithmetic on a NumPy array, in-place
# modification, and transposition.
import numpy as np

a = np.array([1, 2, 5, 3])

# Each arithmetic expression produces a new array and leaves `a` untouched,
# so all four results can be computed up front and reported in order.
elementwise_results = (
    ("Adding 1 to every element:", a + 1),
    ("Subtracting 3 from each element:", a - 3),
    ("Multiplying each element by 10:", a * 10),
    ("Squaring each element:", a ** 2),
)
for caption, outcome in elementwise_results:
    print(caption, outcome)

# The augmented assignment modifies the existing array in place.
a *= 2
print("Doubled each element of original array:", a)

# Rebind `a` to a square matrix and show it next to its transpose.
a = np.array([[1, 2, 3], [3, 4, 5], [9, 6, 0]])
transposed = a.T
print("\nOriginal array:\n", a)
print("Transpose of array:\n", transposed)
Unary operators:
# Demonstrates reductions (max, min, sum, cumulative sum) on a NumPy array.
import numpy as np

arr = np.array([[1, 5, 6], [4, 7, 2], [3, 1, 9]])

# Maximum over the whole array, then per row (axis=1 collapses the columns).
overall_max = arr.max()
row_max = arr.max(axis=1)
print("Largest element is:", overall_max)
print("Row-wise maximum elements:", row_max)

# Minimum per column (axis=0 collapses the rows).
col_min = arr.min(axis=0)
print("Column-wise minimum elements:", col_min)

# Sum of every element in the array.
total = arr.sum()
print("Sum of all array elements:", total)

# Running total along each row.
running_row_sum = arr.cumsum(axis=1)
print("Cumulative sum along each row:\n", running_row_sum)
Binary operators:
# Demonstrates binary operations between two NumPy arrays of equal shape.
import numpy as np

a = np.array([[1, 2], [3, 4]])
b = np.array([[4, 3], [2, 1]])

# Element-wise addition
elementwise_sum = a + b
print("Array sum:\n", elementwise_sum)

# Element-wise product (not a matrix product)
elementwise_product = a * b
print("Array multiplication:\n", elementwise_product)

# Matrix product of the two 2-D arrays
matrix_product = a.dot(b)
print("Matrix multiplication:\n", matrix_product)
Universal functions
# Demonstrates NumPy universal functions, which apply element by element.
import numpy as np

# Sine of 0, pi/2 and pi
a = np.array([0, np.pi/2, np.pi])
sines = np.sin(a)
print("Sine values of array elements:", sines)

# Exponential and square root of the same integer array
a = np.array([0, 1, 2, 3])
exponentials = np.exp(a)
roots = np.sqrt(a)
print("Exponent of array elements:", exponentials)
print("Square root of array elements:", roots)
Pandas
Creating a series
import pandas as pd
import numpy as np

# Creating an empty series. The dtype is given explicitly because the
# default dtype of an empty Series differs between pandas versions (and
# older versions warn when it is omitted).
ser = pd.Series(dtype="object")
print(ser)

# Creating a series from a simple NumPy array of strings; pandas assigns
# a default integer index 0..4. All string literals use plain ASCII quotes
# (typographic quotes are not valid Python string delimiters).
data = np.array(['abc', 'pqr', 'xyz', 'lmn', 'uvw'])
ser = pd.Series(data)
print(ser)
Pandas Dataframe
# Demonstrates building an empty DataFrame and a DataFrame from a list.
import pandas as pd

# A constructor call with no arguments yields an empty frame.
df = pd.DataFrame()
print(df)

# list of strings; each element becomes one row of a single column
lst = ['Geeks', 'For', 'Geeks', 'is',
       'portal', 'for', 'Geeks']

# Pass the list as the frame's data and display the result.
df = pd.DataFrame(data=lst)
print(df)
Extracting top 5 rows of a dataframe
# importing pandas module
import pandas as pd

# Load the CSV into a data frame. The path is a raw string so the
# backslashes are taken literally. read_csv takes no file-mode argument;
# a second positional argument would be interpreted as the delimiter, so
# none is passed.
data = pd.read_csv(r"E:\Megha Course Related Documents\Python\PPTs\student_data.csv")

# head() with no argument returns the first rows of the frame
# (five by default); keep them in a new variable
data_top = data.head()

# display
print(data_top)
N rows from a dataframe
# importing pandas module
import pandas as pd

# Load the CSV into a data frame. The Windows path is written as a raw
# string: in a normal string literal "\M", "\P" and "\s" are invalid escape
# sequences, which Python warns about and may reject in future versions.
data = pd.read_csv(r"E:\Megha Course Related Documents\Python\PPTs\student_data.csv")

# number of rows to return
n = 9

# select the "Name" column as a Series
series = data["Name"]

# returning the first n entries of that column
top = series.head(n=n)

# display
print(top)
Grouping, aggregating data frames
# Demonstrates column sums, multi-function aggregation and grouping.
import pandas as pd

# Build the marks table: one column per subject, one row per record.
subjects = ['Maths', 'English',
            'Science', 'History']
marks = [[9, 4, 8, 9],
         [8, 10, 7, 6],
         [7, 6, 8, 5]]
df = pd.DataFrame(marks, columns=subjects)

# Show the dataset as constructed.
print(df)

# Total of each column.
column_totals = df.sum()
print(column_totals)

# Sum, minimum and maximum of each column in a single table.
summary = df.agg(['sum', 'min', 'max'])
print(summary)

# Group the rows by their 'Maths' value and show the first row of each group.
a = df.groupby('Maths')
first_per_group = a.first()
print(first_per_group)
Merging Data frames
# Demonstrates stacking two data frames with identical columns.
import pandas as pd

# First set of employee records, keyed by column name.
data1 = dict(
    Name=['Jai', 'Princi', 'Gaurav', 'Anuj'],
    Age=[27, 24, 22, 32],
    Address=['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    Qualification=['Msc', 'MA', 'MCA', 'Phd'],
)

# Second set of employee records with the same columns.
data2 = dict(
    Name=['Abhi', 'Ayushi', 'Dhiraj', 'Hitesh'],
    Age=[17, 14, 12, 52],
    Address=['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    Qualification=['Btech', 'B.A', 'Bcom', 'B.hons'],
)

# Turn each dictionary into a frame; the row labels are chosen so the two
# frames do not share any index value (0-3 and 4-7).
df = pd.DataFrame(data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data2, index=[4, 5, 6, 7])

# Show both inputs, then a separator line.
print(df, "\n\n", df1)
print('***************')

# Concatenate along the default axis, appending df1's rows after df's.
frames = [df, df1]
res1 = pd.concat(frames)
print(res1)
print('************')
Merging of frames
# Demonstrates side-by-side concatenation with inner and outer joins.
import pandas as pd

# Employee records including a mobile number column.
data1 = {
    'Name': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
    'Age': [27, 24, 22, 32],
    'Address': ['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'],
    'Qualification': ['Msc', 'MA', 'MCA', 'Phd'],
    'Mobile No': [97, 91, 58, 76],
}

# Employee records including a salary column.
data2 = {
    'Name': ['Gaurav', 'Anuj', 'Dhiraj', 'Hitesh'],
    'Age': [22, 32, 12, 52],
    'Address': ['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'],
    'Qualification': ['MCA', 'Phd', 'Bcom', 'B.hons'],
    'Salary': [1000, 2000, 3000, 4000],
}

# Build the two frames; their row labels overlap only at 2 and 3.
df = pd.DataFrame(data1, index=[0, 1, 2, 3])
df1 = pd.DataFrame(data2, index=[2, 3, 6, 7])
print(df, "\n\n", df1)

# Concatenate along the column axis twice: 'inner' keeps only the row
# labels present in both frames, 'outer' keeps the row labels of either.
res2 = pd.concat([df, df1], axis=1, join='inner')
res3 = pd.concat([df, df1], axis=1, join='outer')

# Print a separator line before each result, inner join first.
separator = '********************************'
for combined in (res2, res3):
    print(separator)
    print(combined)