doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
INTRODUCTION TO NUMPY
# Create an array of integers from one to ten
1. Your first NumPy array
one_to_ten = np.arange(1, 11)
# edited/added # Create your scatterplot
import numpy as np plt.scatter(one_to_ten, doubling_array)
sudoku_list = np.load('sudoku_game.npy') plt.show()
# Import NumPy
4. Array dimensionality
import numpy as np
# Convert sudoku_list into an array # edited/added
sudoku_array = np.array(sudoku_list) sudoku_solution = np.load('sudoku_solution.npy')
# Print the type of sudoku_array sudoku_list = np.load('sudoku_game.npy')
print(type(sudoku_array)) sudoku_game = np.array(sudoku_list)
# Create the game_and_solution 3D array
2. Creating arrays from scratch
game_and_solution = np.array([sudoku_game, sudoku_solution])
# Create an array of zeros which has four columns and two rows # Print game_and_solution
zero_array = np.zeros((2, 4)) print(game_and_solution)
print(zero_array)
5. The fourth dimension
# Create an array of random floats which has six columns and three rows
random_array = np.random.random((3, 6)) # edited/added
print(random_array) new_sudoku_game = np.load('new_sudoku_game.npy')
new_sudoku_solution = np.load('new_sudoku_solution.npy')
3. A range array
game_and_solution = np.load('game_and_solution.npy')
doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] # Create a second 3D array of another game and its solution
plt.scatter(x_values, y_values) new_game_and_solution = np.array([new_sudoku_game, new_sudoku_solution])
plt.show()
# edited/added
from matplotlib import pyplot as plt # Create a 4D array of both game and solution 3D arrays
games_and_solutions = np.array([game_and_solution, new_game_and_solution]) # Print the data type of zero_array
print(zero_array.dtype)
# Print the shape of your 4D array # Create an array of zeros with three rows and two columns
print(games_and_solutions.shape) zero_array = np.zeros((3, 2))
6. Flattening and reshaping # Print the data type of zero_array
print(zero_array.dtype)
# edited/added
# Create a new array of int32 zeros with three rows and two columns
sudoku_game = np.load('sudoku_game_new.npy')
zero_int_array = np.zeros((3, 2), dtype=np.int32)
# Flatten sudoku_game
# Print the data type of zero_int_array
flattened_game = sudoku_game.flatten()
print(zero_int_array.dtype)
# Print the shape of flattened_game
print(flattened_game.shape) 8. Anticipating data types
# Flatten sudoku_game A string data type
flattened_game = sudoku_game.flatten()
np.array([78.988, "NumPy", True])
# Print the shape of flattened_game np.array([9, 1.12, True]).astype("<U5")
print(flattened_game.shape)
An integer data type
# Reshape flattened_game back to a nine by nine array
reshaped_game = flattened_game.reshape((9, 9)) np.array([34.62, 70.13, 9]).astype(np.int64)
np.array([45.67, True], dtype=np.int8)
# Print sudoku_game and reshaped_game
print(sudoku_game) A float data type
print(reshaped_game)
np.array([[6, 15.7], [True, False]])
np.random.random((4, 5))
7. NumPy data types
9. A smaller sudoku game
The dtype argument
# Print the data type of sudoku_game
# Create an array of zeros with three rows and two columns
print(sudoku_game.dtype)
zero_array = np.zeros((3, 2))
# Print the data type of sudoku_game
print(sudoku_game.dtype) hundred_diameters = tree_census[:100, 2]
# Change the data type of sudoku_game to int8 print(hundred_diameters)
small_sudoku_game = sudoku_game.astype(np.int8) # Create an array of trunk diameters with even row indices from 50 to 100 inclusive
# Print the data type of small_sudoku_game
print(small_sudoku_game.dtype) every_other_diameter = tree_census[50:101:2, 2]
print(every_other_diameter)
10. Slicing and indexing trees
12. Sorting trees
# edited/added
tree_census = np.load('tree_census.npy') # Extract trunk diameters information and sort from smallest to largest
# Select all rows of block ID data from the second column sorted_trunk_diameters = np.sort(tree_census[:, 2])
block_ids = tree_census[:, 1] print(sorted_trunk_diameters)
# Print the first five block_ids 13. Filtering with masks
print(block_ids[:5])
# Create an array which contains row data on the largest tree in tree_census
# Select all rows of block ID data from the second column
largest_tree_data = tree_census[tree_census[:, 2] == 51]
block_ids = tree_census[:, 1]
print(largest_tree_data)
# Select the tenth block ID from block_ids
# Create an array which contains row data on the largest tree in tree_census
tenth_block_id = block_ids[9]
largest_tree_data = tree_census[tree_census[:, 2] == 51]
print(tenth_block_id)
print(largest_tree_data)
# Select all rows of block ID data from the second column
# Slice largest_tree_data to get only the block id
block_ids = tree_census[:, 1]
largest_tree_block_id = largest_tree_data[:, 1]
# Select five block IDs from block_ids starting with the tenth ID
print(largest_tree_block_id)
block_id_slice = block_ids[9:14]
# Create an array which contains row data on the largest tree in tree_census
print(block_id_slice)
largest_tree_data = tree_census[tree_census[:, 2] == 51]
11. Stepping into 2D print(largest_tree_data)
# Create an array of the first 100 trunk diameters from tree_census # Slice largest_tree_data to get only the block ID
largest_tree_block_id = largest_tree_data[:, 1]
print(largest_tree_block_id) Compatible along the second axis
# Create an array which contains row data on all trees with largest_tree_block_id (4, 2) and (4, 3)
trees_on_largest_tree_block = tree_census[tree_census[:, 1] == largest_tree_block_id] Not compatible
(5, 2) and (7, 4)
print(trees_on_largest_tree_block) (4, 2) and (4,)
(4, 2) and (2,)
14. Fancy indexing vs. np.where()
17. Adding rows
# Create the block_313879 array containing trees on block 313879
block_313879 = tree_census[tree_census[:, 1] == 313879] # edited/added
print(block_313879) new_trees = np.array([[1211, 227386, 20, 0], [1212, 227386, 8, 0]])
# Create an array of row_indices for trees on block 313879 # Print the shapes of tree_census and new_trees
row_indices = np.where(tree_census[:, 1] == 313879) print(tree_census.shape, new_trees.shape)
# Create an array which only contains data for trees on block 313879 # Print the shapes of tree_census and new_trees
block_313879 = tree_census[row_indices] print(tree_census.shape, new_trees.shape)
print(block_313879) # Add rows to tree_census which contain data for the new trees
updated_tree_census = np.concatenate((tree_census, new_trees))
15. Creating arrays from conditions print(updated_tree_census)
# Create and print a 1D array of tree and stump diameters
18. Adding columns
trunk_stump_diameters = np.where(tree_census[:, 2] == 0, tree_census[:, 3], tree_census[:, 2]) # Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters) print(trunk_stump_diameters.shape, tree_census.shape)
# Print the shapes of tree_census and trunk_stump_diameters
16. Compatible or not?
print(trunk_stump_diameters.shape, tree_census.shape)
Compatible along the first axis
# Reshape trunk_stump_diameters
(4, 2) and (6, 2) reshaped_diameters = trunk_stump_diameters.reshape((1000, 1))
(15, 5) and (100, 5)
# Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters.shape, tree_census.shape) monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
# Reshape trunk_stump_diameters print(monthly_industry_sales)
reshaped_diameters = trunk_stump_diameters.reshape((1000, 1)) # Create a 2D array of total monthly sales across industries
# Concatenate reshaped_diameters to tree_census as the last column monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
concatenated_tree_census = np.concatenate((tree_census, reshaped_diameters), axis=1) print(monthly_industry_sales)
# Add this column as the last column in monthly_sales
print(concatenated_tree_census) monthly_sales_with_total = np.concatenate((monthly_sales, monthly_industry_sales), axis=1)
19. Deleting with np.delete()
print(monthly_sales_with_total)
# Delete the stump diameter column from tree_census
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) 21. Plotting averages
# Save the indices of the trees on block 313879 # Create the 1D array avg_monthly_sales
private_block_indices = np.where(tree_census[:, 1] == 313879) avg_monthly_sales = monthly_sales.mean(axis=1)
# Delete the stump diameter column from tree_census print(avg_monthly_sales)
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) # Create the 1D array avg_monthly_sales
# Save the indices of the trees on block 313879 avg_monthly_sales = monthly_sales.mean(axis=1)
private_block_indices = np.where(tree_census[:,1] == 313879) print(avg_monthly_sales)
# Delete the rows for trees on block 313879 from tree_census_no_stumps # Plot avg_monthly_sales by month
tree_census_clean = np.delete(tree_census_no_stumps, private_block_indices, axis=0) plt.plot(np.arange(1, 13), avg_monthly_sales, label="Average sales across industries")
# Print the shape of tree_census_clean # Plot department store sales by month
print(tree_census_clean.shape) plt.plot(np.arange(1, 13), monthly_sales[:, 2], label="Department store sales")
plt.legend()
20. Sales totals
plt.show()
# edited/added
monthly_sales = np.load('monthly_sales.npy') 22. Cumulative sales
# Create a 2D array of total monthly sales across industries # Find cumulative monthly sales for each industry
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) # edited/added
print(cumulative_monthly_industry_sales) monthly_industry_multipliers = np.load('monthly_industry_multipliers.npy')
# Find cumulative monthly sales for each industry # Create an array of monthly projected sales for all industries
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(cumulative_monthly_industry_sales) print(projected_monthly_sales)
# Plot each industry's cumulative sales by month as separate lines # Create an array of monthly projected sales for all industries
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 0], label="Liquor Stores") projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(projected_monthly_sales)
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 1], label="Restaurants") # Graph current liquor store sales and projected liquor store sales by month
plt.plot(np.arange(1, 13), monthly_sales[:, 0], label="Current liquor store sales")
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 2], label="Department stores")
plt.plot(np.arange(1, 13), projected_monthly_sales[:, 0], label="Projected liquor store sales")
plt.legend()
plt.show() plt.legend()
23. Tax calculations plt.show()
# Create an array of tax collected by industry and month 25. Vectorizing .upper()
tax_collected = monthly_sales * 0.05 # edited/added
print(tax_collected) names = np.array([["Izzy", "Monica", "Marvin"],
# Create an array of tax collected by industry and month ["Weber", "Patel", "Hernandez"]])
tax_collected = monthly_sales * 0.05 # Vectorize the .upper() string method
print(tax_collected) vectorized_upper = np.vectorize(str.upper)
# Create an array of sales revenue plus tax collected by industry and month # Apply vectorized_upper to the names array
total_tax_and_revenue = tax_collected + monthly_sales uppercase_names = vectorized_upper(names)
print(total_tax_and_revenue) print(uppercase_names)
24. Projecting sales 26. Broadcastable or not?
Broadcastable # Print the shapes of mean_multipliers and monthly_sales
(3, 4) and (1, 4) print(mean_multipliers.shape, monthly_sales.shape)
(3, 4) and (4, ) # Find the mean sales projection multiplier for each industry
(3, 4) and (3, 1)
mean_multipliers = monthly_industry_multipliers.mean(axis=0)
Not Broadcastable print(mean_multipliers)
# Print the shapes of mean_multipliers and monthly_sales
(3, 4) and (1, 2)
(3, 4) and (4, 1) print(mean_multipliers.shape, monthly_sales.shape)
(3, 4) and (3, ) # Multiply each value by the multiplier for that industry
27. Broadcasting across columns projected_sales = monthly_sales * mean_multipliers
print(projected_sales)
# edited/added
monthly_growth_rate = [1.01, 1.03, 1.03, 1.02, 1.05, 1.03, 1.06, 1.04, 1.03, 1.04, 1.02, 1.01] 29. Loading .npy files
# Convert monthly_growth_rate into a NumPy array # Load the mystery_image.npy file
with open("mystery_image.npy", "rb") as f:
monthly_growth_1D = np.array(monthly_growth_rate)
rgb_array = np.load(f)
# Reshape monthly_growth_1D
monthly_growth_2D = monthly_growth_1D.reshape((12, 1))
plt.imshow(rgb_array)
# Multiply each column in monthly_sales by monthly_growth_2D
plt.show()
print(monthly_growth_2D * monthly_sales)
30. Getting help
28. Broadcasting across rows
# Display the documentation for .astype()
# Find the mean sales projection multiplier for each industry
help(np.ndarray.astype)
mean_multipliers = monthly_industry_multipliers.mean(axis=0)
print(mean_multipliers) 31. Update and save
# Find the mean sales projection multiplier for each industry
# edited/added
mean_multipliers = monthly_industry_multipliers.mean(axis=0)
rgb_array = np.load('rgb_array.npy')
print(mean_multipliers)
# Reduce every value in rgb_array by 50 percent
darker_rgb_array = rgb_array * 0.5 33. Transposing your masterpiece
# Reduce every value in rgb_array by 50 percent # Transpose rgb_array
darker_rgb_array = rgb_array * 0.5 transposed_rgb = np.transpose(rgb_array, axes=(1, 0, 2))
# Convert darker_rgb_array into an array of integers plt.imshow(transposed_rgb)
darker_rgb_int_array = darker_rgb_array.astype(np.int8) plt.show()
plt.imshow(darker_rgb_int_array)
34. 2D split and stack
plt.show()
# Reduce every value in rgb_array by 50 percent # Split monthly_sales into quarterly data
darker_rgb_array = rgb_array * 0.5 q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
# Convert darker_rgb_array into an array of integers print(q1_sales)
darker_rgb_int_array = darker_rgb_array.astype(np.int8) # Split monthly_sales into quarterly data
plt.imshow(darker_rgb_int_array) q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
plt.show() print(q1_sales)
# Save darker_rgb_int_array to an .npy file called darker_monet.npy # Stack the four quarterly sales arrays
with open("darker_monet.npy", "wb") as f: quarterly_sales = np.stack([q1_sales, q2_sales, q3_sales, q4_sales])
np.save(f, darker_rgb_int_array) print(quarterly_sales)
32. Augmenting Monet 35. Splitting RGB data
# Flip rgb_array so that it is the mirror image of the original # Split rgb_array into red, green, and blue arrays
mirrored_monet = np.flip(rgb_array, axis=1) red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2)
plt.imshow(mirrored_monet) # Split rgb_array into red, green, and blue arrays
plt.show() red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2)
# Flip rgb_array so that it is upside down # Create emphasized_blue_array
upside_down_monet = np.flip(rgb_array, axis=(0, 1)) emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255, blue_array)
plt.imshow(upside_down_monet)
plt.show() # Print the shape of emphasized_blue_array
print(emphasized_blue_array.shape)
# Split rgb_array into red, green, and blue arrays plt.imshow(emphasized_blue_monet)
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2) plt.show()
# Create emphasized_blue_array
emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255, blue_array)
# Print the shape of emphasized_blue_array
print(emphasized_blue_array.shape)
# Remove the trailing dimension from emphasized_blue_array
emphasized_blue_array_2D = emphasized_blue_array.reshape((675, 843)) # edited/added
36. Stacking RGB data
# Print the shapes of blue_array and emphasized_blue_array_2D
print(blue_array.shape, emphasized_blue_array_2D.shape)
# Print the shapes of blue_array and emphasized_blue_array_2D
print(blue_array.shape, emphasized_blue_array_2D.shape)
# Reshape red_array and green_array
red_array_2D = red_array.reshape((675, 843)) # edited/added
green_array_2D = green_array.reshape((675, 843)) # edited/added
# Print the shapes of blue_array and emphasized_blue_array_2D
print(blue_array.shape, emphasized_blue_array_2D.shape)
# Reshape red_array and green_array
red_array_2D = red_array.reshape((675, 843)) # edited/added
green_array_2D = green_array.reshape((675, 843)) # edited/added
# Stack red_array_2D, green_array_2D, and emphasized_blue_array_2D
emphasized_blue_monet = np.stack([red_array_2D, green_array_2D, emphasized_blue_array_2D], axis=2)