Pandas Cheat Sheet
1. Importing and Inspecting Data
import pandas as pd
# Read data from a CSV file
df = pd.read_csv("file.csv")
# Inspect the first or last rows
df.head(5) # First 5 rows
df.tail(5) # Last 5 rows
# Basic info about the DataFrame
df.info() # Data types and non-null counts
df.describe() # Summary statistics for numerical columns
# Check the shape (rows, columns)
df.shape
# View column names
df.columns
2. Selecting and Filtering Data
# Select a column (returns a Series)
df["column_name"]
df.column_name # Alternative; works only if the name is a valid Python identifier and does not clash with a DataFrame attribute/method
# Select multiple columns
df[["col1", "col2"]]
# Filter rows based on conditions
df[df["column_name"] > 50] # Rows where column > 50
df[(df["col1"] > 50) & (df["col2"] < 30)] # Multiple conditions (AND)
df[df["column_name"].isin([10, 20, 30])] # Rows where column is in a list
# Select rows by index
df.iloc[0] # First row (by position)
df.loc[0] # Row whose index label is 0 (not necessarily the first row)
df.iloc[0:5] # First 5 rows
3. Adding, Modifying, and Dropping Data
# Add a new column
df["new_col"] = df["col1"] + df["col2"]
# Modify a column
df["col1"] = df["col1"] * 10
# Drop columns
df = df.drop(columns=["col1", "col2"])
# Drop rows by index label
df = df.drop(index=[0, 1])
# Rename columns
df = df.rename(columns={"old_name": "new_name"})
# Replace values
df["column_name"] = df["column_name"].replace({"old": "new"})
4. Handling Missing Data
# Check for missing values
df.isnull() # Boolean DataFrame
df.isnull().sum() # Count of missing values per column
# Drop missing values
df = df.dropna() # Drop rows that contain any missing value
df = df.dropna(subset=["col1"]) # Drop rows with missing values in "col1"
# Fill missing values
df = df.fillna(0) # Replace NaNs with 0
df["col1"] = df["col1"].fillna(df["col1"].mean()) # Fill with mean
5. Sorting and Grouping
# Sort by a column
df = df.sort_values(by="col1", ascending=True)
# Group by and aggregate
df_grouped = df.groupby("category")["value"].mean() # Mean by group
df_grouped = df.groupby("category").agg({
"value1": "sum",
"value2": "mean"
}) # Custom aggregation
6. Combining DataFrames
# Concatenate DataFrames vertically or horizontally
df_combined = pd.concat([df1, df2], axis=0) # Vertical
df_combined = pd.concat([df1, df2], axis=1) # Horizontal (rows are aligned on the index)
# Merge (SQL-style joins)
df_merged = pd.merge(df1, df2, on="key", how="inner") # Inner join
df_merged = pd.merge(df1, df2, on="key", how="left") # Left join
7. Applying Functions
# Apply a function to a column
df["col1"] = df["col1"].apply(lambda x: x * 2)
# Apply a function row-wise
df["new_col"] = df.apply(lambda row: row["col1"] + row["col2"], axis=1)
8. Saving Data
# Save to a CSV file
df.to_csv("output.csv", index=False)
# Save to an Excel file (requires an Excel writer engine such as openpyxl)
df.to_excel("output.xlsx", index=False)
9. Handling Duplicates
# Drop duplicate rows
df = df.drop_duplicates()
# Identify duplicate rows
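df.duplicated() # Boolean Series; True for rows that repeat an earlier row (the first occurrence is not flagged)
df[df.duplicated()] # View the repeated rows (first occurrences are excluded)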
10. Resetting or Setting the Index
# Reset index to the default 0..n-1 range, discarding the old index
df = df.reset_index(drop=True)
# Set a specific column as the index
df = df.set_index("column_name")