Pandas Cheat Sheet
Commonly used Pandas functions
Updated
•2 min read
#### Importing Pandas
import pandas as pd
#### Python Cheats
x = 10
result = "Greater" if x > 5 else "Smaller" # Ternary operator
#### Creating DataFrames
# From a dictionary
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)
# From a CSV file
df = pd.read_csv('file.csv')
# From an Excel file
df = pd.read_excel('file.xlsx')
#### Viewing Data
df.head() # First 5 rows
df.tail() # Last 5 rows
df.info() # Summary of DataFrame
df.describe() # Summary statistics
df.shape # Get number of rows and columns
df.columns # List column names
#### Selecting Data
df['A'] # Select a single column
df[['A', 'B']] # Select multiple columns
df.iloc[0] # Select first row by index
df.loc[0, 'A'] # Select a specific value
df[df['A'] > 1] # Filter rows based on condition
df["date"].dt.year == 2024 # Filter for all rows in 2024
new_products = new_products[new_products["category"].notna()] # Filter so column not null
#### Modifying Data
df['C'] = df['A'] + df['B'] # Add a new column
df.rename(columns={'A': 'Alpha'}, inplace=True) # Rename column
df.drop(columns='B', axis=1, inplace=True) # Drop column
df.drop(0, axis=0, inplace=True) # Drop row
df.fillna(0, inplace=True) # Fill missing values with 0
df["price"] = df["price"].str.replace("$", "", regex=False).astype(float)
df["price"] = pd.to_numeric(df["price"].str.replace("$", "", regex=False), errors="coerce") # Change type
df.replace({'old_value': 'new_value'}, inplace=True) # Replace values
df["key"] = df["key"].str.replace("McCafé® ", "", regex=False) # Replace substring
#### Sorting & Ordering
df.sort_values('A', ascending=False) # Sort by column
df.sort_index(ascending=True) # Sort by index
#### Aggregation & Grouping
df.mean() # Column-wise mean
df.groupby('A').sum() # Group by column and sum
df['A'].value_counts() # Count unique values
#### Handling Missing Data
df.isnull().sum() # Count missing values
df.dropna(inplace=True) # Drop missing values
df.fillna(0, inplace=True) # Fill missing values with 0
#### Merging & Joining
df1.merge(df2, on='key') # Inner join
df1.merge(df2, on='key', how='left') # Left join
df1.append(df2, ignore_index=True) # Append rows
#### Exporting Data
df.to_csv('file.csv', index=False) # Save to CSV
df.to_excel('file.xlsx', index=False) # Save to Excel
#### Pivot Tables
df.pivot_table(index='A', columns='B', values='C', aggfunc='sum')
#### Working with Dates
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
#### Applying Functions
df['A'] = df['A'].apply(lambda x: x*2) # Apply function to column
df.applymap(lambda x: str(x).upper()) # Apply function to all elements
def generate_price(): ...
df["price"] = df.apply(generate_price, axis=1) # Apply generate_value func for every row without args
df['price2'] = df.apply(lambda row: generate_value(1, 2), axis=1) # Apply generate_value func for every row with args
---
🔹 **Tip:** Use `df.sample(5)` to quickly check random rows from your DataFrame!


