pandas_python_cheat

#### Importing Pandas
import pandas as pd

#### Python Cheats
x = 10
result = "Greater" if x > 5 else "Smaller" # Ternary operator


#### Creating DataFrames
# From a dictionary
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data)

# From a CSV file
df = pd.read_csv('file.csv')

# From an Excel file
df = pd.read_excel('file.xlsx')

#### Viewing Data
df.head()  # First 5 rows
df.tail()  # Last 5 rows
df.info()  # Summary of DataFrame
df.describe()  # Summary statistics
df.shape  # Get number of rows and columns
df.columns  # List column names

#### Selecting Data
df['A']  # Select a single column
df[['A', 'B']]  # Select multiple columns
df.iloc[0]  # Select first row by index
df.loc[0, 'A']  # Select a specific value
df[df['A'] > 1]  # Filter rows based on condition
df["date"].dt.year == 2024 # Filter for all rows in 2024
new_products = new_products[new_products["category"].notna()] # Filter so column not null

#### Modifying Data
df['C'] = df['A'] + df['B']  # Add a new column
df.rename(columns={'A': 'Alpha'}, inplace=True)  # Rename column
df.drop(columns='B', axis=1, inplace=True)  # Drop column
df.drop(0, axis=0, inplace=True)  # Drop row
df.fillna(0, inplace=True)  # Fill missing values with 0
df["price"] = df["price"].str.replace("$", "", regex=False).astype(float)
df["price"] = pd.to_numeric(df["price"].str.replace("$", "", regex=False), errors="coerce") # Change type
df.replace({'old_value': 'new_value'}, inplace=True)  # Replace values
df["key"] = df["key"].str.replace("McCafé® ", "", regex=False) # Replace substring

#### Sorting & Ordering
df.sort_values('A', ascending=False)  # Sort by column
df.sort_index(ascending=True)  # Sort by index

#### Aggregation & Grouping
df.mean()  # Column-wise mean
df.groupby('A').sum()  # Group by column and sum
df['A'].value_counts()  # Count unique values

#### Handling Missing Data
df.isnull().sum()  # Count missing values
df.dropna(inplace=True)  # Drop missing values
df.fillna(0, inplace=True)  # Fill missing values with 0

#### Merging & Joining
df1.merge(df2, on='key')  # Inner join
df1.merge(df2, on='key', how='left')  # Left join
df1.append(df2, ignore_index=True)  # Append rows

#### Exporting Data
df.to_csv('file.csv', index=False)  # Save to CSV
df.to_excel('file.xlsx', index=False)  # Save to Excel

#### Pivot Tables
df.pivot_table(index='A', columns='B', values='C', aggfunc='sum')

#### Working with Dates
df['date'] = pd.to_datetime(df['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

#### Applying Functions
df['A'] = df['A'].apply(lambda x: x*2)  # Apply function to column
df.applymap(lambda x: str(x).upper())  # Apply function to all elements

def generate_price(): ...
df["price"] = df.apply(generate_price, axis=1) # Apply generate_value func for every row without args
df['price2'] = df.apply(lambda row: generate_value(1, 2), axis=1) # Apply generate_value func for every row with args

---
🔹 **Tip:** Use `df.sample(5)` to quickly check random rows from your DataFrame!
Pandas Cheat Sheet

Comments

More from this blog

Connect to Fabric Lakehouse On-Prem

Command Palette

Comments

More from this blog