!uv pip install python-dotenvUsing Python 3.10.18 environment at: C:\Users\hayk_\.conda\envs\lectures
Resolved 1 package in 576ms
Prepared 1 package in 124ms
Installed 1 package in 42ms
+ python-dotenv==1.1.1
Մարգարյան փողոց, լուսանկարի հղումը, Հեղինակ՝ Sanasar Tovmasyan
!uv pip install python-dotenvUsing Python 3.10.18 environment at: C:\Users\hayk_\.conda\envs\lectures
Resolved 1 package in 576ms
Prepared 1 package in 124ms
Installed 1 package in 42ms
+ python-dotenv==1.1.1
Պետք ա սարքենք .env անունով ֆայլ, որտեղ կպահենք մեր գաղտնի տվյալները, օրինակ՝
PASSWORD=hndkahav
SECRET_KEY=1234567890abcdef
Նշում։ Վիդեոյի մեջ մոռանում եմ ասել՝ load_dotenv-ը default-ով ա գնում .env ֆայլը կարդում։ Եթե ուզենք ուրիշ ֆայլ կարդալ, պետք ա հասցեն որպես արգումենտ փոխանցենք։
from dotenv import load_dotenv
import os
# Load environment variables from .env file
load_dotenv(override=True)True
os.getenv("PASSWORD") # Access the environment variable'hndkahav'
os.getenv("SECRET_KEY") # Access the environment variable'1234567890abcdef'
pathlib is a modern, object-oriented approach to handling filesystem paths in Python. It provides a clean, intuitive interface that works across different operating systems.
from pathlib import Path
# Basic Path Creation
print("=== Basic Path Creation ===")
# Current working directory
current_dir = Path.cwd()
print(f"Current directory: {current_dir}")=== Basic Path Creation ===
Current directory: c:\Users\hayk_\OneDrive\Desktop\01_python_math_ml_course\python_libs
# Creating paths from strings
path1 = Path("documents/projects/myproject")
path2 = Path(r"\usr\local\bin")
print(f"Path 1: {path1}")
print(f"Path 2: {path2}")Path 1: documents\projects\myproject
Path 2: \usr\local\bin
path2WindowsPath('/usr/local/bin')
docs = Path("documents")
# os.path.join("docs", ..)
# Path from multiple parts (magic, polymorphism)
path3 = docs / "projects" / "myproject" / "file.txt"
print(f"Path 3: {path3}")Path 3: documents\projects\myproject\file.txt
# Converting between Path and string
path_str = str(path3)
print(f"Path as string: {path_str}")Path as string: documents\projects\myproject\file.txt
Path Properties
# Path Properties
print("=== Path Properties ===")
example_path = Path("documents/projects/myproject/data.csv")
print(f"Full path: {example_path}")
print(f"Name: {example_path.name = }") # data.csv
print(f"Stem: {example_path.stem = }") # data
print(f"Suffix: {example_path.suffix = }") # .csv
print(f"Suffixes: {example_path.suffixes = }") # ['.csv']
print(f"Parent: {example_path.parent = }") # documents/projects/myproject
print(f"Parents: {list(example_path.parents) = }") # All parent directories
print(f"Parts: {example_path.parts = }") # ('documents', 'projects', 'myproject', 'data.csv')=== Path Properties ===
Full path: documents\projects\myproject\data.csv
Name: example_path.name = 'data.csv'
Stem: example_path.stem = 'data'
Suffix: example_path.suffix = '.csv'
Suffixes: example_path.suffixes = ['.csv']
Parent: example_path.parent = WindowsPath('documents/projects/myproject')
Parents: list(example_path.parents) = [WindowsPath('documents/projects/myproject'), WindowsPath('documents/projects'), WindowsPath('documents'), WindowsPath('.')]
Parts: ('documents', 'projects', 'myproject', 'data.csv')
Path Joining and Resolution
# Path Joining and Resolution
print("=== Path Manipulation ===")
# Joining paths
base_path = Path("documents")
project_path = base_path / "projects" / "myproject"
print(f"Joined path: {project_path}")
# Resolving paths (absolute path)
resolved_path = project_path.resolve()
print(f"Resolved path: {resolved_path}")
# Check if path is absolute or relative
rel_path = Path("documents/file.txt")
print(f"Is absolute? {resolved_path.is_absolute()}") # True
print(f"Is absolute? {rel_path.is_absolute()}") # False=== Path Manipulation ===
Joined path: documents\projects\myproject
Resolved path: C:\Users\hayk_\OneDrive\Desktop\01_python_math_ml_course\python_libs\documents\projects\myproject
Is absolute? True
Is absolute? False
# Path Modification
print("=== Path Modification ===")
# Changing file extensions
data_file = Path("report.txt")
pdf_file = data_file.with_suffix(".pdf")
json_file = data_file.with_suffix(".json")
print(f"Original: {data_file}")
print(f"PDF version: {pdf_file}")
print(f"JSON version: {json_file}")=== Path Modification ===
Original: report.txt
PDF version: report.pdf
JSON version: report.json
# Changing filename but keeping directory
old_path = Path("documents/old_name.txt")
new_path = old_path.with_name("new_name.txt")
print(f"Old path: {old_path}")
print(f"New path: {new_path}")Old path: documents\old_name.txt
New path: documents\new_name.txt
# Path Checking Methods
print("=== Path Checking Methods ===")
# Note: These will work with actual files/directories
test_path = Path(".")
print(f"Testing path: {test_path.resolve()}")
print(f"Exists: {test_path.exists()}")
print(f"Is directory: {test_path.is_dir()}")
print(f"Is file: {test_path.is_file()}")=== Path Checking Methods ===
Testing path: C:\Users\hayk_\OneDrive\Desktop\01_python_math_ml_course\python_libs
Exists: True
Is directory: True
Is file: False
# File Reading and Writing
print("=== File Operations ===")
# Create a temporary file for demonstration
temp_file = Path("temp_demo.txt")
# Writing to a file
temp_file.write_text("Hello, World!\nThis is a test file.")
print(f"Created file: {temp_file}")
# # Reading from a file
content = temp_file.read_text()
print(f"File content:\n{content}")
# Clean up the temp file
temp_file.unlink(missing_ok=True)=== File Operations ===
Created file: temp_demo.txt
File content:
Hello, World!
This is a test file.
# File Statistics
print("=== File Statistics ===")
# Create a temporary file to get stats
temp_file = Path("temp_stats.txt")
temp_file.write_text("Sample content for statistics")
if temp_file.exists():
stat = temp_file.stat()
print(f"File size: {stat.st_size} bytes")
print(f"Modified time: {stat.st_mtime}")
# Clean up
temp_file.unlink(missing_ok=True)=== File Statistics ===
File size: 29 bytes
Modified time: 1755106330.0550132
# Directory Operations
print("=== Directory Operations ===")
# Create directories
test_dir = Path("test_directory")
test_dir.mkdir(exist_ok=True) # exist_ok prevents error if directory exists
print(f"Created directory: {test_dir}")
# Create nested directories
nested_dir = Path("parent/child/grandchild")
nested_dir.mkdir(parents=True, exist_ok=True)
print(f"Created nested directories: {nested_dir}")
# # List directory contents
=== Directory Operations ===
Created directory: test_directory
Created nested directories: parent\child\grandchild
current_contents = list(Path(".").iterdir())
print(f"Current directory contains {len(current_contents)} items")
current_contentsCurrent directory contains 30 items
[WindowsPath('00_misc_libraries.ipynb'),
WindowsPath('00_template.ipynb'),
WindowsPath('01_openai_api_timestamp_generator.ipynb'),
WindowsPath('02_numpy.ipynb'),
WindowsPath('03_pandas_1.ipynb'),
WindowsPath('04_pandas_2.ipynb'),
WindowsPath('05_noble_people_analysis.ipynb'),
WindowsPath('06_data_viz.ipynb'),
WindowsPath('07_kargin_project.ipynb'),
WindowsPath('08_logging__clis.ipynb'),
WindowsPath('09_testing__debugging.ipynb'),
WindowsPath('10_scraping__parallelization.ipynb'),
WindowsPath('11_ysu_scraping.ipynb'),
WindowsPath('12_sql.ipynb'),
WindowsPath('13_pydantic.ipynb'),
WindowsPath('assets'),
WindowsPath('bg_photos.ipynb'),
WindowsPath('clis'),
WindowsPath('dbs'),
WindowsPath('kargin_api.py'),
WindowsPath('manimm.py'),
WindowsPath('parent'),
WindowsPath('README_kargin_api.md'),
WindowsPath('requirements_kargin_api.txt'),
WindowsPath('scraping'),
WindowsPath('testing'),
WindowsPath('test_directory'),
WindowsPath('test_kargin_api.py'),
WindowsPath('unittest'),
WindowsPath('__pycache__')]
# Clean up
if test_dir.exists():
test_dir.rmdir() # Remove empty directory
print("Cleaned up test directory")Cleaned up test directory
# Basic Glob Patterns
print("=== Basic Glob Patterns ===")
# Current directory
current_dir = Path(".")
# Find all Python files
python_files = list(current_dir.glob("*.py"))
print(f"Python files in current directory: {len(python_files)}")
for py_file in python_files[:3]: # Show first 3
print(f" - {py_file}")
# Find all files recursively
all_files = list(current_dir.rglob("*")) # recursive glob
print(f"Total files recursively: {len(all_files)}")
=== Basic Glob Patterns ===
Python files in current directory: 3
- kargin_api.py
- manimm.py
- test_kargin_api.py
Total files recursively: 439
# Find specific patterns
notebook_files = list(current_dir.rglob("0*.ipynb"))
print(f"Jupyter notebooks: {len(notebook_files)}")
for nb in notebook_files: # Show all
print(f" - {nb}")Jupyter notebooks: 11
- 00_misc_libraries.ipynb
- 00_template.ipynb
- 01_openai_api_timestamp_generator.ipynb
- 02_numpy.ipynb
- 03_pandas_1.ipynb
- 04_pandas_2.ipynb
- 05_noble_people_analysis.ipynb
- 06_data_viz.ipynb
- 07_kargin_project.ipynb
- 08_logging__clis.ipynb
- 09_testing__debugging.ipynb
# Advanced Pathlib Features
print("=== Advanced Pathlib Features ===")
# Relative path calculation
base = Path("/home/user/projects")
target = Path("/home/user/projects/myapp/src/main.py")
try:
relative = target.relative_to(base)
print(f"Relative path: {relative}")
except ValueError:
print("Cannot calculate relative path - paths don't share common base")
=== Advanced Pathlib Features ===
Relative path: myapp\src\main.py
# Pathlib vs os.path Comparison
print("=== Pathlib vs os.path Comparison ===")
import os
filepath = "documents/projects/myproject/data.csv"
print("Task: Get filename from path")
print(f"os.path: {os.path.basename(filepath)}")
print(f"pathlib: {Path(filepath).name}")
print("\nTask: Get file extension")
print(f"os.path: {os.path.splitext(filepath)[1]}")
print(f"pathlib: {Path(filepath).suffix}")
print("\nTask: Get parent directory")
print(f"os.path: {os.path.dirname(filepath)}")
print(f"pathlib: {Path(filepath).parent}")
print("\nTask: Join paths")
print(f"os.path: {os.path.join('documents', 'projects', 'file.txt')}")
print(f"pathlib: {Path('documents') / 'projects' / 'file.txt'}")
print("\nTask: Check if file exists")
print(f"os.path: {os.path.exists(filepath)}")
print(f"pathlib: {Path(filepath).exists()}")# Practical Example: File Organization
print("=== Practical Example: File Organization ===")
def organize_files_by_extension(directory):
    """Group the files directly inside *directory* by extension and print a summary.

    Args:
        directory: str or Path of the directory to scan (non-recursive).

    Returns:
        dict mapping lower-cased extension (or 'no_extension' for files
        without one) to the list of Path objects in that group, or None
        when the directory does not exist.
    """
    source_dir = Path(directory)
    if not source_dir.exists():
        print(f"Directory {directory} does not exist")
        return None
    # Only regular files; sub-directories are ignored.
    files = [f for f in source_dir.iterdir() if f.is_file()]
    # Group by extension; setdefault replaces the manual
    # "if ext not in extensions: extensions[ext] = []" dance.
    extensions = {}
    for file in files:
        ext = file.suffix.lower() or 'no_extension'
        extensions.setdefault(ext, []).append(file)
    print(f"Files in {directory}:")
    for ext, file_list in extensions.items():
        print(f"  {ext}: {len(file_list)} files")
        for file in file_list[:3]:  # Show at most the first 3 per group
            print(f"    - {file.name}")
    # Return the grouping so callers can use it programmatically
    # (previously the function only printed and returned None).
    return extensions
# Example usage
organize_files_by_extension(".")# Practical Example: Find Large Files
print("=== Practical Example: Find Large Files ===")
def find_large_files(directory, size_mb=10):
    """Return (path, size_in_MB) pairs for files under *directory* larger than *size_mb*.

    Walks the tree recursively; entries whose size cannot be read
    (permissions, races) are skipped silently.
    """
    root = Path(directory)
    oversized = []
    bytes_per_mb = 1024 * 1024
    for candidate in root.rglob("*"):
        if not candidate.is_file():
            continue
        try:
            megabytes = candidate.stat().st_size / bytes_per_mb
        except (OSError, PermissionError):
            # Unreadable entry — skip and keep scanning.
            continue
        if megabytes > size_mb:
            oversized.append((candidate, megabytes))
    return oversized
# Find files larger than 1MB in current directory
large_files = find_large_files(".", 1)
print(f"Files larger than 1MB: {len(large_files)}")
for file, size in large_files[:3]:
print(f" - {file.name}: {size:.2f} MB")=== Practical Example: Find Large Files ===
Files larger than 1MB: 10
- 03_pandas_1.ipynb: 20.88 MB
- 04_pandas_2.ipynb: 3.86 MB
- 06_data_viz.ipynb: 4.41 MB
git , 50 mb limit
https://github.com/gruns/icecream
icecream is a powerful debugging library that makes print debugging sweet and simple. It’s a more convenient and informative alternative to Python’s print() function.
pip install icecream!pip install icecreamCollecting icecream
Downloading icecream-2.1.5-py3-none-any.whl.metadata (1.5 kB)
Requirement already satisfied: colorama>=0.3.9 in c:\users\hayk_\.conda\envs\sl\lib\site-packages (from icecream) (0.4.6)
Requirement already satisfied: pygments>=2.2.0 in c:\users\hayk_\.conda\envs\sl\lib\site-packages (from icecream) (2.19.1)
Requirement already satisfied: executing>=2.1.0 in c:\users\hayk_\.conda\envs\sl\lib\site-packages (from icecream) (2.2.0)
Requirement already satisfied: asttokens>=2.0.1 in c:\users\hayk_\.conda\envs\sl\lib\site-packages (from icecream) (3.0.0)
Downloading icecream-2.1.5-py3-none-any.whl (14 kB)
Installing collected packages: icecream
Successfully installed icecream-2.1.5
https://www.youtube.com/watch?v=JMW6u_AFVKY
https://www.youtube.com/watch?v=-33IXM8gC4gfrom icecream import ic# Basic Variable Debugging
print("=== Basic iceCream Examples ===")
# Simple variable debugging
name = "Alice"
age = 30
ic(name) # name: 'Alice'
ic(age) # Shows: ic| age: 30
print(f"name = {name}")
print(f"{name = }")
# Multiple variables at once
ic(name, age) # Shows both variables with namesic| name: 'Alice'
ic| age: 30
ic| name: 'Alice', age: 30
=== Basic iceCream Examples ===
name = Alice
name = 'Alice'
('Alice', 30)
# Expressions
numbers = [1, 2, 3, 4, 5]
ic(len(numbers)) # ic| len(numbers): 5
ic(sum(numbers)) # ic| sum(numbers): 15
ic(numbers[0]) # ic| numbers[0]: 1
# Complex expressions
data = {"users": ["Alice", "Bob", "Charlie"]}
ic(data["users"][0]) # ic| data["users"][0]: 'Alice'
ic(len(data["users"])) # ic| len(data["users"]): 3ic| len(numbers): 5
ic| sum(numbers): 15
ic| numbers[0]: 1
ic| data["users"][0]: 'Alice'
ic| len(data["users"]): 3
3
# Data Structure Debugging
print("=== Data Structure Debugging ===")
# Lists and dictionaries
users = [
{"name": "Alice", "age": 30, "city": "New York"},
{"name": "Bob", "age": 25, "city": "London"},
{"name": "Charlie", "age": 35, "city": "Tokyo"}
]
ic(users)
ic(users[0])
ic([user["name"] for user in users])
# Complex data structures
nested_data = {
"project": "ML Pipeline",
"stages": {
"data_preprocessing": {"status": "complete", "time": 45},
"training": {"status": "in_progress", "time": 120},
"evaluation": {"status": "pending", "time": None}
}
}ic| users: [{'age': 30, 'city': 'New York', 'name': 'Alice'},
{'age': 25, 'city': 'London', 'name': 'Bob'},
{'age': 35, 'city': 'Tokyo', 'name': 'Charlie'}]
ic| users[0]: {'age':
=== Data Structure Debugging ===
30, 'city': 'New York', 'name': 'Alice'}
ic| [user["name"] for user in users]: ['Alice', 'Bob', 'Charlie']
ic(nested_data)
# ic(nested_data["stages"]["training"]["status"])
print(f"{nested_data = }")ic| nested_data: {'project': 'ML Pipeline',
'stages': {'data_preprocessing': {'status': 'complete', 'time': 45},
'evaluation': {'status': 'pending', 'time': None},
'training': {'status': 'in_progress', 'time': 120}}}
nested_data = {'project': 'ML Pipeline', 'stages': {'data_preprocessing': {'status': 'complete', 'time': 45}, 'training': {'status': 'in_progress', 'time': 120}, 'evaluation': {'status': 'pending', 'time': None}}}
ic.includeContext = Truea = 3
ic(a)ic| 3036100448.py:2 in <module>- a: 3
3
ic.configureOutput(prefix='DEBUG | ')
ic(a)DEBUG | 2746202514.py:3 in <module>- a: 3
3
Numba is a just-in-time (JIT) compiler for Python that translates Python functions to optimized machine code at runtime. It can dramatically speed up numerical computations with minimal code changes.
pip install numba
# For CUDA support:
# conda install numba cudatoolkit!pip install numbaCollecting numba
Downloading numba-0.61.2-cp310-cp310-win_amd64.whl.metadata (2.9 kB)
Collecting llvmlite<0.45,>=0.44.0dev0 (from numba)
Using cached llvmlite-0.44.0-cp310-cp310-win_amd64.whl.metadata (5.0 kB)
Requirement already satisfied: numpy<2.3,>=1.24 in c:\users\hayk_\.conda\envs\sl\lib\site-packages (from numba) (2.2.6)
Downloading numba-0.61.2-cp310-cp310-win_amd64.whl (2.8 MB)
---------------------------------------- 0.0/2.8 MB ? eta -:--:--
----------- ---------------------------- 0.8/2.8 MB 5.6 MB/s eta 0:00:01
------------------ --------------------- 1.3/2.8 MB 3.5 MB/s eta 0:00:01
----------------------------- ---------- 2.1/2.8 MB 3.8 MB/s eta 0:00:01
---------------------------------------- 2.8/2.8 MB 3.7 MB/s eta 0:00:00
Using cached llvmlite-0.44.0-cp310-cp310-win_amd64.whl (30.3 MB)
Installing collected packages: llvmlite, numba
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
---------------------------------------- 0/2 [llvmlite]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
-------------------- ------------------- 1/2 [numba]
---------------------------------------- 2/2 [numba]
Successfully installed llvmlite-0.44.0 numba-0.61.2
# Basic Numba JIT Compilation
import time
import numpy as np
from numba import jit, njit
# Example 1: Simple mathematical function
def python_sum_of_squares(n):
    """Return the sum of i*i for i in range(n), in pure Python.

    Baseline implementation used to benchmark against the Numba version.
    """
    return sum(i * i for i in range(n))
# Same algorithm as python_sum_of_squares, but JIT-compiled by Numba on first call.
@jit
def numba_sum_of_squares(n):
"""Numba-compiled sum of i*i for i in range(n).

NOTE(review): under Numba the accumulator is a fixed-width machine
integer, so very large n overflows and wraps negative — the captured
benchmark output below shows exactly that (a negative "result").
Confirm whether that is intended for the demo.
"""
total = 0
# Loop body runs as compiled machine code, not interpreted bytecode.
for i in range(n):
total += i * i
return total
# Compare performance
n_python = 1_000_000
print(f"Computing sum of squares for n={n_python:,}")
# NOTE(review): n_numba is one MILLION times larger than n_python, so the
# "speedup" printed below compares two different workloads, and the Numba
# result overflows the machine integer (the captured output shows a
# negative number). Use the same n for both calls to get a meaningful,
# correct comparison — TODO confirm whether the mismatch is intentional.
n_numba = 1_000_000_000_000
print(f"Computing sum of squares for n={n_numba:,}")
# Time Python version
start = time.time()
python_result = python_sum_of_squares(n_python)
python_time = time.time() - start
# Time Numba version (includes compilation time on first run)
start = time.time()
numba_result = numba_sum_of_squares(n_numba)
numba_time = time.time() - start
print(f"Python result: {python_result:,}")
print(f"Numba result: {numba_result:,}")
print(f"Python time: {python_time:.6f} seconds")
print(f"Numba time: {numba_time} seconds")
speedup = python_time / numba_time
print(f"Speedup: {speedup:.2f}x faster with Numba!")
Computing sum of squares for n=1,000,000
Computing sum of squares for n=1,000,000,000,000
Python result: 333,332,833,333,500,000
Numba result: -8,993,179,702,706,251,776
Python time: 0.211253 seconds
Numba time: 0.001999378204345703 seconds
Speedup: 105.66x faster with Numba!
Just-In-Time (JIT) Compilation is a technique where code is compiled at runtime rather than ahead of time. Here’s how Numba works:
@jit decorated function:
Numba creates specialized versions of functions for different input types:
@jit
def add_numbers(a, b):
return a + b
# First call with integers - compiles version for int64
result1 = add_numbers(5, 3)
# First call with floats - compiles version for float64
result2 = add_numbers(5.0, 3.0)
# Second call with integers - uses cached int64 version
result3 = add_numbers(10, 7)# Different Numba Compilation Modes
print("=== Numba Compilation Modes ===")
from numba import jit, njit, prange
# Mode 1: @jit - Lazy compilation
# Mode 1: @jit — compiles lazily on the first call; may fall back to
# object mode if part of the function is not Numba-compatible.
@jit
def lazy_compiled_function(x):
"""Quadratic x**2 + 2x + 1, compiled when first called (@jit lazy mode)."""
return x ** 2 + 2 * x + 1
# Mode 2: @njit - No-Python mode (faster)
# Mode 2: @njit — no-Python mode: the whole function must compile to
# machine code; no interpreter fallback.
@njit
def nopython_function(x):
"""Quadratic x**2 + 2x + 1, compiled in no-Python mode (@njit)."""
return x ** 2 + 2 * x + 1
# Mode 3: @jit(nopython=True) - Explicit no-Python mode
# Mode 3: @jit(nopython=True) — identical behavior to @njit, just spelled
# out explicitly.
@jit(nopython=True)
def explicit_nopython(x):
"""Quadratic x**2 + 2x + 1, explicitly compiled in no-Python mode."""
return x ** 2 + 2 * x + 1
# Mode 4: Parallel processing
# Mode 4: parallel processing — prange distributes iterations across CPU
# cores; each iteration here is independent, which is required for this.
@njit(parallel=True)
def parallel_function(arr):
"""Elementwise arr**2 + sin(arr), with the loop parallelized via prange."""
result = np.zeros_like(arr)
for i in prange(len(arr)): # prange enables parallel execution
result[i] = arr[i] ** 2 + np.sin(arr[i])
return result
# Test different modes
test_array = np.random.random(1_000_000)
test_value = 42.0
print("Testing compilation modes:")
print(f"Input array size: {len(test_array):,}")
# Test scalar functions
modes = [
("Lazy compiled (@jit)", lazy_compiled_function),
("No-Python (@njit)", nopython_function),
("Explicit no-Python", explicit_nopython)
]
for name, func in modes:
start = time.time()
result = func(test_value)
elapsed = time.time() - start
print(f"{name}: {result:.2f} (time: {elapsed:.6f}s)")
# Test parallel function
print(f"\nTesting parallel processing:")
start = time.time()
parallel_result = parallel_function(test_array[:100_000]) # Use smaller array for demo
parallel_time = time.time() - start
print(f"Parallel function completed in {parallel_time:.6f} seconds")
print(f"Result shape: {parallel_result.shape}")
print(f"Sample results: {parallel_result[:5]}")=== Numba Compilation Modes ===
Testing compilation modes:
Input array size: 1,000,000
Lazy compiled (@jit): 1849.00 (time: 0.119028s)
No-Python (@njit): 1849.00 (time: 0.087462s)
Explicit no-Python: 1849.00 (time: 0.101513s)
Testing parallel processing:
Parallel function completed in 0.576533 seconds
Result shape: (100000,)
Sample results: [0.51835051 0.46277311 0.07698531 1.4442374 1.19414022]
Numba offers different compilation modes, each with specific characteristics and use cases:
@jit - Object Mode (Default)@jit
def my_function(x):
return x ** 2Characteristics: - Fallback capability: If Numba can’t compile part of the code, it falls back to Python - Mixed execution: Some parts run compiled, others in Python interpreter - More forgiving: Works with more Python features - Moderate speedup: Usually 2-10x faster than pure Python
When to use: When you’re not sure if your code is fully Numba-compatible
@njit - No-Python Mode@njit # Equivalent to @jit(nopython=True)
def my_function(x):
return x ** 2Characteristics: - Pure compilation: Entire function must be compilable to machine code - No Python interpreter: No fallback to Python - Maximum performance: 10-1000x faster than pure Python - Strict requirements: Only supports Numba-compatible operations
When to use: For numerical computations where you want maximum performance
@jit(nopython=True) - Explicit No-Python Mode@jit(nopython=True)
def my_function(x):
return x ** 2Characteristics: - Identical to @njit: Same behavior as @njit - More explicit: Makes the no-Python requirement clear - Error handling: Fails with clear error if Python fallback would be needed
When to use: When you want to be explicit about no-Python mode requirements
@njit(parallel=True)
def parallel_function(arr):
for i in prange(len(arr)): # prange enables parallelization
arr[i] = arr[i] ** 2
return arrCharacteristics: - Multi-core execution: Uses multiple CPU cores automatically - Requires prange: Use prange instead of range for parallel loops - Additional speedup: 2-8x additional speedup on multi-core systems - Independent iterations: Loop iterations must be independent
| Mode | Speed | Compatibility | Error Handling | Use Case |
|---|---|---|---|---|
@jit |
Moderate (2-10x) | High | Fallback to Python | Testing/Development |
@njit |
Maximum (10-1000x) | Limited | Compilation error | Production numerical code |
@jit(nopython=True) |
Maximum (10-1000x) | Limited | Compilation error | Explicit no-Python |
@njit(parallel=True) |
Maximum + Multi-core | Limited | Compilation error | Parallelizable algorithms |
Recommendations: use `@jit` for development and testing; switch to `@njit` once your code works and you want maximum performance; use `@njit(parallel=True)` for loops with independent iterations; use `@jit(nopython=True)` when you want to be explicit about the no-Python requirement. Numba can automatically parallelize certain types of loops across multiple CPU cores, providing additional speedup on multi-core systems.
When you use @njit(parallel=True), Numba:
prange vs range# Sequential execution
@njit
def sequential_sum(arr):
total = 0
for i in range(len(arr)): # Sequential loop
total += arr[i] ** 2
return total
# Parallel execution
@njit(parallel=True)
def parallel_sum(arr):
total = 0
for i in prange(len(arr)): # Parallel loop
total += arr[i] ** 2
return totalKey Differences: - range: Executes iterations sequentially (one after another) - prange: Executes iterations in parallel (simultaneously across cores) - prange requires parallel=True in the decorator
For a loop to be parallelizable with prange:
# ❌ This won't parallelize correctly (each iteration depends on previous)
@njit(parallel=True)
def bad_parallel_example(arr):
for i in prange(1, len(arr)):
arr[i] = arr[i] + arr[i-1] # Depends on previous iteration
return arr
# ✅ This parallelizes well (independent iterations)
@njit(parallel=True)
def good_parallel_example(arr):
for i in prange(len(arr)):
arr[i] = arr[i] ** 2 + 5 # Each iteration is independent
return arrNumba automatically handles common reduction patterns:
@njit(parallel=True)
def parallel_reductions(arr):
# These are automatically parallelized
total_sum = 0
maximum = arr[0]
minimum = arr[0]
for i in prange(len(arr)):
total_sum += arr[i] # Automatic parallel sum
if arr[i] > maximum:
maximum = arr[i] # Automatic parallel max
if arr[i] < minimum:
minimum = arr[i] # Automatic parallel min
return total_sum, maximum, minimumNUMBA_AVAILABLE = True
# Practical Example: Monte Carlo Pi Estimation
print("=== Monte Carlo Pi Estimation ===")
# Regular Python implementation
def python_monte_carlo_pi(n_samples):
    """Estimate π with the Monte Carlo method, in pure Python.

    Draws n_samples uniform points in the unit square and returns
    4 * (fraction landing inside the quarter unit circle).
    """
    hits = 0
    for _ in range(n_samples):
        # Draw x then y, one scalar at a time (same order as before,
        # so results match for a given RNG state).
        px = np.random.random()
        py = np.random.random()
        if px * px + py * py <= 1.0:
            hits += 1
    return 4.0 * hits / n_samples
# Numba-optimized implementation
# Identical algorithm to python_monte_carlo_pi, compiled with @njit.
# NOTE(review): inside @njit, np.random.* is Numba's own RNG implementation,
# so results will differ from the NumPy global stream — confirm if exact
# reproducibility vs. the Python version matters.
@njit
def numba_monte_carlo_pi(n_samples):
"""Estimate π using Monte Carlo method - Numba version"""
inside_circle = 0
for i in range(n_samples):
x = np.random.random()
y = np.random.random()
if x*x + y*y <= 1.0:
inside_circle += 1
return 4.0 * inside_circle / n_samples
# Parallel version
# Parallel version
# Define parallel_monte_carlo_pi either as a prange-parallelized Numba
# function or, when Numba is unavailable, as a thin fallback to the
# sequential Numba version so callers always have the same name.
if NUMBA_AVAILABLE:
@njit(parallel=True)
def parallel_monte_carlo_pi(n_samples):
"""Parallel Monte Carlo π estimation"""
# inside_circle += 1 across prange iterations is a reduction,
# which Numba handles automatically (see the reduction notes above).
inside_circle = 0
for i in prange(n_samples):
x = np.random.random()
y = np.random.random()
if x*x + y*y <= 1.0:
inside_circle += 1
return 4.0 * inside_circle / n_samples
else:
# Fallback: same interface, delegates to the sequential implementation.
def parallel_monte_carlo_pi(n_samples):
return numba_monte_carlo_pi(n_samples)
# Compare implementations
n_samples = 1_000_000
print(f"Estimating π with {n_samples:,} samples")
print(f"True value of π: {np.pi:.6f}")
# Python version
start = time.time()
python_pi = python_monte_carlo_pi(n_samples)
python_mc_time = time.time() - start
# Numba version
start = time.time()
numba_pi = numba_monte_carlo_pi(n_samples)
numba_mc_time = time.time() - start
# Parallel version
start = time.time()
parallel_pi = parallel_monte_carlo_pi(n_samples)
parallel_mc_time = time.time() - start
print(f"\nResults:")
print(f"Python estimate: {python_pi:.6f} (error: {abs(python_pi - np.pi):.6f}) - {python_mc_time:.4f}s")
print(f"Numba estimate: {numba_pi:.6f} (error: {abs(numba_pi - np.pi):.6f}) - {numba_mc_time:.4f}s")
print(f"Parallel estimate: {parallel_pi:.6f} (error: {abs(parallel_pi - np.pi):.6f}) - {parallel_mc_time:.4f}s")
if NUMBA_AVAILABLE and python_mc_time > 0:
numba_speedup = python_mc_time / numba_mc_time
parallel_speedup = python_mc_time / parallel_mc_time
print(f"\nSpeedups:")
print(f"Numba speedup: {numba_speedup:.2f}x")
print(f"Parallel speedup: {parallel_speedup:.2f}x")=== Monte Carlo Pi Estimation ===
Estimating π with 1,000,000 samples
True value of π: 3.141593
Results:
Python estimate: 3.140804 (error: 0.000789) - 1.6733s
Numba estimate: 3.141368 (error: 0.000225) - 0.2625s
Parallel estimate: 3.143076 (error: 0.001483) - 0.7440s
Speedups:
Numba speedup: 6.37x
Parallel speedup: 2.25x
Understanding compilation overhead is crucial for effectively using Numba in your applications.
What is it? - Time spent analyzing and compiling your function on the first call - Can range from milliseconds to several seconds depending on function complexity - Only happens once per function per set of input types
Example Timeline:
First call: [Compilation: 100ms] + [Execution: 1ms] = 101ms total
Second call: [Execution: 1ms] = 1ms total
Third call: [Execution: 1ms] = 1ms total
...
When is Numba worth it?
Consider a function that: - Takes 100ms compilation time - Pure Python version takes 10ms per call - Numba version takes 0.1ms per call
Break-even calculation:
Compilation time / (Python time - Numba time) = Number of calls to break even
100ms / (10ms - 0.1ms) = 100ms / 9.9ms ≈ 10 calls
After 10 calls, you start saving time!
# Perfect for Numba - called thousands of times
@njit
def distance_calculation(points1, points2):
distances = np.zeros(len(points1))
for i in range(len(points1)):
dx = points1[i][0] - points2[i][0]
dy = points1[i][1] - points2[i][1]
distances[i] = np.sqrt(dx*dx + dy*dy)
return distancesPre-compile functions to avoid first-call overhead:
from numba import njit
import numba as nb
# Specify exact types to pre-compile
@njit('float64(float64[:])') # Pre-compiled for float64 arrays
def optimized_function(arr):
return np.sum(arr ** 2)
# Alternative: Compile immediately
@njit
def another_function(x):
return x ** 2
# Force compilation
another_function(1.0) # Compile nowMake a dummy call during initialization:
@njit
def my_algorithm(data):
# Complex algorithm here
return result
# Warm-up during app startup
dummy_data = np.array([1.0, 2.0, 3.0])
my_algorithm(dummy_data) # Compile now, not during critical pathAlways measure before optimizing:
import time
def profile_function(func, *args, runs=100):
    """Return (average seconds per call, last result) for func(*args).

    Calls func once before timing so one-time costs (e.g. Numba JIT
    compilation) are excluded from the measured runs.
    """
    func(*args)  # warm-up call, deliberately untimed
    t0 = time.time()
    for _ in range(runs):
        outcome = func(*args)
    elapsed = time.time() - t0
    return elapsed / runs, outcome
python_time, _ = profile_function(python_version, data)
numba_time, _ = profile_function(numba_version, data)
print(f"Speedup: {python_time / numba_time:.2f}x")Use Numba when: 1. ✅ Function is called multiple times (>10-100 times) 2. ✅ Function contains numerical computations 3. ✅ Function has loops or array operations 4. ✅ Current performance is a bottleneck
Don’t use Numba when: 1. ❌ Function is called only once 2. ❌ Function is already fast enough 3. ❌ Function uses unsupported Python features 4. ❌ Development time outweighs performance gains
# Numerical Algorithm: Solving Differential Equations
print("=== Solving Differential Equations with Numba ===")
# Example: Simple harmonic oscillator dy/dt = -k*y
# Analytical solution: y(t) = y0 * cos(sqrt(k)*t)
def python_euler_method(y0, k, dt, n_steps):
    """Euler method for solving dy/dt = -k*y - Python version.

    Args: y0 initial value, k decay constant, dt step size, n_steps
    total number of samples (including the initial one).
    Returns the array y of solution values with y[0] = y0.
    """
    y = np.zeros(n_steps)
    y[0] = y0
    for step in range(1, n_steps):
        prev = y[step - 1]
        # Explicit Euler update: y_{n+1} = y_n + dt * f(y_n), with f(y) = -k*y
        y[step] = prev + dt * (-k * prev)
    return y
@njit
def numba_euler_method(y0, k, dt, n_steps):
    """Euler method for solving dy/dt = -k*y - Numba version.

    Same update rule as python_euler_method; @njit compiles the loop.
    """
    y = np.zeros(n_steps)
    y[0] = y0
    for i in range(1, n_steps):
        # Explicit Euler step: y_i = y_{i-1} + dt * (-k * y_{i-1})
        y[i] = y[i-1] + dt * (-k * y[i-1])
    return y
@njit
def numba_runge_kutta_4(y0, k, dt, n_steps):
    """4th-order Runge-Kutta method for dy/dt = -k*y - Numba optimized.

    Far more accurate than Euler at the same step size (local error
    O(dt^5) vs O(dt^2) per step).
    """
    y = np.zeros(n_steps)
    y[0] = y0
    for i in range(1, n_steps):
        y_curr = y[i-1]
        # Four slope evaluations of f(y) = -k*y across the step
        k1 = dt * (-k * y_curr)
        k2 = dt * (-k * (y_curr + k1/2))
        k3 = dt * (-k * (y_curr + k2/2))
        k4 = dt * (-k * (y_curr + k3))
        # Classic RK4 weighted average with weights 1, 2, 2, 1
        y[i] = y_curr + (k1 + 2*k2 + 2*k3 + k4) / 6
    return y
# Setup problem parameters
y0 = 1.0 # Initial condition
k = 1.0 # Spring constant
dt = 0.01 # Time step
n_steps = 10000
t_final = (n_steps - 1) * dt
print(f"Solving dy/dt = -k*y with y(0) = {y0}, k = {k}")
print(f"Time steps: {n_steps}, dt = {dt}, final time = {t_final}")
# Compare methods and timing
start = time.time()
python_solution = python_euler_method(y0, k, dt, n_steps)
python_ode_time = time.time() - start
start = time.time()
numba_euler = numba_euler_method(y0, k, dt, n_steps)
numba_euler_time = time.time() - start
start = time.time()
numba_rk4 = numba_runge_kutta_4(y0, k, dt, n_steps)
numba_rk4_time = time.time() - start
# Analytical solution for comparison
t = np.linspace(0, t_final, n_steps)
analytical = y0 * np.cos(np.sqrt(k) * t)
print(f"\nTiming results:")
print(f"Python Euler: {python_ode_time:.6f} seconds")
print(f"Numba Euler: {numba_euler_time:.6f} seconds")
print(f"Numba RK4: {numba_rk4_time:.6f} seconds")
if NUMBA_AVAILABLE and python_ode_time > 0:
euler_speedup = python_ode_time / numba_euler_time
print(f"Euler speedup: {euler_speedup:.2f}x")
# Check accuracy at final time
final_analytical = analytical[-1]
final_python = python_solution[-1]
final_numba_euler = numba_euler[-1]
final_numba_rk4 = numba_rk4[-1]
print(f"\nAccuracy at t = {t_final}:")
print(f"Analytical: {final_analytical:.6f}")
print(f"Python Euler: {final_python:.6f} (error: {abs(final_python - final_analytical):.6f})")
print(f"Numba Euler: {final_numba_euler:.6f} (error: {abs(final_numba_euler - final_analytical):.6f})")
print(f"Numba RK4: {final_numba_rk4:.6f} (error: {abs(final_numba_rk4 - final_analytical):.6f})")
print(f"\nRK4 is {abs(final_numba_euler - final_analytical) / abs(final_numba_rk4 - final_analytical):.1f}x more accurate!")Understanding what Numba can and cannot compile is essential for writing efficient Numba code.
Supported features include: arithmetic operators (+, -, *, /, **, //, %), comparison operators (==, !=, <, >, <=, >=), logical operators (and, or, not), control flow (if, for, while, break, continue), built-in functions (len, range, enumerate, zip), NumPy math functions (np.sin, np.cos, np.sqrt, np.exp, etc.), array creation (np.zeros, np.ones, np.empty, np.arange), and reductions (np.sum, np.mean, np.max, np.min).
@njit
def supported_operations(arr1, arr2):
    """Showcase of constructs Numba compiles cleanly: NumPy array
    creation, math functions, loops, branching, and reductions."""
    # All of these work perfectly with Numba
    result = np.zeros_like(arr1)
    for i in range(len(arr1)):
        if arr1[i] > 0:
            result[i] = np.sqrt(arr1[i]) + np.sin(arr2[i])
        else:
            result[i] = arr1[i] ** 2
    return np.sum(result), np.max(result)int32, int64, uint32, uint64float32, float64complex64, complex128bool@njit
def list_example():
# ✅ This works - homogeneous list
numbers = [1.0, 2.0, 3.0, 4.0]
# ❌ This doesn't work - mixed types
# mixed = [1, 2.0, "hello"] # Would cause compilation error
return sum(numbers)# ❌ This won't work with @njit
class MyClass:
def __init__(self, value):
self.value = value
def compute(self):
return self.value ** 2
@njit # This would fail
def use_class():
obj = MyClass(5)
return obj.compute()# ❌ These don't work with @njit
@njit
def string_operations(text):
# None of these work in Numba
# result = text.upper()
# split_text = text.split(",")
# formatted = f"Result: {text}"
    pass
Context managers (with statements) are also unsupported. Separate Numba-compatible code from incompatible code:
# Pure computational kernel - perfect for Numba
@njit
def compute_kernel(data):
    """Element-wise transform (square plus sine) over data — a pure
    numeric loop with no I/O, ideal for @njit compilation."""
    result = np.zeros_like(data)
    for i in range(len(data)):
        result[i] = data[i] ** 2 + np.sin(data[i])
    return result
# Main function - handles I/O and object creation
def main_function(filename):
    """Load numbers from filename, run the compiled kernel, write the
    result to output.txt, and return the result array.

    Pattern: keep file I/O (unsupported by Numba) in plain Python and
    put only the numeric work in the @njit kernel.
    """
    # File I/O in regular Python
    data = np.loadtxt(filename)
    # Computation in Numba
    result = compute_kernel(data)
    # Save results in regular Python
    np.savetxt("output.txt", result)
    return resultConvert complex structures to Numba-compatible formats:
# Convert dictionary to arrays
def dict_to_arrays(data_dict):
    """Split a dict into parallel NumPy arrays (keys, values).

    Needed because Numba-compiled functions cannot take plain Python
    dicts; keys[i] corresponds to values[i].
    """
    key_arr = np.array([k for k in data_dict.keys()])
    val_arr = np.array([v for v in data_dict.values()])
    return key_arr, val_arr
@njit
def process_arrays(keys, values):
# Process with Numba
return np.sum(values * keys)
# Usage
data = {1: 10, 2: 20, 3: 30}
k, v = dict_to_arrays(data)
result = process_arrays(k, v)Use explicit typing to catch incompatibilities early:
from numba import njit, float64, int64
@njit(float64(float64[:], int64))
def typed_function(arr, n):
# Explicit types make requirements clear
total = 0.0
for i in range(n):
total += arr[i]
return total# Numba Best Practices and Common Pitfalls
print("=== Numba Best Practices ===")
# Example of what works well with Numba
@njit
def numba_friendly_function(arr):
"""Functions that work well with Numba"""
# ✅ Numerical computations
# ✅ Loops with numerical operations
# ✅ NumPy arrays and basic math functions
result = 0.0
for i in range(len(arr)):
result += arr[i] ** 2 + np.sin(arr[i])
return result
# Example of what doesn't work well
def numba_unfriendly_function(data):
    """Functions that don't work well with Numba (can't be compiled)"""
    # This relies on features nopython mode rejects: heterogeneous Python
    # lists, isinstance checks, and a result list of mixed element types.
    # Strings map to their length; every other element is doubled.
    return [len(item) if isinstance(item, str) else item * 2 for item in data]
# Demonstrate compilation overhead
@njit
def simple_function(x):
return x ** 2
print("Compilation overhead demonstration:")
# First call includes compilation time
start = time.time()
result1 = simple_function(5.0)
first_call_time = time.time() - start
# Second call is much faster (already compiled)
start = time.time()
result2 = simple_function(10.0)
second_call_time = time.time() - start
print(f"First call (with compilation): {first_call_time:.6f} seconds")
print(f"Second call (already compiled): {second_call_time:.6f} seconds")
print(f"Compilation overhead: {(first_call_time - second_call_time) * 1000:.2f} milliseconds")
# Type stability example
@njit
def type_stable_function(arr):
"""Type-stable function (good for Numba)"""
total = 0.0 # Always float
for val in arr:
total += val
return total
print(f"\nTesting type-stable function:")
test_arr = np.array([1.5, 2.3, 3.7, 4.1])
result = type_stable_function(test_arr)
print(f"Result: {result}")
print("\n" + "="*50)
print("NUMBA BEST PRACTICES SUMMARY")
print("="*50)
best_practices = """
✅ DO:
• Use for numerical computations and tight loops
• Work with NumPy arrays and basic math functions
• Keep functions type-stable (consistent types)
• Use @njit for maximum performance
• Consider @njit(parallel=True) for parallelizable loops
• Profile your code to identify bottlenecks first
❌ DON'T:
• Use for string manipulation or file I/O
• Mix different data types in the same array
• Use Python lists with mixed types
• Expect speedup for single function calls (compilation overhead)
• Use for functions that are already fast enough
🔧 OPTIMIZATION TIPS:
• Use explicit types: @njit('float64(float64[:])')
• Avoid object mode compilation
• Use parallel=True for independent iterations
• Consider using numba.typed containers for complex data
• Pre-compile with eager compilation for production
⚡ WHEN TO USE NUMBA:
• Numerical algorithms with loops
• Scientific computing applications
• Monte Carlo simulations
• Image/signal processing
• Machine learning computations
• Any CPU-bound numerical code
"""
print(best_practices)
# Performance summary
print("\n" + "="*50)
print("PERFORMANCE SUMMARY")
print("="*50)
print("🚀 Typical speedups with Numba:")
print("• Simple loops: 10-100x faster")
print("• Numerical algorithms: 50-1000x faster")
print("• Monte Carlo methods: 100-500x faster")
print("• Matrix operations: 2-50x faster (depends on size)")
print("• Parallel algorithms: Additional 2-8x on multi-core systems")Understanding how Numba handles types is crucial for writing efficient and reliable compiled code.
By default, Numba automatically infers types from function arguments:
@njit
def auto_typed_function(x, y):
return x + y
# Numba infers types from actual arguments:
result1 = auto_typed_function(5, 3) # int64, int64 → int64
result2 = auto_typed_function(5.0, 3.0) # float64, float64 → float64
result3 = auto_typed_function(5, 3.0) # int64, float64 → float64Advantages: - ✅ Easy to use - no type annotations needed - ✅ Flexible - works with different input types - ✅ Automatic promotion - handles mixed types sensibly
Disadvantages: - ⚠️ Multiple compilations - different types create different compiled versions - ⚠️ Compilation overhead - each new type combination triggers compilation - ⚠️ Larger memory usage - multiple compiled versions stored
You can specify exact types to control compilation:
from numba import njit, float64, int64, void
# Single signature - only accepts these exact types
@njit('float64(float64, float64)')
def explicit_typed_function(x, y):
return x + y
# Multiple signatures - pre-compile for specific combinations
@njit(['float64(float64, float64)',
'int64(int64, int64)',
'float64(int64, float64)'])
def multi_signature_function(x, y):
return x + y
# Array signatures
@njit('float64[:](float64[:])') # 1D array input and output
def array_function(arr):
return arr * 2
# Void function (no return value)
@njit('void(float64[:], float64)')
def inplace_function(arr, value):
for i in range(len(arr)):
arr[i] += valueint8, int16, int32, int64 - Signed integersuint8, uint16, uint32, uint64 - Unsigned integersfloat32, float64 - Floating point numberscomplex64, complex128 - Complex numbersboolean - Boolean valuesfloat64[:] - 1D arrayfloat64[:,:] - 2D arrayfloat64[:,:,:] - 3D arrayfloat64[::1] - Contiguous 1D array (faster)(float64, int64) - Tuple with specific typesListType(float64) - Homogeneous listKeep variable types consistent throughout the function:
# ❌ Type unstable - poor performance
@njit
def type_unstable():
x = 5 # int64
x = 5.0 # Now float64 - type changed!
return x
# ✅ Type stable - good performance
@njit
def type_stable():
x = 5.0 # float64
x = x + 1.0 # Still float64
return xSpecify contiguous arrays for better performance:
# Standard array - may not be contiguous
@njit('float64[:](float64[:])')
def standard_array_func(arr):
return arr * 2
# Contiguous array - guaranteed contiguous, faster
@njit('float64[::1](float64[::1])')
def contiguous_array_func(arr):
return arr * 2# Begin development with automatic inference
@njit
def develop_function(data):
# Develop and test your algorithm
return process_data(data)# Add explicit types once algorithm is stable
@njit('float64[:](float64[:], float64)')
def production_function(data, parameter):
# Same algorithm, but with explicit types
return process_data(data, parameter)from numba import types
@njit
def debug_types(x, y):
# This helps debug type issues
print("x type:", type(x))
print("y type:", type(y))
return x + ytry:
@njit('int64(int64, int64)')
def strict_function(x, y):
return x + y
# This will work
result = strict_function(5, 3)
# This will raise TypeError
# result = strict_function(5.0, 3.0)
except Exception as e:
print(f"Type error: {e}")
# Fallback to auto-inference
@njit
def flexible_function(x, y):
return x + y
result = flexible_function(5.0, 3.0)# Function signatures follow pattern: 'return_type(arg1_type, arg2_type, ...)'
@njit('float64(float64)') # f(x: float) → float
@njit('float64[:](float64[:], float64)') # f(arr: float[], scalar: float) → float[]
@njit('(float64, int64)(float64[:])') # f(arr: float[]) → (float, int)
@njit('void(float64[:], float64)') # f(arr: float[], val: float) → None
@njit('float64[:,::1](float64[:,::1])') # f(matrix: float[][]) → float[][]Understanding these concepts will help you write more efficient and reliable Numba code!
The collections module provides specialized data structures that extend Python’s built-in data types with additional functionality and improved performance for specific use cases.
# Counter - Counting Hashable Objects
from collections import Counter, defaultdict, namedtuple
print("=== Counter Examples ===")
# Basic counting
text = "hello world"
char_count = Counter(text)
print(f"Character count: {char_count}")=== Counter Examples ===
Character count: Counter({'l': 3, 'o': 2, 'h': 1, 'e': 1, ' ': 1, 'w': 1, 'r': 1, 'd': 1})
# Count words in text
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
word_count = Counter(words)
print(f"Word count: {word_count}")
# Most common elements
print(f"Most common: {word_count.most_common(2)}")
# Counter arithmetic
counter1 = Counter(['a', 'b', 'c', 'a'])
counter2 = Counter(['a', 'b', 'b', 'd'])Word count: Counter({'apple': 3, 'banana': 2, 'cherry': 1})
Most common: [('apple', 3), ('banana', 2)]
print(f"Counter 1: {counter1}")
print(f"Counter 2: {counter2}")
print(f"Addition: {counter1 + counter2}")
print(f"Subtraction: {counter1 - counter2}")
print(f"Intersection: {counter1 & counter2}")
print(f"Union: {counter1 | counter2}")
# Update counter
counter1.update(['e', 'e', 'f'])
print(f"After update: {counter1}")
# Total count
print(f"Total items: {sum(counter1.values())}")
# Elements (repeat each element count times)
print(f"All elements: {list(counter1.elements())}")Counter 1: Counter({'a': 2, 'b': 1, 'c': 1})
Counter 2: Counter({'b': 2, 'a': 1, 'd': 1})
Addition: Counter({'a': 3, 'b': 3, 'c': 1, 'd': 1})
Subtraction: Counter({'a': 1, 'c': 1})
Intersection: Counter({'a': 1, 'b': 1})
Union: Counter({'a': 2, 'b': 2, 'c': 1, 'd': 1})
After update: Counter({'a': 2, 'e': 2, 'b': 1, 'c': 1, 'f': 1})
Total items: 7
All elements: ['a', 'a', 'b', 'c', 'e', 'e', 'f']
# defaultdict - Dictionary with Default Values
print("=== defaultdict Examples ===")
# Compare regular dict vs defaultdict
print("Regular dict:")
regular_dict = {}
try:
print(regular_dict['missing_key'])
except KeyError:
print("KeyError: 'missing_key' not found")=== defaultdict Examples ===
Regular dict:
KeyError: 'missing_key' not found
print("\ndefaultdict with list:")
list_dict = defaultdict(list)
list_dict['fruits'].append('apple')
list_dict['fruits'].append('banana')
list_dict['vegetables'].append('carrot')
print(f"list_dict: {dict(list_dict)}")
print(f"Missing key returns: {list_dict['missing']}") # Returns empty list
# Different default factories
int_dict = defaultdict(int) # Default value: 0
int_dict['count'] += 1
int_dict['total'] += 5
print(f"int_dict: {dict(int_dict)}")
print(f"Missing key returns: {int_dict['new_key']}") # Returns 0
set_dict = defaultdict(set) # Default value: empty set
set_dict['tags'].add('python')
set_dict['tags'].add('programming')
print(f"set_dict: {dict(set_dict)}")
defaultdict with list:
list_dict: {'fruits': ['apple', 'banana'], 'vegetables': ['carrot']}
Missing key returns: []
# Word frequency using defaultdict
text = "the quick brown fox jumps over the lazy dog"
word_freq = defaultdict(lambda: 0)
for word in text.split():
word_freq[word] += 1
print(f"Word frequencies: {dict(word_freq)}")Word frequencies: {'the': 2, 'quick': 1, 'brown': 1, 'fox': 1, 'jumps': 1, 'over': 1, 'lazy': 1, 'dog': 1}
# namedtuple - Tuple Subclass with Named Fields
print("=== namedtuple Examples ===")
# Create a namedtuple class
Point = namedtuple('Point', ['x', 'y'])
Person = namedtuple('Person', ['name', 'age', 'city'])
# Create instances
p1 = Point(3, 4)
p2 = Point(x=1, y=2) # Can use keyword arguments
person = Person('Alice', 30, 'New York')
print(f"Point p1: {p1}")
print(f"Point p2: {p2}")
print(f"Person: {person}")=== namedtuple Examples ===
Point p1: Point(x=3, y=4)
Point p2: Point(x=1, y=2)
Person: Person(name='Alice', age=30, city='New York')
# Access fields by name (more readable than tuple[0])
print(f"p1.x = {p1.x}, p1.y = {p1.y}")
print(f"Person name: {person.name}, age: {person.age}")
# Still works like a tuple
print(f"p1[0] = {p1[0]}, p1[1] = {p1[1]}")
x, y = p1 # Unpacking works
print(f"Unpacked: x={x}, y={y}")p1.x = 3, p1.y = 4
Person name: Alice, age: 30
p1[0] = 3, p1[1] = 4
Unpacked: x=3, y=4
p1.x = 4--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[46], line 1 ----> 1 p1.x = 4 AttributeError: can't set attribute
# namedtuple methods
print(f"Fields: {person._fields}")
print(f"As dict: {person._asdict()}")
# Create new instance with some fields changed
person2 = person._replace(age=31)
print(f"Updated person: {person2}")
# Benefits over regular tuples and dicts
print("\nBenefits of namedtuple:")
print("✅ More readable than tuple[index]")
print("✅ More memory efficient than dict")
print("✅ Immutable (like tuples)")
print("✅ Works with all tuple operations")
print("✅ Self-documenting field names")Fields: ('name', 'age', 'city')
As dict: {'name': 'Alice', 'age': 30, 'city': 'New York'}
Updated person: Person(name='Alice', age=31, city='New York')
Benefits of namedtuple:
✅ More readable than tuple[index]
✅ More memory efficient than dict
✅ Immutable (like tuples)
✅ Works with all tuple operations
✅ Self-documenting field names
The functools module provides utilities for working with higher-order functions and operations on callable objects. It’s essential for functional programming patterns in Python.
# @lru_cache - Memoization for Performance
import functools
import time
print("=== @lru_cache Examples ===")
# Expensive function without caching
def fibonacci_slow(n):
    """Naive recursive Fibonacci — exponential time, kept as a baseline."""
    # Base cases fib(0)=0, fib(1)=1; otherwise recurse on both predecessors.
    return n if n < 2 else fibonacci_slow(n - 1) + fibonacci_slow(n - 2)
# Same function with caching
@functools.lru_cache(maxsize=128)
def fibonacci_cached(n):
    """Memoized recursive Fibonacci; lru_cache makes it effectively linear."""
    # Identical recurrence to fibonacci_slow — the cache does the speedup.
    return n if n < 2 else fibonacci_cached(n - 1) + fibonacci_cached(n - 2)
# Performance comparison
def time_function(func, n):
    """Call func(n) once and return (result, elapsed wall-clock seconds)."""
    began = time.time()
    value = func(n)
    return value, time.time() - began
# Test with smaller number for slow version
n = 30
result_slow, time_slow = time_function(fibonacci_slow, n)
result_cached, time_cached = time_function(fibonacci_cached, n)
print(f"Fibonacci({n}):")
print(f"Without cache: {result_slow} (took {time_slow:.6f} seconds)")
print(f"With cache: {result_cached} (took {time_cached:.6f} seconds)")
print(f"Speedup: {time_slow / time_cached:.1f}x faster!")
# Cache statistics
print(f"Cache info: {fibonacci_cached.cache_info()}")
# Clear cache
fibonacci_cached.cache_clear()
print(f"After clearing cache: {fibonacci_cached.cache_info()}")
# Different cache sizes
@functools.lru_cache(maxsize=None) # Unlimited cache
def unlimited_cache_func(x):
return x ** 2
@functools.lru_cache(maxsize=2) # Very small cache
def small_cache_func(x):
return x ** 2
# Test cache behavior
print("\nCache behavior with different sizes:")
for i in range(5):
small_cache_func(i)
print(f"Small cache info: {small_cache_func.cache_info()}")
# Practical example: API call caching
@functools.lru_cache(maxsize=100)
def fetch_user_data(user_id):
"""Simulate expensive API call"""
print(f"Fetching data for user {user_id}...")
time.sleep(0.1) # Simulate network delay
return {'id': user_id, 'name': f'User{user_id}', 'email': f'user{user_id}@example.com'}
# First calls are slow
start = time.time()
for user_id in [1, 2, 3, 1, 2]: # Note: 1 and 2 are repeated
user_data = fetch_user_data(user_id)
print(f"Got: {user_data['name']}")
end = time.time()
print(f"Total time: {end - start:.2f} seconds")
print(f"Cache info: {fetch_user_data.cache_info()}")# reduce and Other Functools Utilities
print("=== reduce Examples ===")
# reduce applies function cumulatively to sequence
numbers = [1, 2, 3, 4, 5]
# Sum using reduce
total = functools.reduce(lambda x, y: x + y, numbers)
print(f"Sum of {numbers} = {total}")
# Product using reduce
product = functools.reduce(lambda x, y: x * y, numbers)
print(f"Product of {numbers} = {product}")
# Maximum using reduce
maximum = functools.reduce(lambda x, y: x if x > y else y, numbers)
print(f"Maximum of {numbers} = {maximum}")
# Reduce with initial value
total_with_init = functools.reduce(lambda x, y: x + y, numbers, 100)
print(f"Sum with initial value 100: {total_with_init}")
# String operations with reduce
words = ['Python', 'is', 'awesome', 'for', 'data', 'science']
sentence = functools.reduce(lambda x, y: x + ' ' + y, words)
print(f"Sentence: {sentence}")
# Practical example: Nested dictionary access
nested_dict = {
'level1': {
'level2': {
'level3': {
'data': 'found it!'
}
}
}
}
def get_nested(dictionary, key):
    """One reduce step for nested access: index dictionary by key.

    Raises KeyError if key is absent, like plain subscripting.
    """
    value = dictionary[key]
    return value
keys = ['level1', 'level2', 'level3', 'data']
result = functools.reduce(get_nested, keys, nested_dict)
print(f"Nested access result: {result}")
print("\n=== @wraps - Proper Decorator Creation ===")
# Without @wraps (bad)
def bad_decorator(func):
    """Logging decorator that deliberately omits @functools.wraps, so the
    wrapper masks the wrapped function's __name__ and __doc__ (the point
    of this example)."""
    def wrapper(*args, **kwargs):
        # NOTE: no functools.wraps here — metadata is lost on purpose.
        print(f"Calling {func.__name__}")
        return func(*args, **kwargs)
    return wrapper
# With @wraps (good)
def good_decorator(func):
    """Logging decorator that applies @functools.wraps so the wrapper
    keeps func's __name__, __doc__ and other metadata."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print(f"Calling {func.__name__}")
        return func(*args, **kwargs)
    return wrapper
@bad_decorator
def function_with_bad_decorator():
"""This function has a bad decorator"""
return "Hello from bad decorator"
@good_decorator
def function_with_good_decorator():
"""This function has a good decorator"""
return "Hello from good decorator"
# Compare the results
print(f"Bad decorator name: {function_with_bad_decorator.__name__}")
print(f"Bad decorator doc: {function_with_bad_decorator.__doc__}")
print(f"Good decorator name: {function_with_good_decorator.__name__}")
print(f"Good decorator doc: {function_with_good_decorator.__doc__}")
print("\n=== @singledispatch - Function Overloading ===")
@functools.singledispatch
def process_data(arg):
"""Default implementation"""
print(f"Processing unknown type: {type(arg).__name__}")
return str(arg)
@process_data.register
def _(arg: int):
"""Handle integers"""
print(f"Processing integer: {arg}")
return arg * 2
@process_data.register
def _(arg: str):
"""Handle strings"""
print(f"Processing string: {arg}")
return arg.upper()
@process_data.register
def _(arg: list):
"""Handle lists"""
print(f"Processing list of {len(arg)} items")
return sum(arg) if all(isinstance(x, (int, float)) for x in arg) else len(arg)
# Test singledispatch
test_values = [42, "hello", [1, 2, 3, 4], {'key': 'value'}]
for value in test_values:
result = process_data(value)
print(f"Result: {result}\n")
print("=== Summary ===")
print("✅ @lru_cache: Automatic memoization for expensive functions")
print("✅ partial: Create specialized functions by fixing some arguments")
print("✅ reduce: Apply function cumulatively to sequences")
print("✅ @wraps: Preserve function metadata in decorators")
print("✅ @singledispatch: Function overloading based on first argument type")YAML (YAML Ain’t Markup Language) is a human-readable data serialization standard commonly used for configuration files, data exchange, and documentation.
pip install PyYAML# Basic YAML Syntax and Data Types
import yaml
import json
from io import StringIO
print("=== Basic YAML Data Types ===")
# Basic YAML content
basic_yaml = """
# This is a comment
string_value: "Hello, World!"
integer_value: 42
float_value: 3.14159
boolean_true: true
boolean_false: false
null_value: null
empty_value:
# Different string formats
single_quoted: 'Single quotes'
double_quoted: "Double quotes"
unquoted: Unquoted string
multiline_string: |
This is a multiline string
that preserves line breaks
and formatting.
folded_string: >
This is a folded string
that will be joined into
a single line with spaces.
"""
# Parse YAML
try:
data = yaml.safe_load(basic_yaml)
print("Parsed YAML data:")
for key, value in data.items():
print(f" {key}: {value} ({type(value).__name__})")
except yaml.YAMLError as e:
print(f"YAML Error: {e}")
print(f"\nMultiline string:\n{repr(data['multiline_string'])}")
print(f"\nFolded string:\n{repr(data['folded_string'])}")
# Convert back to YAML
print("\n=== Converting Python to YAML ===")
python_data = {
'name': 'Alice',
'age': 30,
'active': True,
'score': 95.5,
'metadata': None
}
yaml_output = yaml.dump(python_data, default_flow_style=False)
print("Python dict to YAML:")
print(yaml_output)
# Pretty printing with custom formatting
yaml_formatted = yaml.dump(
python_data,
default_flow_style=False,
indent=2,
sort_keys=True
)
print("Formatted YAML:")
print(yaml_formatted)# YAML Collections and Complex Data Structures
print("=== YAML Collections ===")
collections_yaml = """
# Lists (Arrays)
fruits:
- apple
- banana
- cherry
# Inline list format
colors: [red, green, blue]
# Nested lists
matrix:
- [1, 2, 3]
- [4, 5, 6]
- [7, 8, 9]
# Dictionaries (Maps)
person:
name: Alice
age: 30
address:
street: 123 Main St
city: New York
zip: 10001
# Inline dictionary format
point: {x: 10, y: 20}
# List of dictionaries
employees:
- name: Alice
department: Engineering
salary: 75000
- name: Bob
department: Marketing
salary: 65000
- name: Charlie
department: Engineering
salary: 80000
# Mixed complex structure
project:
name: "Data Analysis Tool"
version: "1.2.3"
dependencies:
- numpy>=1.20.0
- pandas>=1.3.0
- matplotlib>=3.4.0
config:
debug: false
max_workers: 4
output_formats: [csv, json, xlsx]
team:
lead: Alice
members:
- Bob
- Charlie
- Diana
"""
# Parse complex YAML
collections_data = yaml.safe_load(collections_yaml)
print("Fruits list:", collections_data['fruits'])
print("Person info:", collections_data['person'])
print("Project dependencies:", collections_data['project']['dependencies'])
# Access nested data
print(f"\nProject lead: {collections_data['project']['team']['lead']}")
print(f"First employee: {collections_data['employees'][0]['name']}")
print(f"Address city: {collections_data['person']['address']['city']}")
# Convert complex Python structure to YAML
complex_data = {
'database': {
'host': 'localhost',
'port': 5432,
'credentials': {
'username': 'admin',
'password': 'secret123'
},
'pools': {
'min_connections': 5,
'max_connections': 20
}
},
'api': {
'endpoints': [
{'path': '/users', 'methods': ['GET', 'POST']},
{'path': '/orders', 'methods': ['GET', 'POST', 'PUT']},
{'path': '/health', 'methods': ['GET']}
],
'rate_limiting': {
'enabled': True,
'requests_per_minute': 1000
}
},
'logging': {
'level': 'INFO',
'handlers': ['console', 'file'],
'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
}
}
yaml_complex = yaml.dump(complex_data, default_flow_style=False, indent=2)
print("\n=== Complex Data as YAML ===")
print(yaml_complex)# Advanced YAML Features
print("=== YAML Anchors and References ===")
# YAML with anchors (&) and references (*)
advanced_yaml = """
# Define anchors for reuse
defaults: &defaults
timeout: 30
retries: 3
log_level: INFO
# Database configurations using anchors
database_config:
development:
<<: *defaults # Merge defaults
host: dev-db.example.com
port: 5432
debug: true
staging:
<<: *defaults
host: staging-db.example.com
port: 5432
debug: false
production:
<<: *defaults
host: prod-db.example.com
port: 5432
timeout: 60 # Override default timeout
debug: false
# Reference the same values
admin_user: &admin
username: admin
permissions:
- read
- write
- delete
# Use the reference
users:
primary: *admin
backup: *admin
# List anchors
common_dependencies: &common_deps
- numpy
- pandas
- matplotlib
projects:
project_a:
name: "Data Analysis"
dependencies:
- *common_deps
- scipy
- sklearn
project_b:
name: "Visualization"
dependencies:
- *common_deps
- seaborn
- plotly
"""
advanced_data = yaml.safe_load(advanced_yaml)
print("Development config:", advanced_data['database_config']['development'])
print("Production config:", advanced_data['database_config']['production'])
print("Admin user:", advanced_data['users']['primary'])
print("\n=== Multi-Document YAML ===")
# A single YAML stream may contain several documents separated by `---`.
multi_doc_yaml = """
---
# Document 1: Configuration
name: "Application Config"
version: "1.0"
settings:
  debug: true
  port: 8080
---
# Document 2: Users
users:
  - name: Alice
    role: admin
  - name: Bob
    role: user
---
# Document 3: Database
database:
  host: localhost
  port: 5432
  name: myapp_db
"""
# safe_load_all yields one Python object per document; materialise them all.
documents = [doc for doc in yaml.safe_load_all(multi_doc_yaml)]
print(f"Number of documents: {len(documents)}")
doc_number = 0
for doc in documents:
    doc_number += 1
    print(f"Document {doc_number}: {list(doc.keys())}")
print("\n=== YAML Validation and Error Handling ===")

def _report_parse(sample, index):
    """Parse one YAML snippet, reporting either the result or the parser error."""
    try:
        parsed = yaml.safe_load(sample)
    except yaml.YAMLError as err:
        print(f"Invalid YAML {index} error: {err}")
    else:
        print(f"Invalid YAML {index} loaded: {parsed}")

# Deliberately malformed (or surprising) snippets.
invalid_samples = (
    "key: value\n invalid_indent: bad",             # inconsistent indentation
    "key: [unclosed, list",                         # unclosed bracket
    "duplicate_key: value1\nduplicate_key: value2"  # duplicate keys (allowed by default)
)
for index, sample in enumerate(invalid_samples, start=1):
    _report_parse(sample, index)
print("\n=== Safe vs Unsafe Loading ===")
# safe_load rejects python-specific tags such as !!python/object/apply,
# so this payload cannot execute code — it raises a YAMLError instead.
dangerous_yaml = """
# This could be dangerous with yaml.load()
data: !!python/object/apply:os.system ['echo "This could be dangerous"']
safe_data: "This is safe"
"""
# Always use safe_load for untrusted input.
try:
    safe_result = yaml.safe_load(dangerous_yaml)
except yaml.YAMLError as e:
    print(f"Safe load error: {e}")
else:
    print("Safe load result:", safe_result)
print("\n=== Working with Files ===")
# Create a sample config file content, then round-trip it dict -> YAML -> dict.
config_content = {
    'app': {
        'name': 'My Application',
        'version': '2.1.0',
        'debug': False
    },
    'database': {
        'host': 'localhost',
        'port': 5432,
        'name': 'app_db',
        'pool_size': 10
    },
    'logging': {
        'level': 'INFO',
        'file': 'app.log',
        'max_size': '10MB'
    }
}
# Convert to YAML string (simulating file content).
yaml_file_content = yaml.dump(config_content, default_flow_style=False, indent=2)
print("Config file content:")
print(yaml_file_content)
# Parse it back — the result should equal the original dict.
parsed_config = yaml.safe_load(yaml_file_content)
print("Parsed config:")
print(f"App name: {parsed_config['app']['name']}")
print(f"Database host: {parsed_config['database']['host']}")
print(f"Log level: {parsed_config['logging']['level']}")# Practical YAML Examples and Use Cases
print("=== Docker Compose Example ===")
docker_compose = """
version: '3.8'
services:
web:
build: .
ports:
- "8000:8000"
environment:
- DEBUG=1
- DATABASE_URL=postgresql://user:pass@db:5432/myapp
depends_on:
- db
- redis
volumes:
- .:/app
- /app/node_modules
db:
image: postgres:13
environment:
POSTGRES_DB: myapp
POSTGRES_USER: user
POSTGRES_PASSWORD: pass
volumes:
- postgres_data:/var/lib/postgresql/data
ports:
- "5432:5432"
redis:
image: redis:6-alpine
ports:
- "6379:6379"
volumes:
postgres_data:
networks:
default:
driver: bridge
"""
compose_data = yaml.safe_load(docker_compose)
print("Docker Compose services:", list(compose_data['services'].keys()))
print("Web service ports:", compose_data['services']['web']['ports'])
print("\n=== CI/CD Pipeline Example (GitHub Actions) ===")
github_actions = """
name: Python CI/CD
on:
push:
branches: [main, develop]
pull_request:
branches: [main]
env:
PYTHON_VERSION: "3.9"
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8, 3.9, "3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest coverage
- name: Run tests
run: |
pytest tests/ --cov=src/
coverage xml
- name: Upload coverage
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
deploy:
needs: test
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v3
- name: Deploy to production
run: |
echo "Deploying to production..."
# Add deployment commands here
"""
ci_data = yaml.safe_load(github_actions)
print("CI/CD jobs:", list(ci_data['jobs'].keys()))
print("Python versions tested:", ci_data['jobs']['test']['strategy']['matrix']['python-version'])
print("\n=== Kubernetes Configuration Example ===")
k8s_config = """
apiVersion: apps/v1
kind: Deployment
metadata:
name: web-app
labels:
app: web-app
spec:
replicas: 3
selector:
matchLabels:
app: web-app
template:
metadata:
labels:
app: web-app
spec:
containers:
- name: web-app
image: myapp:latest
ports:
- containerPort: 8000
env:
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: db-secret
key: url
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
livenessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 30
periodSeconds: 10
---
apiVersion: v1
kind: Service
metadata:
name: web-app-service
spec:
selector:
app: web-app
ports:
- protocol: TCP
port: 80
targetPort: 8000
type: LoadBalancer
"""
k8s_docs = list(yaml.safe_load_all(k8s_config))
print("Kubernetes resources:", [doc['kind'] for doc in k8s_docs])
print("Deployment replicas:", k8s_docs[0]['spec']['replicas'])
print("\n=== Application Configuration Example ===")
# Real-world application config expressed as a nested Python dict,
# then serialised to YAML with sorted keys.
app_config = {
    'app': {
        'name': 'Data Processing Pipeline',
        'version': '2.3.1',
        'environment': 'production'
    },
    'server': {
        'host': '0.0.0.0',
        'port': 8080,
        'workers': 4,
        'timeout': 30
    },
    'database': {
        'primary': {
            'host': 'db-primary.example.com',
            'port': 5432,
            'database': 'app_prod',
            'pool': {
                'min_connections': 5,
                'max_connections': 20,
                'timeout': 10
            }
        },
        'replica': {
            'host': 'db-replica.example.com',
            'port': 5432,
            'database': 'app_prod',
            'readonly': True
        }
    },
    'redis': {
        'host': 'redis.example.com',
        'port': 6379,
        'db': 0,
        'ttl': 3600
    },
    'logging': {
        'level': 'INFO',
        'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        'handlers': [
            {
                'type': 'console',
                'level': 'INFO'
            },
            {
                'type': 'file',
                'level': 'DEBUG',
                'filename': '/var/log/app.log',
                'max_bytes': 10485760,  # 10MB
                'backup_count': 5
            }
        ]
    },
    'features': {
        'enable_caching': True,
        'enable_metrics': True,
        'enable_tracing': False,
        'max_file_size_mb': 100
    }
}
# Convert to YAML; sort_keys=True yields a stable, diff-friendly ordering.
config_yaml = yaml.dump(app_config, default_flow_style=False, indent=2, sort_keys=True)
print("Application Configuration YAML:")
print(config_yaml[:500] + "...")  # Show first 500 chars
print("\n=== YAML Best Practices ===")
# Closing summary for the YAML section (single multi-line string).
print("""
✅ DO:
• Use consistent indentation (2 or 4 spaces)
• Add comments to explain complex configurations
• Use meaningful key names
• Validate YAML syntax before deployment
• Use anchors (&) and references (*) to avoid duplication
• Always use yaml.safe_load() for untrusted input
❌ DON'T:
• Mix tabs and spaces for indentation
• Use yaml.load() with untrusted input (security risk)
• Create overly nested structures (hard to read)
• Ignore YAML syntax errors
• Use special characters in keys without quotes
🔧 COMMON USE CASES:
• Configuration files (apps, servers, tools)
• CI/CD pipeline definitions (GitHub Actions, GitLab CI)
• Infrastructure as Code (Kubernetes, Docker Compose)
• Data exchange between systems
• Documentation with structured data
""")TOML (Tom’s Obvious Minimal Language) is a human-readable configuration file format that’s designed to be easy to read and write. It’s commonly used for configuration files in modern projects, especially in the Python ecosystem.
TOML is used by Python's pyproject.toml, Rust's Cargo.toml, and many other tools; Python projects rely on pyproject.toml for project configuration. TOML reading support is built into Python 3.11+ (as `tomllib`), but for older versions you need an external package:
pip install tomli tomli-w # For reading and writing TOML
# or
pip install toml # Alternative library# Basic TOML Syntax and Data Types
import sys

# Resolve a TOML reader (bound to the name ``tomllib``) and, when available, a
# writer (``tomli_w``).  ``HAS_TOMLI_W`` tells later cells whether TOML can be
# written.  Both names get safe defaults FIRST so that a completely missing
# TOML stack cannot leave them undefined: the original fell through its final
# ImportError branch without binding either name, causing a NameError when
# ``HAS_TOMLI_W`` / ``tomllib`` were used further down.
HAS_TOMLI_W = False
tomllib = None  # later cells guard usage with try/except, so None is safe

if sys.version_info >= (3, 11):
    # Python 3.11+ has built-in tomllib for reading TOML (read-only).
    import tomllib
    # For writing, we still need an external library.
    try:
        import tomli_w
        HAS_TOMLI_W = True
    except ImportError:
        print("⚠️ Install tomli-w for writing TOML: pip install tomli-w")
else:
    # For older Python versions: tomli mirrors the tomllib API exactly.
    try:
        import tomli as tomllib
        import tomli_w
        HAS_TOMLI_W = True
    except ImportError:
        try:
            import toml as tomllib
            HAS_TOMLI_W = True
            tomli_w = tomllib  # the toml library can both read and write
        except ImportError:
            print("❌ Install TOML library: pip install tomli tomli-w")
            print("   Or for Python 3.11+, no installation needed for reading")
print("=== TOML Basic Data Types ===")
# Basic TOML syntax examples covering every value type TOML supports.
basic_toml = '''
# This is a TOML comment
# Key-value pairs
title = "My Application"
version = "1.0.0"
debug = true
port = 8080
pi = 3.14159
# Strings
name = "John Doe"
multiline_string = """
This is a multiline string.
It can span multiple lines.
"""
# Dates and times
created_date = 2025-08-13
created_time = 2025-08-13T10:30:00Z
local_time = 10:30:00
# Arrays
numbers = [1, 2, 3, 4, 5]
mixed_types = ["string", 123, true, 3.14]
nested_arrays = [[1, 2], [3, 4], [5, 6]]
# Inline tables (like dictionaries)
database = { host = "localhost", port = 5432, name = "myapp" }
'''
# Parse the TOML string.  ``tomllib`` is bound above to tomllib/tomli/toml
# depending on the interpreter, and all of them expose loads() — so no
# version branch is needed (the original if/else ran the identical call in
# both arms).
try:
    data = tomllib.loads(basic_toml)
    print("✅ TOML parsed successfully!")
    print(f"Title: {data['title']}")
    print(f"Version: {data['version']}")
    print(f"Debug mode: {data['debug']}")
    print(f"Port: {data['port']}")
    print(f"Numbers array: {data['numbers']}")
    print(f"Database config: {data['database']}")
    # TOML dates parse into real datetime.date/datetime objects, not strings.
    print(f"Created date: {data['created_date']} (type: {type(data['created_date'])})")
except Exception as e:
    print(f"❌ Error parsing TOML: {e}")
print("\n=== Data Type Details ===")
print("TOML supports these data types:")
print("• String: 'text' or \"text\" or '''multiline'''")
print("• Integer: 123, +456, -789")
print("• Float: 3.14, -0.01, 5e+22")
print("• Boolean: true, false")
print("• Datetime: 1979-05-27T07:32:00Z")
print("• Local Datetime: 1979-05-27T07:32:00")
print("• Local Date: 1979-05-27")
print("• Local Time: 07:32:00")
print("• Array: [1, 2, 3]")
print("• Inline Table: { key = \"value\", num = 42 }")
print("=== TOML Tables (Sections) ===")
# TOML with tables/sections: [table], nested [a.b], and arrays of tables [[x]].
config_toml = '''
# Global settings
app_name = "My Web App"
version = "2.1.0"
# Table/Section for database configuration
[database]
host = "localhost"
port = 5432
username = "admin"
password = "secret123"
database_name = "myapp_prod"
ssl_mode = "require"
# Another table for server configuration
[server]
host = "0.0.0.0"
port = 8080
workers = 4
timeout = 30
enable_ssl = true
# Nested tables using dot notation
[logging.handlers]
console = { level = "INFO", format = "%(levelname)s: %(message)s" }
file = { level = "DEBUG", filename = "/var/log/app.log" }
[logging.loggers]
"myapp.database" = { level = "WARNING" }
"myapp.auth" = { level = "INFO" }
# Arrays of tables
[[users]]
name = "Alice"
email = "alice@example.com"
role = "admin"
active = true
[[users]]
name = "Bob"
email = "bob@example.com"
role = "user"
active = true
[[users]]
name = "Charlie"
email = "charlie@example.com"
role = "moderator"
active = false
# Complex nested structure
[features]
authentication = true
caching = true
rate_limiting = false
[features.cache]
backend = "redis"
ttl = 3600
max_size = "100MB"
[features.auth]
providers = ["local", "oauth", "ldap"]
session_timeout = 1800
max_attempts = 3
'''
# Parse the configuration and walk the resulting nested dicts/lists.
try:
    config_data = tomllib.loads(config_toml)
    print("✅ Configuration loaded successfully!")
    print(f"App: {config_data['app_name']} v{config_data['version']}")
    print(f"Database: {config_data['database']['host']}:{config_data['database']['port']}")
    print(f"Server: {config_data['server']['host']}:{config_data['server']['port']}")
    print(f"\nLogging handlers: {list(config_data['logging']['handlers'].keys())}")
    print(f"Console log level: {config_data['logging']['handlers']['console']['level']}")
    # [[users]] parses into a list of dicts.
    print(f"\nUsers ({len(config_data['users'])}):")
    for user in config_data['users']:
        status = "✅" if user['active'] else "❌"
        print(f" {status} {user['name']} ({user['role']}) - {user['email']}")
    print(f"\nFeatures enabled:")
    # [features] holds both booleans and sub-tables; only report the booleans.
    for feature, enabled in config_data['features'].items():
        if isinstance(enabled, bool):
            status = "✅" if enabled else "❌"
            print(f" {status} {feature}")
    print(f"\nCache configuration:")
    cache_config = config_data['features']['cache']
    print(f" Backend: {cache_config['backend']}")
    print(f" TTL: {cache_config['ttl']} seconds")
    print(f" Max size: {cache_config['max_size']}")
except Exception as e:
    print(f"❌ Error parsing configuration: {e}")
print("\n=== TOML Table Syntax Rules ===")
print("""
1. [table] - Creates a new table/section
2. [parent.child] - Creates nested table
3. [[array_of_tables]] - Creates array of tables
4. Key-value pairs under a table belong to that table
5. Tables can be defined in any order
6. Duplicate table names are not allowed
""")# Writing TOML and File Operations
from pathlib import Path
import tempfile
from datetime import datetime, date, time
print("=== Writing TOML Data ===")
# Create sample configuration data to serialise with tomli_w in the next cell.
# Note: this rebinds the module-level ``app_config`` used in the YAML section.
app_config = {
    'app': {
        'name': 'Data Processing Pipeline',
        'version': '3.2.1',
        'description': 'A high-performance data processing application',
        'author': 'Development Team',
        'license': 'MIT'
    },
    'server': {
        'host': '0.0.0.0',
        'port': 8080,
        'workers': 4,
        'debug': False,
        'reload': True
    },
    'database': {
        'primary': {
            'host': 'db-primary.example.com',
            'port': 5432,
            'database': 'app_prod',
            'username': 'app_user',
            'ssl_mode': 'require',
            'pool_size': 20,
            'timeout': 30.0
        },
        'replica': {
            'host': 'db-replica.example.com',
            'port': 5432,
            'database': 'app_prod',
            'readonly': True
        }
    },
    'cache': {
        'backend': 'redis',
        'host': 'cache.example.com',
        'port': 6379,
        'db': 0,
        'ttl': 3600,
        'max_memory': '2GB'
    },
    'logging': {
        'level': 'INFO',
        'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        'file': '/var/log/app.log',
        'rotation': {
            'max_size': '100MB',
            'backup_count': 5
        }
    },
    'features': {
        'authentication': True,
        'rate_limiting': True,
        'monitoring': True,
        'analytics': False
    },
    'api': {
        'version': 'v2',
        'base_url': '/api/v2',
        'rate_limit': 1000,
        'timeout': 30,
        'allowed_methods': ['GET', 'POST', 'PUT', 'DELETE'],
        'cors': {
            'enabled': True,
            'origins': ['https://app.example.com', 'https://admin.example.com'],
            'methods': ['GET', 'POST', 'PUT'],
            'headers': ['Content-Type', 'Authorization']
        }
    },
    # A list of dicts serialises as a TOML array of tables ([[monitoring]]).
    'monitoring': [
        {
            'name': 'Prometheus',
            'endpoint': '/metrics',
            'enabled': True,
            'interval': 30
        },
        {
            'name': 'Health Check',
            'endpoint': '/health',
            'enabled': True,
            'interval': 10
        }
    ],
    'metadata': {
        # date objects serialise as native TOML local dates.
        'created': datetime.now().date(),
        'last_updated': datetime.now().date(),
        'config_version': '2.0'
    }
}
# Write TOML to string, round-trip through a temp file, then clean up.
if HAS_TOMLI_W:
    try:
        toml_string = tomli_w.dumps(app_config)
        print("✅ TOML string created successfully!")
        print("First 500 characters:")
        print(toml_string[:500])
        print("...")
        # Write to temporary file and read it back.
        # delete=False so the file survives the `with` and can be reopened.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.toml', delete=False) as f:
            f.write(toml_string)
            temp_file = f.name
        print(f"\n✅ TOML written to temporary file: {temp_file}")
        # Read the file back.  tomllib/tomli require binary mode ('rb');
        # NOTE(review): if the `toml` fallback package is what got bound to
        # ``tomllib``, its load() expects text mode — the except below would
        # catch that.  Confirm which library is active.
        with open(temp_file, 'rb') as f:
            loaded_config = tomllib.load(f)
        print("✅ TOML file read back successfully!")
        print(f"App name: {loaded_config['app']['name']}")
        print(f"App version: {loaded_config['app']['version']}")
        print(f"Database host: {loaded_config['database']['primary']['host']}")
        print(f"API endpoints: {len(loaded_config['monitoring'])} monitoring endpoints")
        # Clean up the temp file.
        Path(temp_file).unlink()
        print(f"🗑️ Temporary file cleaned up")
    except Exception as e:
        print(f"❌ Error with TOML operations: {e}")
else:
    print("⚠️ TOML writing not available. Install tomli-w or toml library.")
print("\n=== TOML vs Other Formats ===")
# Summary of trade-offs versus JSON/YAML/INI/XML.
print("TOML advantages:")
print("✅ More readable than JSON")
print("✅ More structured than INI")
print("✅ Less verbose than XML/YAML")
print("✅ Built-in data types (dates, times)")
print("✅ Comments support")
print("✅ No ambiguity in parsing")
print("\nTOML limitations:")
print("❌ Less flexible than YAML")
# Fixed claim: TOML v1.0 *does* allow arrays to span multiple lines;
# a real limitation is that TOML has no null/None value type.
print("❌ No null/None value type")
print("❌ Limited nesting compared to JSON")
print("❌ Newer format (less tooling)")
print("\n=== Best Practices ===")
print("1. Use TOML for configuration files")
print("2. Group related settings in tables")
print("3. Use meaningful comments")
print("4. Prefer explicit over implicit")
print("5. Use arrays of tables for repeated structures")
print("6. Keep nesting reasonable (max 2-3 levels)")
print("=== Python pyproject.toml Example ===")
# A representative Poetry-based pyproject.toml.
# The outer delimiter must be double triple-quotes: the embedded
# ``exclude = \'\'\'...\'\'\'`` block would otherwise terminate a
# '''-delimited Python string early (SyntaxError in the original).
pyproject_toml = """
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "my-awesome-package"
version = "0.1.0"
description = "A package that does awesome things"
authors = ["Your Name <you@example.com>"]
license = "MIT"
readme = "README.md"
homepage = "https://github.com/yourusername/my-awesome-package"
repository = "https://github.com/yourusername/my-awesome-package"
documentation = "https://my-awesome-package.readthedocs.io"
keywords = ["python", "awesome", "package"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
]
[tool.poetry.dependencies]
python = "^3.8"
requests = "^2.28.0"
click = "^8.1.0"
pydantic = "^1.10.0"
[tool.poetry.group.dev.dependencies]
pytest = "^7.1.0"
black = "^22.3.0"
isort = "^5.10.0"
flake8 = "^4.0.0"
mypy = "^0.961"
pre-commit = "^2.19.0"
[tool.poetry.scripts]
my-cli = "my_package.cli:main"
[tool.black]
line-length = 88
target-version = ['py38', 'py39', 'py310', 'py311']
include = '\\.pyi?$'
exclude = '''
/(
    \\.git
  | \\.venv
  | build
  | dist
)/
'''
[tool.isort]
profile = "black"
multi_line_output = 3
line_length = 88
[tool.mypy]
python_version = "3.8"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = "-v --tb=short --strict-markers"
markers = [
    "slow: marks tests as slow (deselect with '-m \\"not slow\\"')",
    "integration: marks tests as integration tests",
]
"""
try:
    pyproject_data = tomllib.loads(pyproject_toml)
    print("✅ pyproject.toml parsed successfully!")
    print(f"Package: {pyproject_data['tool']['poetry']['name']}")
    print(f"Version: {pyproject_data['tool']['poetry']['version']}")
    print(f"Python requirement: {pyproject_data['tool']['poetry']['dependencies']['python']}")
    print(f"Dependencies: {len(pyproject_data['tool']['poetry']['dependencies']) - 1}")  # -1 for python
    print(f"Dev dependencies: {len(pyproject_data['tool']['poetry']['group']['dev']['dependencies'])}")
    print(f"Black line length: {pyproject_data['tool']['black']['line-length']}")
except Exception as e:
    print(f"❌ Error parsing pyproject.toml: {e}")
print("\n=== Rust Cargo.toml Example ===")
# A representative Cargo manifest: package metadata, deps with feature flags,
# binary/bench targets, and build profiles.
cargo_toml = '''
[package]
name = "my-rust-app"
version = "0.1.0"
edition = "2021"
authors = ["Your Name <you@example.com>"]
license = "MIT OR Apache-2.0"
description = "A fast and reliable Rust application"
homepage = "https://github.com/yourusername/my-rust-app"
repository = "https://github.com/yourusername/my-rust-app"
readme = "README.md"
keywords = ["cli", "performance", "rust"]
categories = ["command-line-utilities"]
[dependencies]
clap = { version = "4.0", features = ["derive"] }
tokio = { version = "1.0", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
reqwest = { version = "0.11", features = ["json"] }
anyhow = "1.0"
tracing = "0.1"
tracing-subscriber = "0.3"
[dev-dependencies]
criterion = "0.5"
tempfile = "3.0"
[[bin]]
name = "my-app"
path = "src/main.rs"
[[bench]]
name = "performance"
harness = false
[profile.release]
opt-level = 3
lto = true
codegen-units = 1
panic = "abort"
[profile.dev]
opt-level = 0
debug = true
'''
try:
    cargo_data = tomllib.loads(cargo_toml)
    print("✅ Cargo.toml parsed successfully!")
    print(f"Package: {cargo_data['package']['name']}")
    print(f"Edition: {cargo_data['package']['edition']}")
    print(f"Dependencies: {len(cargo_data['dependencies'])}")
    print(f"Release optimization: {cargo_data['profile']['release']['opt-level']}")
except Exception as e:
    print(f"❌ Error parsing Cargo.toml: {e}")
print("\n=== Application Configuration Example ===")
# A full application config in TOML: global tables, nested handler tables,
# an array of payment-provider tables, and boolean feature flags.
app_config_toml = '''
# Application Configuration
[app]
name = "E-commerce API"
version = "2.4.1"
environment = "production"
debug = false
[server]
host = "0.0.0.0"
port = 8000
workers = 8
keep_alive = 65
max_request_size = "10MB"
[database]
url = "postgresql://user:pass@db.example.com:5432/ecommerce"
pool_size = 20
max_overflow = 30
pool_timeout = 30
pool_recycle = 3600
[cache]
backend = "redis"
url = "redis://cache.example.com:6379/0"
default_timeout = 300
key_prefix = "ecom:"
[security]
secret_key = "your-super-secret-key-here"
algorithm = "HS256"
access_token_expire_minutes = 30
refresh_token_expire_days = 7
[email]
smtp_host = "smtp.example.com"
smtp_port = 587
smtp_user = "noreply@example.com"
smtp_password = "email-password"
use_tls = true
[logging]
level = "INFO"
format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
[logging.handlers.file]
filename = "/var/log/ecommerce-api.log"
max_bytes = 10485760 # 10MB
backup_count = 5
[logging.handlers.console]
stream = "ext://sys.stdout"
# Payment providers
[[payment.providers]]
name = "stripe"
api_key = "sk_live_..."
webhook_secret = "whsec_..."
enabled = true
[[payment.providers]]
name = "paypal"
client_id = "paypal_client_id"
client_secret = "paypal_secret"
sandbox = false
enabled = true
# Feature flags
[features]
user_registration = true
email_verification = true
two_factor_auth = false
social_login = true
product_reviews = true
wishlist = true
recommendations = false
'''
try:
    app_config_data = tomllib.loads(app_config_toml)
    print("✅ Application config parsed successfully!")
    print(f"App: {app_config_data['app']['name']} v{app_config_data['app']['version']}")
    print(f"Environment: {app_config_data['app']['environment']}")
    print(f"Server: {app_config_data['server']['host']}:{app_config_data['server']['port']}")
    print(f"Workers: {app_config_data['server']['workers']}")
    print(f"Payment providers: {len(app_config_data['payment']['providers'])}")
    # Every value in [features] is a boolean, so truthiness selects enabled ones.
    enabled_features = [k for k, v in app_config_data['features'].items() if v]
    print(f"Enabled features: {', '.join(enabled_features)}")
except Exception as e:
    print(f"❌ Error parsing application config: {e}")
print("\n=== TOML Use Cases Summary ===")
print("""
🐍 Python Projects:
• pyproject.toml - Project metadata, dependencies, tool configuration
• setup.cfg alternative for project configuration
• Tool-specific configs (black, isort, pytest, mypy)
🦀 Rust Projects:
• Cargo.toml - Package metadata, dependencies, build configuration
• Workspace configuration for multi-crate projects
⚙️ Applications:
• Configuration files for web applications
• Database connection settings
• API configurations and feature flags
• CI/CD pipeline configurations
🔧 Tools & Systems:
• Docker Compose alternatives
• Infrastructure as Code configurations
• Static site generators (Hugo, etc.)
• Package managers and build tools
""")Congratulations! You’ve explored 7 essential Python libraries that will significantly enhance your programming productivity and capabilities:
Key takeaways: `Path.glob()` for pattern matching; `ic.configureOutput()` for custom formatting; `@njit` for automatic optimization; `Counter` for frequency analysis; `@lru_cache` for automatic memoization; prefer `pathlib` over `os.path` for all new projects; use `pyproject.toml` and clear, type-safe configs. Happy coding! 🐍✨