Computer vision enables machines to interpret and understand visual information from the world. From self-driving cars to medical imaging to augmented reality, the applications are vast and growing. OpenCV (Open Source Computer Vision Library) is the most widely used library for computer vision, offering over 2,500 optimized algorithms. In this tutorial, we will cover the fundamentals of image processing with OpenCV and Python, building up from basic operations to edge detection and face detection with Haar cascades.
Installing OpenCV and Loading Images
OpenCV installs easily via pip. The opencv-python package includes the main modules, while opencv-contrib-python adds extra features:
pip install opencv-python numpy matplotlib
The core of OpenCV is the cv2.imread() function, which loads images as NumPy arrays. Each pixel is represented as an array of color channel values:
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Read the image from disk; cv2.imread yields a NumPy uint8 array
img = cv2.imread("photo.jpg")

# Remember: OpenCV stores channels in BGR order, not RGB
print(f"Image shape: {img.shape}")  # (height, width, channels)
print(f"Image dtype: {img.dtype}")  # uint8 (0-255)
print(f"Image size: {img.size} pixels")

# Reorder channels to RGB so matplotlib renders the colors correctly
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Render the converted image
plt.figure(figsize=(10, 8))
plt.imshow(img_rgb)
plt.title("Original Image")
plt.axis("off")
plt.show()

# Index a single pixel as [row, col]; the channels come back as B, G, R
pixel = img[100, 200]  # BGR values at row 100, col 200
b, g, r = pixel
print(f"Pixel at (100, 200): B={b}, G={g}, R={r}")
A common gotcha for beginners: OpenCV uses BGR channel ordering by default, while most other libraries (matplotlib, PIL, web) use RGB. Always convert with cv2.cvtColor() when displaying or integrating with other tools.
Grayscale Conversion and Basic Transformations
Many computer vision algorithms work on grayscale images because they reduce computational complexity from three channels to one while preserving structural information:
def basic_transformations(image_path: str) -> dict:
    """Demonstrate fundamental image transformations.

    Args:
        image_path: Path to the image file to load.

    Returns:
        Dict mapping each transformation name to the resulting image array.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising on a missing/bad file;
        # fail loudly here rather than crash later with a cryptic cv2 error.
        raise FileNotFoundError(f"Cannot load image: {image_path}")

    # Convert to grayscale: drops from three channels to one
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    print(f"Grayscale shape: {gray.shape}")  # (height, width) -- single channel

    # Resize to half size; INTER_AREA is the recommended filter for shrinking
    height, width = img.shape[:2]
    resized = cv2.resize(img, (width // 2, height // 2),
                         interpolation=cv2.INTER_AREA)

    # Rotate 45 degrees around the image center (corners get clipped)
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, 45, scale=1.0)
    rotated = cv2.warpAffine(img, rotation_matrix, (width, height))

    # Flip horizontally and vertically
    flipped_h = cv2.flip(img, 1)  # 1 = horizontal
    flipped_v = cv2.flip(img, 0)  # 0 = vertical (demonstration only; not returned)

    # Crop a region of interest (ROI) with NumPy slicing
    roi = img[50:250, 100:400]  # rows 50-249, cols 100-399

    # Gaussian blur for noise reduction (kernel size must be odd)
    blurred = cv2.GaussianBlur(img, (15, 15), 0)

    # Brightness/contrast: new_image = alpha * image + beta
    # (alpha = contrast gain, beta = brightness offset)
    bright = cv2.convertScaleAbs(img, alpha=1.3, beta=40)

    return {
        "grayscale": gray,
        "resized": resized,
        "rotated": rotated,
        "flipped": flipped_h,
        "cropped": roi,
        "blurred": blurred,
        "brightened": bright,
    }
# Run the demo on the sample photo and summarize each output's dimensions
for name, image in basic_transformations("photo.jpg").items():
    print(f" {name}: shape={image.shape}")
cv2.GaussianBlur() is particularly important as a preprocessing step. Most edge detection and feature detection algorithms are sensitive to noise, and blurring smooths out irrelevant details while preserving meaningful edges.
Edge Detection with Canny
Edge detection identifies boundaries in images where pixel intensity changes sharply. The Canny edge detector is the gold standard, combining Gaussian smoothing, gradient computation, non-maximum suppression, and hysteresis thresholding:
def detect_edges(image_path: str):
    """Apply several edge detection techniques and display them side by side.

    Args:
        image_path: Path to the image file to load.

    Returns:
        The Canny edge map produced with the balanced (50/150) thresholds.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None on failure rather than raising
        raise FileNotFoundError(f"Cannot load image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Smooth first: edge detectors amplify high-frequency noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Canny edge detection; the two thresholds drive hysteresis linking
    edges_canny = cv2.Canny(blurred, threshold1=50, threshold2=150)
    # Compare with tighter (fewer edges) and wider (more edges) thresholds
    edges_tight = cv2.Canny(blurred, threshold1=100, threshold2=200)
    edges_wide = cv2.Canny(blurred, threshold1=30, threshold2=100)

    # Sobel: first-order gradients in x and y, combined via magnitude
    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)  # horizontal gradient
    sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)  # vertical gradient
    sobel_combined = cv2.magnitude(sobel_x, sobel_y)
    # Clip before the uint8 cast: gradient magnitudes can exceed 255
    sobel_combined = np.uint8(np.clip(sobel_combined, 0, 255))

    # Laplacian: second-order derivative; take |.| since it is signed
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    laplacian = np.uint8(np.absolute(laplacian))

    # Display all variants in a 2x3 grid
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    images = [gray, edges_wide, edges_canny,
              edges_tight, sobel_combined, laplacian]
    titles = ["Grayscale", "Canny (wide)", "Canny (balanced)",
              "Canny (tight)", "Sobel", "Laplacian"]
    for ax, image, title in zip(axes.flat, images, titles):
        ax.imshow(image, cmap="gray")
        ax.set_title(title)
        ax.axis("off")
    plt.tight_layout()
    plt.show()
    return edges_canny
# Run edge detection on the sample photo and report basic statistics
edges = detect_edges("photo.jpg")
edge_count = np.count_nonzero(edges)
print(f"Edge map shape: {edges.shape}")
print(f"Edge pixels: {edge_count}")
The two Canny thresholds work together through hysteresis. Pixels with gradient magnitude above the high threshold are definite edges. Pixels between the two thresholds are edges only if connected to definite edges. Pixels below the low threshold are discarded. This produces clean, connected edge contours.
Face Detection with Haar Cascades
OpenCV ships with pre-trained Haar cascade classifiers for detecting faces, eyes, and other objects. While deep learning detectors are more accurate, Haar cascades are fast and require no GPU:
def detect_faces(image_path: str):
    """Detect faces (and eyes within each face) using Haar cascade classifiers.

    Draws a green rectangle and label per face, blue circles per eye, saves
    the annotated image to faces_detected.jpg, and displays it.

    Args:
        image_path: Path to the image file to load.

    Returns:
        Sequence of (x, y, w, h) bounding boxes, one per detected face
        (an empty tuple when no faces are found).

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None on failure rather than raising
        raise FileNotFoundError(f"Cannot load image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Load the pre-trained cascades that ship with OpenCV
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    eye_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_eye.xml"
    )

    # Detect faces on the grayscale image
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,   # image size reduction at each scale
        minNeighbors=5,    # min detections to confirm a face
        minSize=(30, 30)   # minimum face size in pixels
    )
    print(f"Found {len(faces)} face(s)")

    # Draw rectangles around detected faces
    output = img.copy()
    for (x, y, w, h) in faces:
        # Green rectangle around the face
        cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # NOTE: Haar cascades do not expose a confidence score, so this is
        # a plain text label, not a probability.
        cv2.putText(output, "Face", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        # Search for eyes only inside the face ROI: faster and fewer
        # false positives than scanning the whole image
        face_roi_gray = gray[y:y+h, x:x+w]
        face_roi_color = output[y:y+h, x:x+w]
        eyes = eye_cascade.detectMultiScale(
            face_roi_gray,
            scaleFactor=1.1,
            minNeighbors=10,
            minSize=(20, 20)
        )
        for (ex, ey, ew, eh) in eyes:
            # Circle each eye; the ROI view aliases `output`, so drawing
            # here annotates the full image as well
            center = (ex + ew // 2, ey + eh // 2)
            radius = max(ew, eh) // 2
            cv2.circle(face_roi_color, center, radius, (255, 0, 0), 2)

    # Save and display the annotated result
    cv2.imwrite("faces_detected.jpg", output)
    output_rgb = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(10, 8))
    plt.imshow(output_rgb)
    plt.title(f"Detected {len(faces)} face(s)")
    plt.axis("off")
    plt.show()
    return faces
# Detect faces in a group photo and list each bounding box
faces = detect_faces("group_photo.jpg")
for idx, (x, y, w, h) in enumerate(faces, start=1):
    print(f" Face {idx}: position=({x},{y}), size={w}x{h}")
The scaleFactor controls how much the image is downscaled at each detection pass. A smaller value (like 1.05) is more thorough but slower. The minNeighbors parameter filters out false positives; higher values require more overlapping detections to confirm a face, reducing false alarms at the cost of potentially missing some faces.
Putting It All Together: An Image Processing Pipeline
Here is a complete pipeline that loads an image, applies preprocessing, detects edges and faces, and saves annotated results:
def image_analysis_pipeline(image_path: str, output_dir: str = "."):
    """Complete image analysis pipeline.

    Loads an image, runs Canny edge detection and Haar-cascade face
    detection, and writes the edge map plus an annotated copy to output_dir.

    Args:
        image_path: Path to the input image.
        output_dir: Directory for the output files (created if missing).

    Raises:
        FileNotFoundError: If the image cannot be loaded.
    """
    import os
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Cannot load image: {image_path}")
    # cv2.imwrite fails silently when the target directory does not exist
    os.makedirs(output_dir, exist_ok=True)
    filename = os.path.splitext(os.path.basename(image_path))[0]

    # Step 1: Basic info
    h, w, c = img.shape
    print(f"Analyzing: {image_path}")
    print(f" Dimensions: {w}x{h}, Channels: {c}")

    # Step 2: Grayscale + edge detection
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150)
    # Bug fix: use the computed base filename (the original wrote a
    # literal "(unknown)" placeholder and never used `filename`)
    cv2.imwrite(f"{output_dir}/{filename}_edges.jpg", edges)

    # Step 3: Face detection
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    faces = face_cascade.detectMultiScale(gray, 1.1, 5, minSize=(30, 30))
    annotated = img.copy()
    for (x, y, fw, fh) in faces:
        cv2.rectangle(annotated, (x, y), (x+fw, y+fh), (0, 255, 0), 2)
    cv2.imwrite(f"{output_dir}/{filename}_annotated.jpg", annotated)

    print(f" Edges saved: {filename}_edges.jpg")
    print(f" Faces found: {len(faces)}")
    print(f" Annotated saved: {filename}_annotated.jpg")
# Run the full pipeline on the sample photo, writing results into ./output
image_analysis_pipeline("photo.jpg", output_dir="./output")
Conclusion
We covered the essential building blocks of computer vision with OpenCV: loading and displaying images, color space conversions, geometric transformations, multiple edge detection algorithms, and face detection using Haar cascades. These fundamentals form the foundation for more advanced topics like object tracking, optical flow, image segmentation, and deep learning-based detection with models like YOLO and SSD. OpenCV’s extensive documentation and active community make it an ideal starting point for any computer vision project, and its C++ core ensures that even Python code runs at production-viable speeds.