Why Implement Data Structures from Scratch?
Python gives you lists, dicts, and sets out of the box, so why bother implementing data structures manually? Because understanding how they work internally makes you a fundamentally better programmer. You will know when a list is the wrong choice, why certain operations are slow, and how to pick the right structure for your problem. In this guide, we implement four essential data structures from scratch: a linked list, a binary search tree, a stack, and a queue. Every method is fully functional and ready to use.
Stack: Last In, First Out
A stack follows the LIFO principle. Think of a stack of plates: you always take from the top. Stacks are used in function call management, undo systems, expression parsing, and depth-first search.
class Stack:
"""A stack implementation using a Python list."""
def __init__(self):
self._items = []
def push(self, item):
"""Add an item to the top of the stack. O(1)."""
self._items.append(item)
def pop(self):
"""Remove and return the top item. O(1). Raises IndexError if empty."""
if self.is_empty():
raise IndexError("Pop from empty stack")
return self._items.pop()
def peek(self):
"""Return the top item without removing it. O(1)."""
if self.is_empty():
raise IndexError("Peek at empty stack")
return self._items[-1]
def is_empty(self):
"""Check if the stack is empty. O(1)."""
return len(self._items) == 0
def size(self):
"""Return the number of items. O(1)."""
return len(self._items)
def __repr__(self):
return f"Stack({self._items})"
# Usage
stack = Stack()
stack.push(10)
stack.push(20)
stack.push(30)
print(stack) # Stack([10, 20, 30])
print(stack.peek()) # 30
print(stack.pop()) # 30
print(stack.pop()) # 20
print(stack.size()) # 1
# Practical example: Check balanced parentheses
def is_balanced(expression):
"""Check if parentheses, brackets, and braces are balanced."""
stack = Stack()
pairs = {")": "(", "]": "[", "}": "{"}
for char in expression:
if char in "([{":
stack.push(char)
elif char in ")]}":
if stack.is_empty() or stack.pop() != pairs[char]:
return False
return stack.is_empty()
print(is_balanced("({[()]})")) # True
print(is_balanced("({[()]}")) # False
print(is_balanced(")(")) # False
Queue: First In, First Out
A queue follows the FIFO principle, like a line at a store. Queues are used in breadth-first search, task scheduling, and message processing.
from collections import deque
class Queue:
"""A queue implementation using collections.deque for O(1) operations."""
def __init__(self):
self._items = deque()
def enqueue(self, item):
"""Add an item to the back of the queue. O(1)."""
self._items.append(item)
def dequeue(self):
"""Remove and return the front item. O(1). Raises IndexError if empty."""
if self.is_empty():
raise IndexError("Dequeue from empty queue")
return self._items.popleft()
def front(self):
"""Return the front item without removing it. O(1)."""
if self.is_empty():
raise IndexError("Front of empty queue")
return self._items[0]
def is_empty(self):
return len(self._items) == 0
def size(self):
return len(self._items)
def __repr__(self):
return f"Queue({list(self._items)})"
# Usage
q = Queue()
q.enqueue("Task A")
q.enqueue("Task B")
q.enqueue("Task C")
print(q) # Queue(['Task A', 'Task B', 'Task C'])
print(q.dequeue()) # Task A
print(q.front()) # Task B
print(q.size()) # 2
Linked List
A linked list stores elements in nodes, where each node points to the next one. Unlike arrays, linked lists allow O(1) insertion and deletion at any position (if you have a reference to the node), but O(n) access by index.
class Node:
"""A single node in a linked list."""
def __init__(self, data):
self.data = data
self.next = None
def __repr__(self):
return f"Node({self.data})"
class LinkedList:
"""A singly linked list with head and tail pointers."""
def __init__(self):
self.head = None
self.tail = None
self._size = 0
def append(self, data):
"""Add a node to the end of the list. O(1)."""
new_node = Node(data)
if self.head is None:
self.head = new_node
self.tail = new_node
else:
self.tail.next = new_node
self.tail = new_node
self._size += 1
def prepend(self, data):
"""Add a node to the beginning of the list. O(1)."""
new_node = Node(data)
new_node.next = self.head
self.head = new_node
if self.tail is None:
self.tail = new_node
self._size += 1
def delete(self, data):
"""Delete the first node with the given data. O(n)."""
if self.head is None:
return False
if self.head.data == data:
self.head = self.head.next
if self.head is None:
self.tail = None
self._size -= 1
return True
current = self.head
while current.next:
if current.next.data == data:
if current.next == self.tail:
self.tail = current
current.next = current.next.next
self._size -= 1
return True
current = current.next
return False
def find(self, data):
"""Search for a node by value. Returns the node or None. O(n)."""
current = self.head
while current:
if current.data == data:
return current
current = current.next
return None
def to_list(self):
"""Convert the linked list to a Python list. O(n)."""
result = []
current = self.head
while current:
result.append(current.data)
current = current.next
return result
def reverse(self):
"""Reverse the linked list in place. O(n)."""
self.tail = self.head
prev = None
current = self.head
while current:
next_node = current.next
current.next = prev
prev = current
current = next_node
self.head = prev
def __len__(self):
return self._size
def __repr__(self):
return " -> ".join(str(d) for d in self.to_list()) + " -> None"
# Usage
ll = LinkedList()
ll.append(10)
ll.append(20)
ll.append(30)
ll.prepend(5)
print(ll) # 5 -> 10 -> 20 -> 30 -> None
print(len(ll)) # 4
print(ll.find(20)) # Node(20)
ll.delete(20)
print(ll) # 5 -> 10 -> 30 -> None
ll.reverse()
print(ll) # 30 -> 10 -> 5 -> None
Binary Search Tree
A binary search tree (BST) maintains sorted order: all values in the left subtree are smaller, and all values in the right subtree are larger. This gives O(log n) search, insert, and delete on average.
class TreeNode:
"""A node in a binary search tree."""
def __init__(self, value):
self.value = value
self.left = None
self.right = None
class BinarySearchTree:
"""A binary search tree with insert, search, delete, and traversal."""
def __init__(self):
self.root = None
self._size = 0
def insert(self, value):
"""Insert a value into the BST. O(log n) average."""
if self.root is None:
self.root = TreeNode(value)
else:
self._insert_recursive(self.root, value)
self._size += 1
def _insert_recursive(self, node, value):
if value < node.value:
if node.left is None:
node.left = TreeNode(value)
else:
self._insert_recursive(node.left, value)
elif value > node.value:
if node.right is None:
node.right = TreeNode(value)
else:
self._insert_recursive(node.right, value)
# Duplicate values are ignored
def search(self, value):
"""Search for a value. Returns True if found. O(log n) average."""
return self._search_recursive(self.root, value)
def _search_recursive(self, node, value):
if node is None:
return False
if value == node.value:
return True
elif value < node.value:
return self._search_recursive(node.left, value)
else:
return self._search_recursive(node.right, value)
def delete(self, value):
"""Delete a value from the BST. O(log n) average."""
self.root, deleted = self._delete_recursive(self.root, value)
if deleted:
self._size -= 1
return deleted
def _delete_recursive(self, node, value):
if node is None:
return None, False
deleted = False
if value < node.value:
node.left, deleted = self._delete_recursive(node.left, value)
elif value > node.value:
node.right, deleted = self._delete_recursive(node.right, value)
else:
deleted = True
# Node with one child or no child
if node.left is None:
return node.right, True
elif node.right is None:
return node.left, True
# Node with two children: find in-order successor
successor = self._find_min(node.right)
node.value = successor.value
node.right, _ = self._delete_recursive(node.right, successor.value)
return node, deleted
def _find_min(self, node):
current = node
while current.left:
current = current.left
return current
def inorder(self):
"""In-order traversal: returns sorted values. O(n)."""
result = []
self._inorder_recursive(self.root, result)
return result
def _inorder_recursive(self, node, result):
if node:
self._inorder_recursive(node.left, result)
result.append(node.value)
self._inorder_recursive(node.right, result)
def preorder(self):
"""Pre-order traversal. O(n)."""
result = []
self._preorder_recursive(self.root, result)
return result
def _preorder_recursive(self, node, result):
if node:
result.append(node.value)
self._preorder_recursive(node.left, result)
self._preorder_recursive(node.right, result)
def height(self):
"""Calculate the height of the tree. O(n)."""
return self._height_recursive(self.root)
def _height_recursive(self, node):
if node is None:
return -1
left_h = self._height_recursive(node.left)
right_h = self._height_recursive(node.right)
return 1 + max(left_h, right_h)
def __len__(self):
return self._size
def __contains__(self, value):
return self.search(value)
# Usage
bst = BinarySearchTree()
for val in [50, 30, 70, 20, 40, 60, 80, 10]:
bst.insert(val)
print("In-order:", bst.inorder()) # [10, 20, 30, 40, 50, 60, 70, 80]
print("Pre-order:", bst.preorder()) # [50, 30, 20, 10, 40, 70, 60, 80]
print("Height:", bst.height()) # 3
print("Size:", len(bst)) # 8
print("Contains 40:", 40 in bst) # True
print("Contains 45:", 45 in bst) # False
bst.delete(30)
print("After deleting 30:", bst.inorder()) # [10, 20, 40, 50, 60, 70, 80]
Key Takeaways
Understanding data structures is essential for writing efficient code and performing well in technical interviews. The stack and queue are simple but appear constantly in algorithms: use a stack for depth-first traversal and a queue for breadth-first. Linked lists shine when you need frequent insertions and deletions at arbitrary positions. Binary search trees provide efficient sorted storage with O(log n) operations on average. Every implementation in this guide is production-quality with proper edge case handling. Use them as a reference, extend them with additional methods, and study how they compare to Python’s built-in data structures. The next step is to explore balanced trees like AVL or Red-Black trees, which guarantee O(log n) performance even in the worst case.