哈希算法题目:设计一个支持动态扩容和缩容的哈希表(使用再哈希策略)
字数 589 2025-11-09 03:26:54
哈希算法题目:设计一个支持动态扩容和缩容的哈希表(使用再哈希策略)
题目描述
设计一个哈希表,支持以下操作:
put(key, value): 插入键值对get(key): 获取键对应的值,如果键不存在返回-1remove(key): 删除键值对- 自动处理哈希冲突(使用链地址法)
- 当负载因子超过阈值时自动扩容(通常为0.75)
- 当负载因子低于阈值时自动缩容(通常为0.25)
- 扩容/缩容时使用再哈希策略重新分配所有元素
解题步骤详解
步骤1:理解基本哈希表结构
哈希表的核心是数组+冲突解决机制。我们使用:
- 一个固定大小的桶数组(初始容量)
- 每个桶存储一个链表(链地址法解决冲突)
- 负载因子 = 元素数量 / 桶的数量
步骤2:定义数据结构
class HashNode:
def __init__(self, key, value):
self.key = key
self.value = value
self.next = None
class DynamicHashTable:
def __init__(self, initial_capacity=8, load_factor=0.75, shrink_factor=0.25):
self.capacity = initial_capacity
self.size = 0
self.buckets = [None] * self.capacity
self.load_factor = load_factor # 扩容阈值
self.shrink_factor = shrink_factor # 缩容阈值
步骤3:实现哈希函数
def _hash(self, key):
# 使用Python内置哈希函数并取模
return hash(key) % self.capacity
步骤4:实现基本操作(不包含扩容/缩容)
def put(self, key, value):
index = self._hash(key)
node = self.buckets[index]
# 遍历链表,检查键是否已存在
while node:
if node.key == key:
node.value = value # 更新值
return
node = node.next
# 键不存在,插入新节点到链表头部
new_node = HashNode(key, value)
new_node.next = self.buckets[index]
self.buckets[index] = new_node
self.size += 1
def get(self, key):
index = self._hash(key)
node = self.buckets[index]
while node:
if node.key == key:
return node.value
node = node.next
return -1 # 键不存在
def remove(self, key):
index = self._hash(key)
node = self.buckets[index]
prev = None
while node:
if node.key == key:
if prev:
prev.next = node.next
else:
self.buckets[index] = node.next
self.size -= 1
return
prev = node
node = node.next
步骤5:实现动态调整策略
关键思想:当负载因子超过阈值时重新分配所有元素
def _resize(self, new_capacity):
# 保存旧数据
old_buckets = self.buckets
old_capacity = self.capacity
# 创建新桶数组
self.capacity = new_capacity
self.buckets = [None] * new_capacity
self.size = 0 # 重置大小,put操作会重新计数
# 重新插入所有元素(再哈希)
for i in range(old_capacity):
node = old_buckets[i]
while node:
self.put(node.key, node.value) # 使用新的哈希函数
node = node.next
def _check_resize(self):
current_load = self.size / self.capacity
# 检查是否需要扩容
if current_load > self.load_factor:
new_capacity = self.capacity * 2
self._resize(new_capacity)
# 检查是否需要缩容(避免缩容到太小)
elif current_load < self.shrink_factor and self.capacity > 8:
new_capacity = max(8, self.capacity // 2) # 最小容量为8
self._resize(new_capacity)
步骤6:完善put和remove方法
def put(self, key, value):
# ...(前面代码不变)
self.size += 1
self._check_resize() # 插入后检查调整
def remove(self, key):
# ...(前面代码不变)
self.size -= 1
self._check_resize() # 删除后检查调整
步骤7:处理边界情况
def put(self, key, value):
# 检查键是否为None(根据需求决定是否支持)
if key is None:
raise ValueError("Key cannot be None")
# ...其余代码不变
def _resize(self, new_capacity):
# 避免无限递归:在resize过程中暂时禁用检查
original_load_factor = self.load_factor
self.load_factor = float('inf') # 临时禁用
# ...执行resize操作
self.load_factor = original_load_factor # 恢复
完整实现示例
class DynamicHashTable:
def __init__(self, initial_capacity=8, load_factor=0.75, shrink_factor=0.25):
self.capacity = initial_capacity
self.size = 0
self.buckets = [None] * self.capacity
self.load_factor = load_factor
self.shrink_factor = shrink_factor
def _hash(self, key):
return hash(key) % self.capacity
def put(self, key, value):
index = self._hash(key)
node = self.buckets[index]
# 检查键是否已存在
while node:
if node.key == key:
node.value = value
return
node = node.next
# 插入新节点
new_node = HashNode(key, value)
new_node.next = self.buckets[index]
self.buckets[index] = new_node
self.size += 1
self._check_resize()
def get(self, key):
index = self._hash(key)
node = self.buckets[index]
while node:
if node.key == key:
return node.value
node = node.next
return -1
def remove(self, key):
index = self._hash(key)
node = self.buckets[index]
prev = None
while node:
if node.key == key:
if prev:
prev.next = node.next
else:
self.buckets[index] = node.next
self.size -= 1
self._check_resize()
return
prev = node
node = node.next
def _resize(self, new_capacity):
old_buckets = self.buckets
old_capacity = self.capacity
self.capacity = new_capacity
self.buckets = [None] * new_capacity
self.size = 0
# 重新插入所有元素
for i in range(old_capacity):
node = old_buckets[i]
while node:
self.put(node.key, node.value)
node = node.next
def _check_resize(self):
current_load = self.size / self.capacity
if current_load > self.load_factor:
self._resize(self.capacity * 2)
elif current_load < self.shrink_factor and self.capacity > 8:
self._resize(max(8, self.capacity // 2))
性能分析
- 平均时间复杂度:O(1) 对于put、get、remove操作
- 最坏情况:O(n) 当所有元素哈希到同一个桶时
- 空间复杂度:O(n)
- 动态调整保证了操作的高效性,避免了哈希表的性能退化
这种设计在实际系统中广泛应用,如Java的HashMap、Python的dict都采用了类似的动态调整策略。