Skip to content

Linear Probing

#!/usr/bin/env python3
# fast searching
# implement dictionary

# hashing : hash function : hf(key) = key % size
# problem with this function : doesn't work on strings (list : [cat, dog, rain]) can't do cat % 3 : need to find ascii('c') + ascii('a') + ascii('t') % 3
# python inbuilt : hash() : hash(123) = 123 works and hash(cat) : 1329667300978243676
# problem : sometimes hashes are negative : use abs
# Linear Probing : if same hash : rehash : (old_hash_value + 1) % size : looks for next open slot until empty slot found

# handling collisions -> probing (open addressing) and chaining (closed addressing (in place)
# probing : changing the position of they key to next empty slot available
# chaining : forming linked list / tree for chaining of collisions

# 2 arrays needed : slots (keys) and data (values)
# {python : 37} : python's hash value : 2 so in 2th index of slots array : put python and in 2nd index of data array : put 37
# slots : [_,_,python,_,_,_] # key
# data : [_,_,37,_,_,_] # value


class Dictioanry:
    def __init__(self, size):
        self.size = size
        self.slots = [None] * self.size
        self.data = [None] * self.size

    def put(self, key, value):
        hash_value = self.hash_function(key)
        if (
            self.slots[hash_value] == None
        ):  # if empty : put key in slot and value in data
            self.slots[hash_value] = key
            self.data[hash_value] = value
        else:
            # 2 options : same key already present : update value || different key present : rehash
            if self.slots[hash_value] == key:
                self.data[hash_value] = (
                    value  # update with new value (no need to change key)
                )
            else:
                new_hash_value = self.rehash(hash_value)
                # keep iterating ahead until empty space found -> to put the value && same key already present : [c++, java, php, python, _] and key = python -> then it'll place it in _ (it shouldn't)
                while (
                    self.slots[new_hash_value] != None
                    and self.slots[new_hash_value] != key
                ):
                    new_hash_value = self.rehash(new_hash_value)

                if self.slots[new_hash_value] == None:
                    # put key and value
                    self.slots[new_hash_value] = key
                    self.data[new_hash_value] = value
                else:  # update existing key
                    self.data[new_hash_value] = value

    def get(self, key):
        # keep iterating until you get the item.
        # how to know if item not present : if start_pos = end_pos (checked everything) || empty slot found
        start_pos = self.hash_function(key)
        current_pos = start_pos
        while (
            self.slots[current_pos] != None
        ):  # keep looking until empty slot found -> empty slot represnts value not present
            if self.slots[current_pos] == key:
                return self.data[current_pos]
            current_pos = self.rehash(current_pos)  # increment by 1 every time
            if current_pos == start_pos:
                return "Not found"

        return "Not found -> empty slot"

    # printing key value pair like an actual {} : magic method __str__ used
    def __str__(self) -> str:
        for i in range(len(self.slots)):
            if self.slots[i] != None:  # if not empty : print
                print(self.slots[i], ":", self.data[i], end=", ")
        return ""

    # magic methods for put and get : instead of d1.get(k) or d1.push(k, v) we can do put = d["k"] = v and get = d["k"] {like a dictionary}
    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, value):
        self.put(key, value)

    def rehash(self, old_hash):
        return (old_hash + 1) % self.size

    def hash_function(self, key):
        return abs(hash(key)) % self.size


d1 = Dictioanry(3)
print(d1.slots)  # None None None
print(d1.data)  # None None None

d1["python"] = 56
d1["c++"] = 100
d1["java"] = 230
print(d1.get("java"))  # 230
print(d1)  # c++ : 100, java : 230, python : 56,