Chaining¶

#!/usr/bin/env python3

# handling collisions -> probing (open addressing) and chaining (closed addressing (in place)
# probing : changing the position of they key to next empty slot available
# chaining : forming linked list / tree for chaining of collisions
# if chain too long -> O(N) complexity : to fix : rehash -> typically double the size of the array and rehash it.
# chaining is done via linkedlist O(N) or tree O(logN)
# be on the same index and go down and attach to the linked list

# We're making an array of linked lists (buckets) : each item in array is of type linked list.

# creating linked list class


class Node:
    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.next = None  # initially


class LL:
    def __init__(self):
        self.head = None

    def add(self, key, value):
        new_node = Node(key, value)
        if self.head == None:  # add to beginning of LL
            self.head = new_node
        else:  # add to end of LL
            temp = self.head
            while temp.next != None:
                temp = temp.next
            temp.next = new_node

    # deleting head
    def delete_head(self):
        if self.head == None:
            return "Empty"
        else:
            self.head = self.head.next

    # removing is based on key only
    def remove(self, key):
        if self.head.key == key:
            self.delete_head()
            return

        if self.head == "None":
            return "Empty"
        else:  # find the key to be deleted
            temp = self.head
            while temp.next != None:
                if temp.next.data == key:
                    break
                temp = temp.next
            if temp.next == None:
                return "Not found"
            else:
                temp.next = temp.next.next  # delete node and point it to next.next : temp = 1 :: 1 -> 2 -> 3 [2 delete] : 1 -> 3

    def traverse(self):
        temp = self.head
        while temp != None:
            print(temp.key, "-->", temp.value, " ", end=" ")
            temp = temp.next

    # size of linkedlist : used in lf rehash
    def size(self):
        temp = self.head
        counter = 0
        while temp != None:
            counter += 1
            temp = temp.next
        return counter

    def search(self, key):  # returns index of the key
        temp = self.head
        pos = 0
        while temp != None:
            if temp.key == key:
                return pos
            temp = temp.next
            pos += 1
        return -1

    # for put : get_node_at_index :: if {2 -> 3,  4 -> 5, 6 -> 8} and get_node_at_index(1).key : 4 and get_node_at_index(0).value = 3
    # helps in updating the node if already present in put case in chaining
    def get_node_at_index(self, index):
        temp = self.head
        counter = 0
        while temp is not None:
            if counter == index:
                return temp
            temp = temp.next
            counter += 1


# Hashing via chaining
# rehashing is based on laod factor : size / capacity
# size = nnumber of linked list chaining at that position
# capacity = number of elements in array
# load factor <= 2 (asusume) : if more than 2 -> array size double (capacity * 2)
# old bucket -> rehash and add it to new bucket of new capacity
# capacity changed so hash_function also changed -> balances things out


class Dictionary:
    def __init__(self, capacity):
        self.capacity = capacity  # number of items in array
        self.size = 0  # how many k-v pairs in my dictionary
        # array of linked list
        self.buckets = self.make_array(self.capacity)

    def make_array(self, capacity):
        L = []
        for i in range(capacity):
            L.append(
                LL()
            )  # inside L there will exists n number of linked list class (n = capacity)
        return L

    def get(self, key):
        bucket_index = self.hash_function(key)
        res = self.buckets[bucket_index].search(key)
        if res == -1:
            return "Not Present"
        else:
            node = self.buckets[bucket_index].get_node_at_index(res)
            return node.value

    def put(self, key, value):
        # index position will be finalised (unlike linear probing in which index posn + 1 if already filled) -> forms chain if already filled.
        # this index is called bucket index (each LL is called bucket)
        bucket_index = self.hash_function(key)
        # 2 possibilities : if empty : add to it, if filled : add to tail
        # edge case check : if key already present -> just update the value no need to enter it again else if new key : just add to tail
        # 2 functions : insert and update
        # update : get node index
        node_index = self.get_node_index(bucket_index, key)

        if node_index == -1:  # new fresh node
            # insert
            self.buckets[bucket_index].add(
                key, value
            )  # in the array of linked list : goto that index/element_made_of_LL where insert and add key value pair
            self.size += 1

            load_factor = self.size / self.capacity
            if load_factor >= 2:  # more than lf
                self.rehash()
        else:
            # update
            node = self.buckets[bucket_index].get_node_at_index(
                node_index
            )  # get the updated node (K, V)
            node.value = value  # update

    def rehash(self):
        # double array size when load_Factor / threshold reached [re-initialise everything]
        self.capacity *= 2
        old_buckets = self.buckets
        self.size = 0
        self.buckets = self.make_array(self.capacity)

        # every item in old bucket needs to be re-hashed and put into the new bucket
        for i in old_buckets:  # LL [1,2,3,4]
            for j in range(i.size()):  # inside chaining LL traverse [1[1,2,3]]
                node = i.get_node_at_index(j)
                key_item = node.key
                value_item = node.value
                self.put(key_item, value_item)

    def hash_function(self, key):
        return abs(hash(key)) % self.capacity

    def get_node_index(self, bucket_index, key):
        node_index = self.buckets[bucket_index].search(key)
        return node_index

    # magic methods
    def __setitem__(self, key, value):
        self.put(key, value)

    def __getitem__(self, key):
        return self.get(key)

    def __delitem__(self, key):
        bucket_index = self.hash_function(key)
        self.buckets[bucket_index].remove(key)

    def __str__(self):
        for i in self.buckets:
            i.traverse()
        return ""

    def __len__(self):
        return self.size


d1 = Dictionary(4)
d1.put("python", 1)
d1["java"]  # not present
d1.put("java", 2)
d1.put("c++", 34)
d1["matlab"] = 334
print(d1)
print(len(d1))