Skip to content

Dnasequence

LeetCode Problem

# https://leetcode.com/problems/repeated-dna-sequences/
# Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
# Output: ["AAAAACCCCC","CCCCCAAAAA"]
"""
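Slide a window of 10 characters across the string while keeping two hash sets: the windows seen so far and the answer. If the current 10-character window has already been seen (i.e. it repeats), add it to the answer; otherwise this is the first time these 10 characters appear, so add them to the seen set.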
"""


def findRepeatedDnaSequences(s, length=10):
    """Return every substring of ``length`` characters that occurs more than once in ``s``.

    A fixed-size window slides over ``s`` one character at a time. Each
    window is remembered the first time it is met; meeting it again marks
    it as repeated. Overlapping occurrences count as repeats.

    Args:
        s: The sequence to scan.
        length: Size of the sliding window. Defaults to 10, the size used
            by the LeetCode "Repeated DNA Sequences" problem.

    Returns:
        A list of the repeated substrings without duplicates, ordered by
        the position at which each one was first detected as a repeat.
        The list is empty when ``s`` is shorter than ``length``.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be a positive integer")

    seen = set()
    # A dict keeps insertion order, so it doubles as an ordered set; a
    # plain set would make the order of the result vary between runs
    # because string hashing is randomized per process.
    repeated = {}
    # The range is empty when s holds fewer than `length` characters.
    for start in range(len(s) - length + 1):
        window = s[start : start + length]
        if window in seen:
            repeated[window] = None
        else:
            seen.add(window)
    return list(repeated)


def main():
    """Run the solver on the sample input from the problem statement and print the result."""
    sample = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
    repeats = findRepeatedDnaSequences(sample)
    print(repeats)


# Run the demo only when the file is executed as a script, so importing
# the module does not print anything as a side effect.
if __name__ == "__main__":
    main()