187. Repeated DNA Sequences

  • Difficulty: Medium

  • Topics: Hash Table, Bit Manipulation

  • Similar Questions:

Problem:

All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

Example:

Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"

Output: ["AAAAACCCCC", "CCCCCAAAAA"]

Solutions:

class Solution {
public:
    vector<string> findRepeatedDnaSequences(string s) {
        if (s.length() < 10)    return {};
        int code = 0;
        for (int i = 0; i < 9; ++i) {
            updateCode(code, s[i]);
        }

        unordered_map<int, int> merCount;
        vector<string> ret;

        for (int i = 9; i < s.length(); ++i) {
            updateCode(code, s[i]);
            if (++merCount[code] == 2) {
                ret.push_back(s.substr(i - 9, 10));
            }
        }

        return ret;

    }

    void updateCode(int& code, char c) {
        const int mask = 0xFFFFF;
        code = ((code << 2) & mask);
        switch(c) {
            case 'A':
                // no-op
                break;
            case 'C':
                code = code | 0x1;
                break;
            case 'G':
                code = code | 0x2;
                break;
            case 'T':
                code = code | 0x3;
                break;
            default:
                throw "Illegal input";
        }
    }
};

results matching ""

    No results matching ""