【leetcode】Repeated DNA Sequences

来源:互联网 发布:iphone导入照片软件 编辑:程序博客网 时间:2024/06/15 14:27

From: https://leetcode.com/problems/repeated-dna-sequences/

All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

For example,

Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", return: ["AAAAACCCCC", "CCCCCAAAAA"].

Hide Tags
 Hash Table, Bit Manipulation

Solution:

class Solution {
public:
    // Finds every 10-letter substring of `s` that occurs more than once.
    //
    // Each nucleotide is packed into 2 bits (A=00, C=01, G=10, T=11), so a
    // 10-letter window becomes a 20-bit rolling key. A bit table indexed by
    // that key records which windows have been seen; a window is appended to
    // the result the first time it is seen again, so the result has no
    // duplicates and is ordered by the position of each second occurrence.
    //
    // Any character other than 'A', 'C', 'G' or 'T' cannot be part of a
    // sequence: it resets the rolling window instead of indexing a lookup
    // table with an unchecked value.
    //
    // Returns an empty vector when `s` is shorter than 10 characters.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        const std::size_t kWindow = 10;            // letters per sequence
        const unsigned kMask = (1u << 20) - 1u;    // keeps the low 2 * kWindow bits

        std::vector<std::string> ans;
        if (s.size() < kWindow) {
            return ans;
        }

        // One flag per possible 20-bit key. Kept on the heap: a plain
        // bool[1 << 20] local would need 1 MiB of stack.
        std::vector<bool> seen(kMask + 1u, false);
        std::unordered_set<unsigned> reported;     // keys already added to ans

        unsigned key = 0;
        std::size_t run = 0;  // consecutive valid nucleotides ending at pos

        for (std::size_t pos = 0; pos < s.size(); ++pos) {
            const int code = encode(s[pos]);
            if (code < 0) {
                // Invalid letter: no window may span it, start over.
                run = 0;
                key = 0;
                continue;
            }

            // Shift in the new nucleotide and drop the one that left the window.
            key = ((key << 2) | static_cast<unsigned>(code)) & kMask;
            if (++run < kWindow) {
                continue;  // window not full yet
            }

            if (!seen[key]) {
                seen[key] = true;
            } else if (reported.insert(key).second) {
                ans.push_back(s.substr(pos + 1 - kWindow, kWindow));
            }
        }

        return ans;
    }

private:
    // Maps a nucleotide letter to its 2-bit code, or -1 for any other character.
    static int encode(char c) {
        switch (c) {
            case 'A': return 0;
            case 'C': return 1;
            case 'G': return 2;
            case 'T': return 3;
            default:  return -1;
        }
    }
};

public class Solution {

    /** Number of letters in each sequence being searched for. */
    private static final int WINDOW = 10;

    /** Keeps the low 20 bits: two bits per letter of a full window. */
    private static final int MASK = (1 << (2 * WINDOW)) - 1;

    /**
     * Finds every 10-letter substring of {@code s} that occurs more than once.
     *
     * <p>Each nucleotide is packed into 2 bits (A=00, C=01, G=10, T=11), so a
     * 10-letter window becomes a 20-bit rolling key. A boolean table indexed by
     * that key records which windows have been seen; a window is added to the
     * result the first time it is seen again, so the result contains no
     * duplicates and is ordered by the position of each second occurrence.
     *
     * <p>Any character other than {@code A}, {@code C}, {@code G} or {@code T}
     * cannot be part of a sequence: it resets the rolling window rather than
     * being looked up in a table (where it would either alias to {@code A} or
     * fall outside the table).
     *
     * @param s the DNA string to scan; may be {@code null}
     * @return the repeated 10-letter sequences; empty when {@code s} is
     *         {@code null} or shorter than 10 characters
     */
    public List<String> findRepeatedDnaSequences(String s) {
        List<String> ans = new ArrayList<String>();
        if (s == null || s.length() < WINDOW) {
            return ans;
        }

        boolean[] seen = new boolean[MASK + 1];          // one flag per 20-bit key
        Set<Integer> reported = new HashSet<Integer>();  // keys already added to ans

        int key = 0; // 20-bit hash of the current window
        int run = 0; // consecutive valid nucleotides ending at index i

        for (int i = 0; i < s.length(); ++i) {
            int code = encode(s.charAt(i));
            if (code < 0) {
                // Invalid letter: no window may span it, start over.
                run = 0;
                key = 0;
                continue;
            }

            // Shift left by two bits, append the new letter, keep the low 20 bits.
            key = ((key << 2) | code) & MASK;
            if (++run < WINDOW) {
                continue; // window not full yet
            }

            if (!seen[key]) {
                seen[key] = true;
            } else if (reported.add(key)) {
                ans.add(s.substring(i - WINDOW + 1, i + 1));
            }
        }
        return ans;
    }

    /** Maps a nucleotide letter to its 2-bit code, or -1 for any other character. */
    private static int encode(char c) {
        switch (c) {
            case 'A':
                return 0;
            case 'C':
                return 1;
            case 'G':
                return 2;
            case 'T':
                return 3;
            default:
                return -1;
        }
    }
}


0 0
原创粉丝点击