Repeated DNA Sequences

来源:互联网 发布:贵金属手机交易软件 编辑:程序博客网 时间:2024/05/18 17:24

All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.

Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.

For example,

Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT", return: ["AAAAACCCCC", "CCCCCAAAAA"].

/// Finds every 10-letter substring that occurs more than once in a DNA string.
///
/// Each nucleotide is packed into 2 bits, so a 10-letter window fits in a
/// 20-bit integer. That integer is used directly as an index into a table
/// that remembers how often each window has been seen.
class Solution {
public:
    /// Maps a nucleotide letter to its 2-bit code.
    ///
    /// @param s  Character to encode.
    /// @return   0 for 'A', 1 for 'C', 2 for 'G', 3 for 'T';
    ///           -1 for any other character (including '\0').
    int charToInt(char s)
    {
        switch (s)
        {
        case 'A':
            return 0;
        case 'C':
            return 1;
        case 'G':
            return 2;
        case 'T':
            return 3;
        default:
            // Every path must return a value; falling off the end of a
            // non-void function is undefined behaviour.
            return -1;
        }
    }

    /// Returns all 10-letter-long substrings of @p s that occur more than once.
    ///
    /// Each repeated substring is reported exactly once, in the order in which
    /// its second occurrence ends. Windows that contain a character other than
    /// A, C, G or T are ignored. Strings shorter than 11 characters cannot
    /// contain a repeat and yield an empty result.
    ///
    /// @param s  DNA string to scan.
    /// @return   The repeated 10-letter substrings.
    std::vector<std::string> findRepeatedDnaSequences(std::string s)
    {
        typedef std::string::size_type Index;

        const Index kWindow = 10;               // letters per sequence
        const unsigned int kMask = 0xFFFFFu;    // low 20 bits = 10 letters * 2 bits
        const std::vector<unsigned char>::size_type kCodeCount = 1u << 20;  // 4^10 windows

        std::vector<std::string> result;
        const Index len = s.length();
        if (len <= kWindow)
        {
            // Two distinct windows need at least 11 characters; bail out
            // before allocating the lookup table.
            return result;
        }

        // timesSeen[code] is 0 (never seen), 1 (seen once) or 2 (already
        // reported). Heap-allocated: 1 MiB is too large to place on the stack
        // and variable-length arrays are not standard C++.
        std::vector<unsigned char> timesSeen(kCodeCount, static_cast<unsigned char>(0));

        unsigned int sequence = 0;  // rolling 20-bit code of the last letters
        Index validRun = 0;         // consecutive valid letters ending at i

        for (Index i = 0; i < len; ++i)
        {
            const int code = charToInt(s[i]);
            if (code < 0)
            {
                // An unknown character invalidates every window covering it.
                validRun = 0;
                sequence = 0;
                continue;
            }

            // Shift the new letter in and drop the letter that left the window.
            sequence = ((sequence << 2) | static_cast<unsigned int>(code)) & kMask;

            if (++validRun < kWindow)
            {
                continue;  // window not full yet
            }

            unsigned char& count = timesSeen[sequence];
            if (count < 2)
            {
                ++count;
                if (count == 2)
                {
                    // Second sighting: report once; later sightings are skipped.
                    result.push_back(s.substr(i + 1 - kWindow, kWindow));
                }
            }
        }
        return result;
    }
};


0 0
原创粉丝点击