
来源:互联网 发布:sql语句查询字段长度 编辑:程序博客网 时间:2024/06/10 09:40






// Brute-force substring search
public class ViolenceSubStringSearch {
    /**
     * Returns the index of the first occurrence of {@code pat} in {@code txt},
     * found by trying every alignment of the pattern against the text.
     *
     * @param pat the pattern string
     * @param txt the text string
     * @return the index of the first occurrence of the pattern in the text;
     *         {@code txt.length()} if there is no match
     */
    public static int search(String pat, String txt) {
        int M = pat.length();
        int N = txt.length();
        for (int i = 0; i <= N - M; i++) {
            int j;
            for (j = 0; j < M; j++) {
                // Leave the inner loop only on a real mismatch. (A stray ';'
                // directly after this condition would make the break
                // unconditional and no non-empty pattern could ever match.)
                if (txt.charAt(i + j) != pat.charAt(j)) {
                    break;
                }
            }
            if (j == M) {
                return i;   // match found
            }
        }
        return N;           // no match
    }
}








最后一个关键的细节是如何维护重启位置X。因为X < j,所以可以由已经构造的DFA部分来完成这个任务——X的下一个值是dfa[pat.charAt(j)][X]。


  • 将dfa[][X]复制到dfa[][j](对于失败的情况)
  • 将dfa[pat.charAt(j)][j]设为j+1(对于匹配成功的情况)
  • 更新X。



// KMP substring search
public class KMP {
    private final int R;            // the radix (alphabet size)
    private final int[][] dfa;      // the KMP automaton: dfa[c][j] is the state after reading c in state j
    private final char[] pattern;   // private copy of the pattern characters

    /**
     * Preprocesses the pattern string using a radix of 256.
     *
     * @param pat the pattern string
     */
    public KMP(String pat) {
        this(pat.toCharArray(), 256);
    }

    /**
     * Preprocesses the pattern.
     *
     * @param pattern the pattern characters (copied, so later changes to the
     *                caller's array do not affect this object)
     * @param R the alphabet size; every pattern character must be less than R
     */
    public KMP(char[] pattern, int R) {
        this.R = R;
        this.pattern = pattern.clone();
        this.dfa = buildDfa(this.pattern, R);
    }

    // Builds the DFA for the pattern; an empty pattern yields an automaton with no states.
    private static int[][] buildDfa(char[] pattern, int R) {
        int m = pattern.length;
        int[][] dfa = new int[R][m];
        if (m == 0) {
            return dfa;                    // nothing to build; search() reports a match at 0
        }
        dfa[pattern[0]][0] = 1;
        for (int x = 0, j = 1; j < m; j++) {
            for (int c = 0; c < R; c++) {
                dfa[c][j] = dfa[c][x];     // Copy mismatch cases.
            }
            dfa[pattern[j]][j] = j + 1;    // Set match case.
            x = dfa[pattern[j]][x];        // Update restart state.
        }
        return dfa;
    }

    // Returns the state reached from the given state on character c.
    // A character outside the alphabet cannot occur in the pattern, so it
    // always sends the automaton back to state 0.
    private int next(char c, int state) {
        return c < R ? dfa[c][state] : 0;
    }

    /**
     * Returns the index of the first occurrence of the pattern
     * in the text string.
     *
     * @param  txt the text string
     * @return the index of the first occurrence of the pattern
     *         in the text string; the text length if no such match
     */
    public int search(String txt) {
        // simulate operation of DFA on text
        int m = pattern.length;
        int n = txt.length();
        int i, j;
        for (i = 0, j = 0; i < n && j < m; i++) {
            j = next(txt.charAt(i), j);
        }
        if (j == m) return i - m;    // found
        return n;                    // not found
    }

    /**
     * Returns the index of the first occurrence of the pattern
     * in the text.
     *
     * @param  text the text characters
     * @return the index of the first occurrence of the pattern
     *         in the text; the text length if no such match
     */
    public int search(char[] text) {
        // simulate operation of DFA on text
        int m = pattern.length;
        int n = text.length;
        int i, j;
        for (i = 0, j = 0; i < n && j < m; i++) {
            j = next(text[i], j);
        }
        if (j == m) return i - m;    // found
        return n;                    // not found
    }

    /**
     * Searches a fixed sample text for a fixed sample pattern, once through
     * the String API and once through the char[] API, and prints the pattern
     * aligned under its first occurrence in the text.
     *
     * @param args the command-line arguments (not used)
     */
    public static void main(String[] args) {
        String pat = "AACAA";
        String txt = "AABRAACADABRAACAADABRA";
        char[] pattern = pat.toCharArray();
        char[] text    = txt.toCharArray();

        KMP kmp1 = new KMP(pat);
        int offset1 = kmp1.search(txt);

        KMP kmp2 = new KMP(pattern, 256);
        int offset2 = kmp2.search(text);

        // print results
        System.out.println("text:    " + txt);

        System.out.print("pattern: ");
        for (int i = 0; i < offset1; i++)
            System.out.print(" ");
        System.out.println(pat);

        System.out.print("pattern: ");
        for (int i = 0; i < offset2; i++)
            System.out.print(" ");
        System.out.println(pat);
    }
}


text:    AABRAACADABRAACAADABRA
pattern:             AACAA
pattern:             AACAA




  • 造成匹配失败的字符不包含在模式字符串中,将模式字符串向右移动j+1个位置(即将i增加j+1)。
  • 造成匹配失败的字符包含在模式字符串中,就可以用right[]数组来将模式字符串和文本对齐,使得该字符和它在模式字符串中出现的最右位置相匹配。
  • 如果这种方式无法增大i,那就直接将i+1来保证模式字符串至少向右移动了一个位置。


// Boyer-Moore substring search (bad-character heuristic only)
public class BoyerMoore {
    private final int R;            // the radix (alphabet size)
    private final int[] right;      // the bad-character skip array
    private final char[] pattern;   // private copy of the pattern characters

    /**
     * Preprocesses the pattern string using a radix of 256.
     *
     * @param pat the pattern string
     */
    public BoyerMoore(String pat) {
        this(pat.toCharArray(), 256);
    }

    /**
     * Preprocesses the pattern.
     *
     * @param pattern the pattern characters (copied, so later changes to the
     *                caller's array do not affect this object)
     * @param R the alphabet size; every pattern character must be less than R
     */
    public BoyerMoore(char[] pattern, int R) {
        this.R = R;
        this.pattern = pattern.clone();

        // position of rightmost occurrence of c in the pattern
        right = new int[R];
        for (int c = 0; c < R; c++) {
            right[c] = -1;                  // characters absent from the pattern map to -1
        }
        for (int j = 0; j < this.pattern.length; j++) {
            right[this.pattern[j]] = j;     // later occurrences overwrite earlier ones
        }
    }

    // Returns the rightmost pattern index holding c, or -1 if c is not in the
    // pattern. A character outside the alphabet cannot be in the pattern.
    private int rightmost(char c) {
        return c < R ? right[c] : -1;
    }

    /**
     * Returns the index of the first occurrence of the pattern
     * in the text string.
     *
     * @param  txt the text string
     * @return the index of the first occurrence of the pattern
     *         in the text string; the text length if no such match
     */
    public int search(String txt) {
        int m = pattern.length;
        int n = txt.length();
        int skip;
        for (int i = 0; i <= n - m; i += skip) {
            skip = 0;
            // compare right to left so a mismatch can skip several alignments
            for (int j = m - 1; j >= 0; j--) {
                char c = txt.charAt(i + j);
                if (pattern[j] != c) {
                    // shift by at least one so the search always advances
                    skip = Math.max(1, j - rightmost(c));
                    break;
                }
            }
            if (skip == 0) return i;    // found
        }
        return n;                       // not found
    }

    /**
     * Returns the index of the first occurrence of the pattern
     * in the text.
     *
     * @param  text the text characters
     * @return the index of the first occurrence of the pattern
     *         in the text; the text length if no such match
     */
    public int search(char[] text) {
        int m = pattern.length;
        int n = text.length;
        int skip;
        for (int i = 0; i <= n - m; i += skip) {
            skip = 0;
            // compare right to left so a mismatch can skip several alignments
            for (int j = m - 1; j >= 0; j--) {
                char c = text[i + j];
                if (pattern[j] != c) {
                    // shift by at least one so the search always advances
                    skip = Math.max(1, j - rightmost(c));
                    break;
                }
            }
            if (skip == 0) return i;    // found
        }
        return n;                       // not found
    }

    /**
     * Searches a fixed sample text for a fixed sample pattern, once through
     * the String API and once through the char[] API, and prints the pattern
     * aligned under its first occurrence in the text.
     *
     * @param args the command-line arguments (not used)
     */
    public static void main(String[] args) {
        String pat = "AACAA";
        String txt = "AABRAACADABRAACAADABRA";
        char[] pattern = pat.toCharArray();
        char[] text    = txt.toCharArray();

        BoyerMoore boyermoore1 = new BoyerMoore(pat);
        BoyerMoore boyermoore2 = new BoyerMoore(pattern, 256);
        int offset1 = boyermoore1.search(txt);
        int offset2 = boyermoore2.search(text);

        // print results
        System.out.println("text:    " + txt);

        System.out.print("pattern: ");
        for (int i = 0; i < offset1; i++)
            System.out.print(" ");
        System.out.println(pat);

        System.out.print("pattern: ");
        for (int i = 0; i < offset2; i++)
            System.out.print(" ");
        System.out.println(pat);
    }
}


text:    AABRAACADABRAACAADABRA
pattern:             AACAA
pattern:             AACAA




import java.math.BigInteger;
import java.util.Random;

// Rabin-Karp fingerprint substring search (Las Vegas variant: every hash hit is verified)
public class RabinKarp {
    private final String pat;      // the pattern, kept to verify hash hits
    private final long patHash;    // pattern hash value
    private final int m;           // pattern length
    private final long q;          // a large prime, small enough to avoid long overflow
    private final int R;           // radix
    private final long RM;         // R^(m-1) % q

    /**
     * Preprocesses the pattern.
     *
     * @param pattern the pattern characters
     * @param R the alphabet size, used as the radix of the rolling hash
     * @throws IllegalArgumentException if {@code R} is not positive
     */
    public RabinKarp(char[] pattern, int R) {
        if (R <= 0) {
            throw new IllegalArgumentException("R must be positive: " + R);
        }
        this.pat = new String(pattern);
        this.R = R;
        this.m = pat.length();
        this.q = longRandomPrime();

        // precompute R^(m-1) % q for use in removing leading digit
        long rm = 1;
        for (int i = 1; i <= m - 1; i++) {
            rm = (R * rm) % q;
        }
        this.RM = rm;
        this.patHash = hash(pat, m);
    }

    /**
     * Preprocesses the pattern string using a radix of 256.
     *
     * @param pat the pattern string
     */
    public RabinKarp(String pat) {
        this(pat.toCharArray(), 256);
    }

    // Compute hash for key[0..m-1].
    private long hash(String key, int m) {
        long h = 0;
        for (int j = 0; j < m; j++) {
            h = (R * h + key.charAt(j)) % q;
        }
        return h;
    }

    // Las Vegas check: does pat[] match txt[i..i+m-1] ?
    private boolean check(String txt, int i) {
        for (int j = 0; j < m; j++) {
            if (pat.charAt(j) != txt.charAt(i + j)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the index of the first occurrence of the pattern
     * in the text string.
     *
     * @param  txt the text string
     * @return the index of the first occurrence of the pattern
     *         in the text string; the text length if no such match
     */
    public int search(String txt) {
        int n = txt.length();
        if (n < m) return n;
        long txtHash = hash(txt, m);

        // check for match at offset 0
        if ((patHash == txtHash) && check(txt, 0)) {
            return 0;
        }

        // check for hash match; if hash match, check for exact match
        for (int i = m; i < n; i++) {
            // Remove leading digit, add trailing digit, check for match.
            // Adding q before subtracting keeps the intermediate value non-negative.
            txtHash = (txtHash + q - RM * txt.charAt(i - m) % q) % q;
            txtHash = (txtHash * R + txt.charAt(i)) % q;

            // match
            int offset = i - m + 1;
            if ((patHash == txtHash) && check(txt, offset)) {
                return offset;
            }
        }

        // no match
        return n;
    }

    // a random 31-bit prime
    private static long longRandomPrime() {
        BigInteger prime = BigInteger.probablePrime(31, new Random());
        return prime.longValue();
    }

    /**
     * Searches a fixed sample text for a fixed sample pattern and prints the
     * pattern aligned under its first occurrence in the text.
     *
     * @param args the command-line arguments (not used)
     */
    public static void main(String[] args) {
        String pat = "AACAA";
        String txt = "AABRAACADABRAACAADABRA";

        RabinKarp searcher = new RabinKarp(pat);
        int offset = searcher.search(txt);

        // print results
        System.out.println("text:    " + txt);

        System.out.print("pattern: ");
        for (int i = 0; i < offset; i++)
            System.out.print(" ");
        System.out.println(pat);
    }
}
0 0