LZW压缩.java

来源：互联网 | 发布：网络黄金egd网址多少 | 编辑：程序博客网 | 时间：2024/05/18 18:18
package compress;import java.util.LinkedList;import java.util.Queue;import java.util.Scanner;class LZW{    private static final int R = 256;    private static final int L = 4096;//编码总数：2^12    private static final int W = 12;//编码宽度，用12位二进制来表示一个字符串    public static void compress(String input){        TrieST<Integer> st = new TrieST<Integer>();        for(int i=0;i<R;i++)//前256个字符还是ACSII中的编码，值在256-4096之间的12位二进制数编码剩余的组合字符串             st.put(""+(char)i, i);        int codeword = R+1;//R为文件结束(EOF)的编码        while(input.length()>0){            String s = st.longestPrefixOf(input);//从input中找出匹配st中的字符串 的最长前缀            System.out.println(st.get(s));//得出来的code完全可以用12位的二进制存储来节省空间            int t = s.length();            if(t<input.length()&&codeword <L)                st.put(input.substring(0,t+1), codeword++);                                       //0-t是最长匹配前缀            input = input.substring(t);        }        System.out.println(256);//codeword == 256代表结束    }    public static void expand(){        Scanner sc = new Scanner(System.in);        String txt = "";        String[] st = new String[L];        int i;//下一个待补全的编码值        for(i=0;i<R;i++)            st[i] = ""+(char) i;//通过编码值来找字符串就不需要用三向单词查找树了        st[i++] = " ";//未使用，文件结束标记（EOF）的前瞻字符        int codeword = sc.nextInt();        String val = st[codeword];        txt += val;        //System.out.println(val);        while(true){            codeword = sc.nextInt();//当前输入的字符            if(codeword == R)//st[R]代表着EOF,codeword==256代表结束                break;            String s = st[codeword];//获取当前的编码的对应的字符串            if(i == codeword)//如果前瞻字符不可用                s = val + val.charAt(0);// 根据上一个字符串的首字母得到编码的                                       if(i<L)           //当前编码的字符串.首字母                st[i++] = val + s.charAt(0);//为编译表添加新的条目                        //val代表上次codeword的字符串             val = s;//更新当前编码            txt += val;            //System.out.println(val);        }        sc.close();        
System.out.println(txt);    }}public class LZW压缩 {    public static void main(String[] args) {        LZW.compress("ABRACADABRABRABRA");        LZW.expand();    }}//引入了一个三向单词查找树class TrieST<Value>{    private static int R = 256;//基数    private Node root;    private static class Node{        private Object val;//对于一个字符串，当然会有对应的value，例如 ： 张三  21        private Node[] next = new Node[R];//不止26个字母    }    //因为Java不支持泛型数组，所以此处类型必须为Object,可以在get()中将值的类型转换成为Value    /*否则可以这样     * private static class Node<Value>{     * private Value val;     * private Node<Value>[] next = new Node<Value>[R];//不止26个字母     * }     */    public void put(String key,Value val){        root = put(root,key,val,0);    }    private Node put(Node x,String key,Value val,int d){        // 如果key存在于以x为根节点的子单词查找树中，就更新与他相关联的值        if(x==null)             x = new Node();        if(d == key.length()){//每一个结点有一个value 和 一个256大小的next[下标是第d个单词对应的ACSII值]数组，其值为下一个结点(对应第d个单词)            x.val = val;            return x;        }        char c = key.charAt(d);        x.next[c] = put(x.next[c],key,val,d+1);        return x;    }    public Value get(String key){        Node x = get(root,key,0);//返回的是单词所在的结点        if(x == null)             return null;        return (Value)x.val;    }    private Node get(Node x,String key,int d){        if(x == null)                  //这个d是标记匹配到key的第几个字符            return null;        if(d == key.length())            return x;        char c = key.charAt(d);//找到第d个字符对应的子单词查找树        return get(x.next[c],key,d+1);    }    public void delete(String key){        root = delete(root,key,0);    }    private Node delete(Node x,String key,int d){        if(x == null)            return null;        if(d == key.length())            x.val = null;        else{//如果没有到最后的长度就要一直递归找下去            char c = key.charAt(d);            x.next[c] = delete(x.next[c],key,d+1);        }        if(x.val != null)//找到后如果当前节点有值，x还有用            return x;        for(char c=0;c<R;c++)//如果当前节点有其他的分支，x还有用           
 if(x.next[c] != null)                return x;        return null;//x没有用了，返回null就可以了    }    //收集一棵单词查找树中的所有键的轨迹    public Iterable<String> keys(){        return keysWithPrefix("");//结果是大的    }    public Iterable<String> keysWithPrefix(String pre){        Queue<String> q = new LinkedList<String>();        collect(get(root,pre,0),pre,q);              //从get(root,pre,0)为根结点进行遍历        return q;    }    private void collect(Node x,String pre,Queue<String> q){        if(x == null)            return ;        if(x.val != null)//当前结点有值代表着一个键，把他存入队列            q.add(pre);        for(char c=0;c<R;c++)            collect(x.next[c],pre+c,q);                   //递归遍历，  拼接字符串    }    //单词查找树中的通配符匹配(通配符是进行填充的)        //pat就是要被匹配的    public Iterable<String> keysThatMath(String pat){        Queue<String> q = new LinkedList<String>();        collect(root,"",pat,q);        return q;    }    private void collect(Node x,String pre,String pat,Queue<String> q){        int d = pre.length();   //因为这里需要pre这个参数，那么干脆把d这个参数去掉吧        if(x == null)            return;        if(d == pat.length()&& x.val != null)//只要匹配的长度跟用通配符匹配的pat长度相等，说明匹配完成            q.add(pre);        if(d == pat.length())            return;        char next = pat.charAt(d);//如果pat中已经匹配了d长度的字符串，那么下一个要匹配的是pat中第d个字符        for(char c=0;c<R;c++)            if(next == '.' || next == c)//当前的pat中的第d个字符是 . 说明不管c是什么都行；当前的pat中的第d个字符正好是c，可以继续                collect(x.next[c],pre+c,pat,q);    }    //对给定字符串的最长前缀进行匹配    public String longestPrefixOf(String pat){//结果返回的是小的字符串        int length = search(root,pat,0,0);        return pat.substring(0,length);    }    private int search(Node x,String pat,int d,int length){        if(x == null)            return length;        if(x.val != null)            length = d;//只有val不是null的时候才会更新length        if(d == pat.length())//如果pat全部找完了就不用找了            return length;        char c = pat.charAt(d);        return search(x.next[c],pat,d+1,length);    }}
阅读全文
0 0