使用伸展树(SplayTree)统计单词频率的Java实现

来源:互联网 发布:ipad应用网络连接失败 编辑:程序博客网 时间:2024/05/22 04:33

伸展树(Splay Tree)是特殊的二叉搜索树。

伸展树在每次访问(查询或插入)时进行旋转,使刚刚访问的节点成为树根,因此越频繁访问的节点越靠近树根,从而加快后续访问。关于伸展树的介绍很多,在了解二叉搜索树和AVL树之后,伸展树的原理会比较好理解。本文不再详细介绍伸展树的原理,而是在读取一篇文章的同时把其中的单词逐个插入伸展树,并统计每个单词出现的频率。


1、构造二叉搜索树

二叉搜索树节点:
/**
 * Node of a binary search tree.
 *
 * <p>Holds one element plus references to the left and right children. The fields are
 * {@code protected} so that tree classes in the same package and node subclasses can
 * read and relink them directly.
 *
 * @param <T> element type; must be comparable to itself or to one of its supertypes
 * @author cuiods
 */
public class BSTNode<T extends Comparable<? super T>> {

    /** Element stored in this node; {@code null} when built with the no-arg constructor. */
    protected T data;

    /** Left and right children; {@code null} when the child is absent. */
    protected BSTNode<T> left, right;

    /** Creates a node with no element and no children. */
    public BSTNode() {
        this(null, null, null);
    }

    /**
     * Creates a leaf node.
     *
     * @param data element to store
     */
    public BSTNode(T data) {
        this(data, null, null);
    }

    /**
     * Creates a node with the given element and children.
     *
     * <p>The child parameters are parameterized with {@code T} (the original used raw
     * types), so a node of a different element type can no longer be linked in unchecked.
     *
     * @param data  element to store
     * @param left  left child, or {@code null}
     * @param right right child, or {@code null}
     */
    public BSTNode(T data, BSTNode<T> left, BSTNode<T> right) {
        this.data = data;
        this.left = left;
        this.right = right;
    }
}
二叉搜索树:
/**
 * Unbalanced binary search tree ordered by {@link Comparable#compareTo}.
 *
 * <p>Elements that compare less than or equal to a node are placed in its left subtree,
 * strictly greater elements in its right subtree. Duplicates are therefore kept.
 *
 * @param <T> element type
 * @author cuiods
 */
public class BSTree<T extends Comparable<? super T>> {

    /** Root of the tree; {@code null} while the tree is empty. */
    protected BSTNode<T> root;

    /**
     * Looks up an element that compares equal to {@code data}.
     *
     * @param data probe value
     * @return the element stored in the tree, or {@code null} if none compares equal
     */
    public T search(T data) {
        return search(root, data);
    }

    /**
     * Inserts {@code data} as a new leaf. Existing elements are never replaced.
     *
     * @param data element to insert
     */
    public void insert(T data) {
        if (root == null) {
            root = new BSTNode<T>(data);
            return;
        }
        BSTNode<T> parent = root;
        for (;;) {
            if (parent.data.compareTo(data) < 0) {
                // stored element is smaller than the new one: go right
                if (parent.right == null) {
                    parent.right = new BSTNode<T>(data);
                    return;
                }
                parent = parent.right;
            } else {
                // stored element is greater or equal: go left
                if (parent.left == null) {
                    parent.left = new BSTNode<T>(data);
                    return;
                }
                parent = parent.left;
            }
        }
    }

    /** Visits every element in ascending order by calling {@link #visit} on each. */
    public void inorder() {
        inorder(root);
    }

    /**
     * Recursive in-order traversal of the subtree rooted at {@code node}.
     *
     * @param node subtree root, may be {@code null}
     */
    protected void inorder(BSTNode<T> node) {
        if (node != null) {
            inorder(node.left);
            visit(node.data);
            inorder(node.right);
        }
    }

    /**
     * Iterative lookup starting at {@code p}.
     *
     * <p>Matching uses {@code compareTo == 0}, the same ordering that shaped the tree,
     * and the stored element is returned rather than the probe, so callers can read
     * state kept on the stored object.
     *
     * @param p    subtree root to start from, may be {@code null}
     * @param data probe value
     * @return the stored element comparing equal to {@code data}, or {@code null}
     */
    protected T search(BSTNode<T> p, T data) {
        while (p != null) {
            int cmp = data.compareTo(p.data);
            if (cmp == 0) {
                return p.data;
            }
            p = (cmp < 0) ? p.left : p.right;
        }
        return null;
    }

    /**
     * Hook called once per element during traversal; prints the element on its own line.
     *
     * @param node element being visited
     */
    protected void visit(T node) {
        System.out.println(node.toString() + " ");
    }
}

2、构造伸展树

主要实现的是伸展树的插入和搜索方法,其中最核心的是伸展(splay)方法,其实现借鉴自:
http://www.cnblogs.com/skywang12345/p/3604286.html

伸展树节点:
/** * node of splay tree * @author cuiods */public class SplayTreeNode<T extends Comparable<? super T>> extends BSTNode<T> {    public SplayTreeNode() {        right = left = null;    }    public SplayTreeNode(T data) {        this(data,null,null);    }    public SplayTreeNode(T data, SplayTreeNode<T> left, SplayTreeNode<T> right) {        this.data = data;        this.left = left;        this.right = right;    }}
伸展树:
/** * splay tree implement * http://www.cnblogs.com/skywang12345/p/3604286.html * @author cuiods */public abstract class SplayTree<T extends Comparable<? super T>> extends BSTree<T> {    /**     * 处理插入时遇到相同的节点     * @param data 插入时已经存在的节点     */    protected abstract void handleSame(T data);    @Override    public void insert(T key) {        SplayTreeNode<T> z=new SplayTreeNode<T>(key);        // 插入节点        root = insert((SplayTreeNode<T>) root, z);        // 将节点(key)旋转为根节点        root = splay((SplayTreeNode<T>) root, key);    }    @Override    public T search(T key) {        T result = super.search(key);        splay(key);        return result;    }    private void splay(T key) {        root = splay((SplayTreeNode<T>) root, key);    }    /*    * 旋转key对应的节点为根节点,并返回根节点。    *    * 注意:    *   (a):伸展树中存在"键值为key的节点"。    *          将"键值为key的节点"旋转为根节点。    *   (b):伸展树中不存在"键值为key的节点",并且key < tree.key。    *      b-1 "键值为key的节点"的前驱节点存在的话,将"键值为key的节点"的前驱节点旋转为根节点。    *      b-2 "键值为key的节点"的前驱节点不存在的话,则意味着,key比树中任何键值都小,那么此时,将最小节点旋转为根节点。    *   (c):伸展树中不存在"键值为key的节点",并且key > tree.key。    *      c-1 "键值为key的节点"的后继节点存在的话,将"键值为key的节点"的后继节点旋转为根节点。    *      c-2 "键值为key的节点"的后继节点不存在的话,则意味着,key比树中任何键值都大,那么此时,将最大节点旋转为根节点。    */    private SplayTreeNode<T> splay(SplayTreeNode<T> tree, T data) {        if (tree == null)            return null;        SplayTreeNode<T> N = new SplayTreeNode<T>();        SplayTreeNode<T> l = N;        SplayTreeNode<T> r = N;        SplayTreeNode<T> c;        for (;;) {            int cmp = data.compareTo(tree.data);            if (cmp < 0) {                if (tree.left == null)                    break;                if (data.compareTo(tree.left.data) < 0) {                    c = (SplayTreeNode<T>) tree.left;                           /* rotate right */                    tree.left = c.right;                    c.right = tree;                    tree = c;                    if (tree.left == null)                        break;                }               
 r.left = tree;                               /* link right */                r = tree;                tree = (SplayTreeNode<T>) tree.left;            } else if (cmp > 0) {                if (tree.right == null)                    break;                if (data.compareTo(tree.right.data) > 0) {                    c = (SplayTreeNode<T>) tree.right;                          /* rotate left */                    tree.right = c.left;                    c.left = tree;                    tree = c;                    if (tree.right == null)                        break;                }                l.right = tree;                              /* link left */                l = tree;                tree = (SplayTreeNode<T>) tree.right;            } else {                break;            }        }        l.right = tree.left;                                /* assemble */        r.left = tree.right;        tree.left = N.right;        tree.right = N.left;        return tree;    }    /*    * 将结点插入到伸展树中,并返回根节点    *    * 参数说明:    *     tree 伸展树的    *     z 插入的结点    */    private SplayTreeNode<T> insert(SplayTreeNode<T> tree, SplayTreeNode<T> z) {        int cmp;        SplayTreeNode<T> y = null;        SplayTreeNode<T> x = tree;        // 查找z的插入位置        while (x != null) {            y = x;            cmp = z.data.compareTo(x.data);            if (cmp < 0)                x = (SplayTreeNode<T>) x.left;            else if (cmp > 0)                x = (SplayTreeNode<T>) x.right;            else {                handleSame(z.data);                return tree;            }        }        if (y==null)            tree = z;        else {            cmp = z.data.compareTo(y.data);            if (cmp < 0)                y.left = z;            else                y.right = z;        }        return tree;    }}

3、统计单词频率

特殊的单词伸展树:
/**
 * Splay tree of {@link Word}s used to count how often each word occurs in a stream.
 *
 * @author cuiods
 */
public class WordSplay extends SplayTree<Word> {

    /** Totals accumulated by {@link #visit} during one in-order traversal. */
    private int differentWords, wordCount;

    /**
     * Increments the frequency of the given word by one.
     *
     * @param data word whose frequency is incremented
     */
    @Override
    protected void handleSame(Word data) {
        data.setFreq(data.getFreq() + 1);
    }

    /**
     * Prints the word and adds it to the running totals.
     *
     * @param word word being visited
     */
    @Override
    protected void visit(Word word) {
        super.visit(word);
        differentWords++;
        wordCount += word.getFreq();
    }

    /**
     * Reads the stream, inserts every word (a maximal run of letters, upper-cased) into
     * the tree, then prints all words in order followed by a summary line.
     *
     * <p>Each value returned by {@code read()} is treated as one character. The stream is
     * not closed here; that remains the caller's responsibility. If reading fails, the
     * stack trace is printed and the words read so far are still reported.
     *
     * @param inputStream source of the text
     * @param fileName    name shown in the summary line
     */
    public void run(InputStream inputStream, String fileName) {
        try {
            int ch = inputStream.read();
            while (ch > -1) {
                // skip everything that is not a letter
                while (ch > -1 && !Character.isLetter((char) ch)) {
                    ch = inputStream.read();
                }
                if (ch == -1) {
                    break;
                }
                // collect one word
                StringBuilder word = new StringBuilder();
                while (ch > -1 && Character.isLetter((char) ch)) {
                    word.append(Character.toUpperCase((char) ch));
                    ch = inputStream.read();
                }
                insert(new Word(word.toString()));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        // start from zero so totals from an earlier traversal are not counted again
        differentWords = 0;
        wordCount = 0;
        inorder();
        System.out.println("\nFile "+fileName+" contains "+ wordCount + " words among which "+differentWords+" are different.\n");
    }
}

单词类:

/** * @author cuiods */public class Word implements Comparable<Word>{    private String word;    private int freq = 1;    public Word(String w) {        word = w;    }    public String getWord() {        return word;    }    public void setWord(String word) {        this.word = word;    }    public int getFreq() {        return freq;    }    public void setFreq(int freq) {        this.freq = freq;    }    @Override    public boolean equals(Object o) {        if (this == o) return true;        if (o == null || getClass() != o.getClass()) return false;        Word word1 = (Word) o;        return word != null ? word.equals(word1.word) : word1.word == null;    }    @Override    public int compareTo(Word o) {        return word.compareTo(o.word);    }    @Override    public String toString() {        return "Word{" +                "word='" + word + '\'' +                '}';    }}

使用单词伸展树统计:
/**
 * Command-line entry point: counts the words of a file with a {@link WordSplay}.
 *
 * <p>The file name is taken from the first command-line argument, or read from standard
 * input when no argument is given.
 */
public class Main {

    public static void main(String[] args) {
        String fileName;
        if (args.length == 0) {
            System.out.print("Enter a file name: ");
            // the scanner is deliberately not closed: closing it would close System.in
            Scanner scanner = new Scanner(System.in);
            fileName = scanner.nextLine();
        } else {
            fileName = args[0];
        }

        // try-with-resources closes the file even when run() throws; the buffer avoids
        // hitting the file once per byte, since run() reads one byte at a time
        try (InputStream inputStream =
                     new java.io.BufferedInputStream(new FileInputStream(fileName))) {
            WordSplay splay = new WordSplay();
            splay.run(inputStream, fileName);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}





1 0
原创粉丝点击