关于文件目录线程级深度优先搜索效率的研究

来源：互联网发布：北京哪里有mac专柜编辑：程序博客网时间：2024/05/01 16:09

我承认标题很高大上，其实只是一个初中生的无聊研究。

ps：如果有建议，随便提。

正文：

试验方法：

不限制深度，但限制时间。在同一个根目录（C:\）开始搜索，过滤出dll文件，第一次用普通方法（单线程），记录查找到的文件数目，第二次用线程级的方法（多线程），一样记录数目。对比两次实验的结果，得到结论。

代码片段：

目录的深度优先算法（普通版的）（自写的，不知道是不是最优-->_-->）：

        public static Function<String, Predicate<File>> isendwith = s            -> isDic.negate().and(f -> f.getName().toUpperCase().endsWith(s.toUpperCase()));    private long size = 0;    boolean stop = false;    private void search(File f, int deepth, String n) {        if (stop) {            return;        }        //if(deepth <= 0)        //return;        File[] files = f.listFiles();        if (files == null) {            return;        }        List<File> list = Arrays.asList(files);        size += list.stream().filter(isendwith.apply(n)).count();        list.stream().filter(isDic).forEach(fi -> search(fi, deepth - 1, n));        list = null;        files = null;    }

其中 stop 为控制是否结束的变量

线程版的（有点多）：

第一个类：BlockingStack

一个线程安全的栈，用以存放搜索到的目录。用来放Inf类。

（原理参见，虽然是阻塞队列：http://www.cnblogs.com/yjmyzz/p/BlockingQueue-in-java.html）

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Stack;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

/**
 * A thread-safe LIFO stack whose {@link #pop()} and {@link #peek()} block while the
 * stack is empty. Null elements are rejected.
 *
 * @author Administrator
 * @param <E> type of the elements held in this stack
 */
public class BlockingStack<E> {

    /** Backing storage; every access is made while holding {@link #lock}. */
    private final Deque<E> stack = new ArrayDeque<>();

    /** Non-fair lock guarding {@link #stack}. */
    private final ReentrantLock lock = new ReentrantLock(false);

    /** Signalled whenever an element is pushed. */
    private final Condition notEmpty = lock.newCondition();

    /** Creates an empty stack. */
    public BlockingStack() {
    }

    /**
     * Pushes an element onto the top of the stack and wakes the waiting threads.
     *
     * @param item element to push; must not be null
     * @throws NullPointerException if {@code item} is null
     * @throws InterruptedException if the calling thread is interrupted while acquiring the lock
     */
    public void push(E item) throws InterruptedException {
        // Validate before locking so a rejected argument can never leave the lock held.
        checkisNull(item);
        lock.lockInterruptibly();
        try {
            stack.push(item);
            // signalAll rather than signal: a woken peek() removes nothing, so waking a
            // single waiter could leave a blocked pop() asleep next to a non-empty stack.
            notEmpty.signalAll();
        } finally {
            lock.unlock();
        }
    }

    /**
     * Removes and returns the top element, waiting until one is available.
     *
     * @return the element that was on top of the stack
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public E pop() throws InterruptedException {
        lock.lockInterruptibly();
        try {
            // Loop guards against spurious wake-ups and against another thread winning the race.
            while (stack.isEmpty()) {
                notEmpty.await();
            }
            return stack.pop();
        } finally {
            lock.unlock();
        }
    }

    /**
     * Returns the top element without removing it, waiting until one is available.
     *
     * @return the element currently on top of the stack
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public E peek() throws InterruptedException {
        lock.lockInterruptibly();
        try {
            while (stack.isEmpty()) {
                notEmpty.await();
            }
            return stack.peek();
        } finally {
            lock.unlock();
        }
    }

    /** Throws {@link NullPointerException} when {@code e} is null. */
    private void checkisNull(E e) {
        if (e == null) {
            throw new NullPointerException();
        }
    }
}

第二个类：Inf

极其简单，用来储存一个File，一个deepth。

本来deepth是用来判断是否结束搜索的，但实验不限制深度，所以暂时没用。

/**
 * Plain holder that pairs a {@link File} with a depth value.
 */
public class Inf {

    /** Depth value associated with {@link #f}. */
    int deepth;

    /** The file (normally a directory) this entry refers to. */
    File f;

    /**
     * Creates an entry.
     *
     * @param fi the file to store
     * @param d  the depth value to store
     */
    public Inf(File fi, int d) {
        this.deepth = d;
        this.f = fi;
    }
}

第三个类：FileSearcher

较为重要，是线程搜索的核心。

接受一个BlockingStack<Inf>，它是各线程共用的栈，线程从中获取要搜索的目录。

LinkedList<File>，一个公共链表，用于储存结果。

String，过滤的文件后缀名。

ps：虽然代码中有两个synchronized，但我不知道是不是可以去掉。

结束的话先用interrupt，再用stopsearch

import java.io.File;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Worker thread that repeatedly takes a directory from a shared stack, pushes its
 * sub-directories back onto the stack and appends files whose name ends with the
 * configured suffix to a shared result list.
 *
 * <p>The result list is shared between workers, so every append is made while holding
 * the list's monitor. Code that reads the list while workers are still running should
 * synchronize on the list as well.
 *
 * <p>To stop a worker, call {@link #stopsearch()} and {@link #interrupt()}: the flag ends
 * the loop, the interrupt releases a worker blocked on an empty stack.
 *
 * @author Administrator
 */
public class FileSearcher extends Thread {

    /** Shared stack of directories still to be listed. */
    private final BlockingStack<Inf> dics;

    /** Shared list collecting the matching files. */
    private final LinkedList<File> files;

    /** File-name suffix to match, compared case-insensitively. */
    private final String sfx;

    /** True while the worker should keep going; volatile so stopsearch() is seen promptly. */
    private volatile boolean running = true;

    /**
     * @param d   shared stack supplying directories to search
     * @param f   shared list receiving the matching files
     * @param sfx file-name suffix to match, e.g. ".dll"
     */
    public FileSearcher(BlockingStack<Inf> d, LinkedList<File> f, String sfx) {
        this.dics = d;
        this.files = f;
        this.sfx = sfx;
    }

    /** Asks the worker to leave its loop once the directory in progress is finished. */
    public void stopsearch() {
        running = false;
    }

    /**
     * Main loop: pop a directory, list it, queue sub-directories, record matching files.
     *
     * @throws InterruptedException if the thread is interrupted while using the shared stack
     */
    private void search() throws InterruptedException {
        // Upper-case the suffix once instead of once per examined file.
        final String upperSuffix = sfx.toUpperCase();
        while (running) {
            final Inf inf = dics.pop();
            final File[] children = inf.f.listFiles();
            if (children == null) {
                // Not a directory, or not readable: nothing to do for this entry.
                continue;
            }
            for (File child : children) {
                if (child.isDirectory()) {
                    dics.push(new Inf(child, 0));
                } else if (child.getName().toUpperCase().endsWith(upperSuffix)) {
                    // LinkedList is not thread-safe; concurrent add() calls can corrupt it.
                    synchronized (files) {
                        files.add(child);
                    }
                }
            }
        }
    }

    @Override
    public void run() {
        try {
            search();
        } catch (InterruptedException ex) {
            // Interruption is the normal way to stop a worker, so it is not an error.
            // Restore the flag for any code that inspects this thread afterwards.
            Thread.currentThread().interrupt();
            Logger.getLogger(FileSearcher.class.getName()).log(Level.FINE, "search interrupted", ex);
        }
    }
}

最后是调用的类（包含普通方法）：

然后

import java.util.ArrayList;import java.util.List;import java.util.function.Predicate;import java.io.File;import java.util.Arrays;import java.util.LinkedList;import java.util.function.Function;import java.util.logging.Level;import java.util.logging.Logger;public class Test {    private ArrayList<File> files = new ArrayList(64);    public static Predicate<File> isDic = File::isDirectory;    public static Function<String, Predicate<File>> isendwith = s            -> isDic.negate().and(f -> f.getName().toUpperCase().endsWith(s.toUpperCase()));    private long size = 0;    public Test() {        /*Thread t = new Thread() {                @Override        public void run() {        search(new File("C:\\"), 0, ".dll");        }        };        t.start();        try {        Thread.sleep(10000);        } catch (InterruptedException ex) {        Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);        }        stop = true;        System.out.println(size);*/                                            //以上是普通方法的调用，要创一个线程调用，主线程计时        BlockingStack<Inf> stack = new BlockingStack();        LinkedList<File> list = new LinkedList();        FileSearcher fs = new FileSearcher(stack, list, ".dll");        FileSearcher fs1 = new FileSearcher(stack, list, ".dll");        FileSearcher fs2 = new FileSearcher(stack, list, ".dll");        FileSearcher fs3 = new FileSearcher(stack, list, ".dll");        try {            stack.push(new Inf(new File("C:\\"), 0));            fs.start();            fs1.start();            fs2.start();            fs3.start();            Thread.sleep(10000);            fs.interrupt();            fs1.interrupt();            fs2.interrupt();            fs3.interrupt();            fs.stopsearch();            fs1.stopsearch();            fs2.stopsearch();            fs3.stopsearch();        } catch (InterruptedException ex) {            Logger.getLogger(Test.class.getName()).log(Level.SEVERE, null, ex);        } finally {            
System.out.println(list.size());        }<span style="white-space:pre"></span>         //线程级方法调用，自动创建线程，主线程用于计时    }    boolean stop = false;    private void search(File f, int deepth, String n) {        if (stop) {            return;        }        //if(deepth <= 0)        //return;        File[] files = f.listFiles();        if (files == null) {            return;        }        List<File> list = Arrays.asList(files);        size += list.stream().filter(isendwith.apply(n)).count();        list.stream().filter(isDic).forEach(fi -> search(fi, deepth - 1, n));        list = null;        files = null;    }}

结果：

但是对比第一次与第二次的普通方法的结果，就尴尬了。

测试次数越多，结果也越大。但还好，最后稳定在了8千左右。

于是，我猜想，数量的增多是不是与磁盘优化，或系统提供的优化有关（如果有知道为啥的，回复一下，谢了）。于是有了13行的操作。

但然并卵，反而结果变大了，so，有了17行的操作。普通方法的结果终于降下来了。

数据的对比很明显了，线程级的效率高。但对比15，与16的数据，又尴尬了（#滑稽）。发现线程不是越多越好。

最后的结论是：

线程级的效率普遍高于普通方法，但2-3个线程足矣，不然会产生浪费。

ps：看到系统盘多于29600个dll我就方了。

0 0