Berkeley DB

来源:互联网 发布:会计培训 知乎 编辑:程序博客网 时间:2024/03/29 04:37

Berkeley DB 是一个嵌入式数据库,它适合于管理海量的(256TB)、简单的数据。BDB是以键值对(key/value)来存储和管理数据的。键可以重复(需要在数据库配置中允许重复键),数据值可以是任意类型的。BDB的底层是用B+树或者其他算法实现的。我用的jar包是B+树实现的版本。

Berkeley DB是用Environment对象来管理数据库,一个Environment可以管理多个database。每个database都存储键值对,而序列化到磁盘上是通过catalog实现的。BDB的操作是在内存和磁盘上的,最终BDB的存取结果集在程序中使用是通过容器实现的(数据库在程序中的视图)。

所以BDB的使用分为5步:

  • 创建Environment
    //Open Environment    private Environment environment;    //同EnvironmentConfig来配置环境    EnvironmentConfig environmentConfig=new EnvironmentConfig();    environmentConfig.setTransactional(true);    environmentConfig.setAllowCreate(true);    //homeDirectory是数据库存放的目录    environment=new Environment(new File(homeDirectory),environmentConfig);
  • 打开database和catalog
    protected StoredClassCatalog catalog;//catalog    protected Database database;//database    private static final String CLASS_CATALOG="java_class_catalog";//数据库名    protected Database catalogDatabase;//catalog存放处    //open Database    DatabaseConfig dbConfig=new DatabaseConfig();//数据库配置    dbConfig.setTransactional(true);    catalogDBConfig.setAllowCreate(true);    dbConfig.setSortedDuplicates(false);//不存重复键值    database=environment.openDatabase(null, "URL", dbConfig);//存放实际数据的数据库       //Open Catalog    DatabaseConfig catalogDBConfig=new DatabaseConfig();//数据库配置    catalogDBConfig.setTransactional(true);    catalogDBConfig.setAllowCreate(true);    catalogDatabase=environment.openDatabase(null, CLASS_CATALOG, catalogDBConfig);    //用StoredClassCatalog类将catalog保存在db中并返回可操作对象。    catalog=new StoredClassCatalog(catalogDatabase);
  • 存储序列化类型绑定
    //键绑定    EntryBinding<Integer> keyBinding=new SerialBinding<Integer>(catalog,Integer.class);    //值绑定    SerialBinding<Url> valueBinding=new SerialBinding<Url>(catalog,Url.class);
  • 存储结果容器访问
    StoreMap<Integer,Url> urlMap;//结果集的操作容器(视图)    urlMap=new StoredMap<Integer,Url>(database,keyBinding,valueBinding,true);
  • 关闭资源
     // Release resources: data database first, then the catalog, then the environment
     database.close();
     catalog.close();
     environment.close();

我在爬虫项目中用BDB作为内存数据库来保存未访问的URL。StoredMap中的元素存储无序,StoredSortedMap则把元素按键排序后存储,但二者都不是队列的顺序。故本例中以整数为主键存储,并记录整数的值来模拟队列的头和尾。StoredMap实现了Map接口,可以使用其所有函数,例如:get()、remove()、put()等。

java代码如下:

//BDBFrontier.java
import java.io.File;
import java.io.FileNotFoundException;

import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;

/**
 * Base class that opens a Berkeley DB environment together with a class-catalog
 * database and a data database named "URL", and exposes abstract put/get/delete
 * operations for subclasses to implement on top of them.
 */
public abstract class BDBFrontier {

    /** Name of the database in which the class catalog is stored. */
    private static final String CLASS_CATALOG = "java_class_catalog";

    private Environment environment;
    protected StoredClassCatalog catalog;
    protected Database database;
    protected Database catalogDatabase;

    /**
     * Opens (creating if necessary) the environment, the catalog database and
     * the "URL" data database.
     *
     * @param homeDirectory directory in which the environment files live
     * @throws DatabaseException if the environment or a database cannot be opened
     * @throws FileNotFoundException declared for compatibility with existing callers
     */
    public BDBFrontier(String homeDirectory) throws DatabaseException, FileNotFoundException {
        // Open the environment
        System.out.println("Opening environment in: " + homeDirectory);
        EnvironmentConfig environmentConfig = new EnvironmentConfig();
        environmentConfig.setTransactional(true);
        environmentConfig.setAllowCreate(true);
        environment = new Environment(new File(homeDirectory), environmentConfig);

        // Open the class catalog
        DatabaseConfig catalogDBConfig = new DatabaseConfig();
        catalogDBConfig.setTransactional(true);
        catalogDBConfig.setAllowCreate(true);
        catalogDatabase = environment.openDatabase(null, CLASS_CATALOG, catalogDBConfig);
        catalog = new StoredClassCatalog(catalogDatabase);

        // Open the data database
        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setTransactional(true);
        dbConfig.setAllowCreate(true);
        database = environment.openDatabase(null, "URL", dbConfig);
    }

    /**
     * Closes the data database, the catalog and finally the environment.
     *
     * @throws DatabaseException if any of the close operations fails
     */
    public void close() throws DatabaseException {
        database.close();
        // NOTE(review): closing the catalog is expected to close catalogDatabase
        // as well - confirm against the JE Javadoc for StoredClassCatalog.close().
        catalog.close();
        environment.close();
    }

    /** Stores {@code value} under {@code key}; the return value is defined by the subclass. */
    protected abstract Object put(Object key, Object value);

    /** Looks up the value stored under {@code key}. */
    protected abstract Object get(Object key);

    /** Removes the entry stored under {@code key}. */
    protected abstract Object delete(Object key);
}

//Frontier.java
import java.io.FileNotFoundException;

import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.SerialBinding;
import com.sleepycat.collections.StoredMap;
import com.sleepycat.je.DatabaseException;

/**
 * URL frontier that simulates a FIFO queue on top of a {@link StoredMap}:
 * URLs are stored under consecutive integer keys, {@code tail} is the next key
 * to write and {@code head} is the next key to read.
 */
public class Frontier extends BDBFrontier implements UrlFrontier {

    private StoredMap<Integer, Url> urlMap = null; // a database view
    private Integer head; // head of the URL queue (next key to read)
    private Integer tail; // tail of the URL queue (next key to write)

    /**
     * Opens the underlying databases and creates a writable map view over the
     * data database.
     *
     * @param homeDirectory directory in which the environment files live
     * @throws DatabaseException if the environment or a database cannot be opened
     * @throws FileNotFoundException declared by the superclass constructor
     */
    public Frontier(String homeDirectory) throws DatabaseException, FileNotFoundException {
        super(homeDirectory);
        EntryBinding<Integer> keyBinding = new SerialBinding<Integer>(catalog, Integer.class);
        SerialBinding<Url> valueBinding = new SerialBinding<Url>(catalog, Url.class);
        // Create the view; the final argument (true) makes it writable
        urlMap = new StoredMap<Integer, Url>(database, keyBinding, valueBinding, true);
        // NOTE(review): head and tail always start at 0, so records that an
        // earlier run may have left in the database are not taken into account
        // - confirm whether the queue is meant to survive restarts.
        head = 0;
        tail = 0;
    }

    /**
     * Removes and returns the URL at the head of the queue.
     *
     * @return the URL stored under the current head key, or {@code null} when
     *         the map is empty
     */
    @Override
    public Url getNext() throws Exception {
        Url result = null;
        if (!urlMap.isEmpty()) {
            result = urlMap.get(head);
            delete(head++);
        }
        return result;
    }

    /**
     * Appends a URL at the tail of the queue.
     *
     * @param url the URL to enqueue
     * @return {@code true} once the URL has been stored
     */
    @Override
    public boolean putUrl(Url url) throws Exception {
        // Map.put returns the PREVIOUS value for the key, which is null for a
        // fresh key, so its result must not be used as the success indicator.
        put(tail++, url);
        return true;
    }

    /** Stores the value under the key and returns whatever the map's put returns. */
    @Override
    protected Object put(Object key, Object value) {
        return urlMap.put((Integer) key, (Url) value);
    }

    /** Returns the value stored under the key, as returned by the map's get. */
    @Override
    protected Object get(Object key) {
        return urlMap.get(key);
    }

    /** Removes the entry stored under the key and returns the map's remove result. */
    @Override
    protected Object delete(Object key) {
        return urlMap.remove(key);
    }

    /** Returns whether the underlying map currently holds no entries. */
    public boolean isEmpty() {
        return urlMap.isEmpty();
    }

    /** Returns whether the underlying map contains the given URL as a value. */
    public boolean contains(Url url) {
        return urlMap.containsValue(url);
    }

    /** Small demo: enqueue three URLs, then dequeue and print them. */
    public static void main(String[] args) {
        Frontier frontier = null;
        try {
            frontier = new Frontier("D:\\workspace\\db");
            Url url = new Url();
            url.setOriUrl("http://www.163.com");
            frontier.putUrl(url);
            url.setOriUrl("http://www.164.com");
            frontier.putUrl(url);
            url.setOriUrl("http://www.165.com");
            frontier.putUrl(url);
            System.out.println(frontier.getNext().getOriUrl());
            System.out.println(frontier.getNext().getOriUrl());
            System.out.println(frontier.getNext().getOriUrl());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the databases and environment are released
            // even when one of the operations above throws.
            if (frontier != null) {
                try {
                    frontier.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }
}

参考文献
自己动手写网络爬虫
嵌入式数据库系统Berkeley DB
Berkeley DB 使用经验总结

0 0