Converting images to a SequenceFile and importing them into HBase (illustrated walkthrough, 2017-7-23)


I picked a few image files as test data: five .jpg files in total (the pink ones in the original screenshot).

First, upload them to HDFS (for example with hdfs dfs -put), under:

hdfs://172.16.11.222:9000/JpgSequence
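
If you would rather do the upload from Java than from the shell, a minimal sketch looks like this (the local directory /data/jpg is a made-up example; the namenode address is the one used throughout this article):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class UploadJpgs {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // connect to the same namenode used in the rest of this article
        FileSystem fs = FileSystem.get(new URI("hdfs://172.16.11.222:9000"), conf);
        // copy a local directory of images into /JpgSequence
        // (the local path /data/jpg is hypothetical)
        fs.copyFromLocalFile(new Path("/data/jpg"), new Path("/JpgSequence"));
        fs.close();
    }
}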

The generated SequenceFile should end up at:

hdfs://172.16.11.222:9000/Sequence/bb.txt

The concrete steps and explanations are all annotated in the code below.

One thing to explain first: how you work with a SequenceFile differs depending on the Hadoop version. This article uses Hadoop 2.x, which is fairly recent, so the newer API style appears below.
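
For reference, this is roughly what creating a writer looks like with the Hadoop 2.x Option-based API. This is a minimal standalone sketch of my own, reusing the article's paths; the older createWriter(FileSystem, Configuration, ...) overload, which appears commented out in the main program further down, is deprecated in 2.x:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class WriterDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.16.11.222:9000");
        // Hadoop 2.x style: a Configuration plus varargs Writer.Options
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(new Path("/Sequence/bb.txt")),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(BytesWritable.class));
        // append a single dummy record so the sketch is self-contained
        writer.append(new Text("demo-key"), new BytesWritable(new byte[]{1, 2, 3}));
        writer.close();
    }
}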


Prerequisites: HBase is already running, and there is a student table with an info column family. Create one yourself if needed, e.g. in the HBase shell: create 'student', 'info'
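
If you prefer to create the table from Java, here is a minimal sketch using the same old-style client API as the rest of the article (HBaseAdmin, like HTable, is deprecated in newer HBase releases; the ZooKeeper settings are the ones from the main program below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateStudentTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "172.16.11.221,172.16.11.222,172.16.11.223");
        conf.set("hbase.zookeeper.property.clientPort", "2800");
        HBaseAdmin admin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("student"));
        // single column family "info", as used by the import code
        desc.addFamily(new HColumnDescriptor("info"));
        if (!admin.tableExists("student")) {
            admin.createTable(desc);
        }
        admin.close();
    }
}

With the prerequisites in place, straight to the main code: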

import java.net.URI;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Created by Administrator on 2017/7/24.
 */
public class SequenceFileTest {

    static String PATH = "hdfs://172.16.11.222:9000/Sequence/bb.txt";
    static SequenceFile.Writer writer = null;

    public static void main(String[] args) throws Exception {
        // Step 1 (already run once, hence commented out): walk the image
        // directory and pack every file into one SequenceFile.
//        Configuration conf = new Configuration();
//        String path = "hdfs://172.16.11.222:9000/JpgSequence";
//        URI uri = new URI(path);
//        FileSystem fileSystem = FileSystem.get(uri, conf);
//        writer = SequenceFile.createWriter(fileSystem, conf, new Path(PATH), Text.class, BytesWritable.class);
//        listFileAndWriteToSequenceFile(fileSystem, path);

        // Step 2: read the SequenceFile back and load it into HBase.
        readSequenceFileAndWriteToHBase(new Path(PATH));
    }

    /**
     * Recursively walk a directory and append every file in it to the
     * SequenceFile (key = full HDFS path, value = raw file bytes).
     */
    public static void listFileAndWriteToSequenceFile(FileSystem fileSystem, String path) throws Exception {
        final FileStatus[] listStatuses = fileSystem.listStatus(new Path(path));
        for (FileStatus fileStatus : listStatuses) {
            if (fileStatus.isFile()) {
                Text fileText = new Text(fileStatus.getPath().toString());
                System.out.println(fileText.toString());
                FSDataInputStream in = fileSystem.open(new Path(fileText.toString()));
                // IOUtils.toByteArray already drains the stream,
                // so no extra in.read(buffer) afterwards
                byte[] buffer = IOUtils.toByteArray(in);
                in.close();
                BytesWritable value = new BytesWritable(buffer);
                // append one record to the SequenceFile
                writer.append(fileText, value);
                System.out.println(fileText + " converted to SequenceFile record");
            }
            if (fileStatus.isDirectory()) {
                listFileAndWriteToSequenceFile(fileSystem, fileStatus.getPath().toString());
            }
        }
    }

    /**
     * Read the SequenceFile and write each record into HBase
     * (a tablename parameter should probably be added here).
     */
    public static void readSequenceFileAndWriteToHBase(Path path1) throws Exception {
        Configuration conf1 = new Configuration();
        // "fs.default.name" is the deprecated spelling of "fs.defaultFS"; it still works on 2.x
        conf1.set("fs.default.name", "hdfs://172.16.11.222:9000");

        // HBase connection settings
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "172.16.11.221,172.16.11.222,172.16.11.223");
        conf.set("hbase.zookeeper.property.clientPort", "2800");
        // raise this value so HBase does not time out on large cells
        conf.set("dfs.socket.timeout", "180000");
        // target table
        HTable htable = new HTable(conf, "student");

        // Open the reader -- this is the new Hadoop 2 style (Reader.Option varargs)
        SequenceFile.Reader.Option option1 = SequenceFile.Reader.file(path1);
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(conf1, option1);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf1);
            BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf1);
            long position = reader.getPosition();
            while (reader.next(key, value)) {
                String syncSeen = reader.syncSeen() ? "*" : "";
                System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key, value);
                String temp = key.toString();
                // The key is the full HDFS path, e.g.
                //   hdfs://172.16.11.222:9000/JpgSequence/化2.jpg
                // so the pieces for a rowkey can be split out of it:
//                temp = temp.substring(temp.indexOf("hdfs://") + 7);  // 172.16.11.222:9000/JpgSequence/化2.jpg
//                String[] keyCat = temp.split("/");
//                String tempIp = temp.split("/")[0].split(":")[0];    // 172.16.11.222
//                String port   = temp.split("/")[0].split(":")[1];    // 9000
//                String path   = temp.split("/")[1];                  // JpgSequence
//                String data   = temp.split("/")[keyCat.length - 1];  // 化2.jpg

                // Rowkey design: assemble it freely from the parts above;
                // here the key is simply used unchanged.
                String rowKey = temp;
                System.out.println(rowKey);

                // Store the raw bytes. (value.toString() would store a hex dump
                // of the image rather than the image itself, hence copyBytes().)
                Put put = new Put(Bytes.toBytes(rowKey));
                // column family, column qualifier, cell value
                put.add("info".getBytes(), temp.getBytes(), value.copyBytes());
                htable.put(put);
                System.out.println(rowKey + " ... loaded into HBase");

                position = reader.getPosition(); // beginning of next record
            }
        } finally {
            org.apache.hadoop.io.IOUtils.closeStream(reader);
        }
        htable.close();

        // Below is the original version found online; I could not get it to work.
//        BytesWritable val = new BytesWritable();
//        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf1);
//        val = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf1);
//
//        while (reader.next(key, val)) {
//            String temp = key.toString();
//            temp = temp.substring(temp.indexOf("Image") + 6, temp.indexOf("."));
//            String[] tmp = temp.split("/");
//            // rowkey design
//            String rowKey = Integer.valueOf(tmp[0]) - 1 + "_" + Integer.valueOf(tmp[1]) / 2 + "_" + Integer.valueOf(tmp[2]) / 2;
//            System.out.println(rowKey);
//            Put put = new Put(Bytes.toBytes(rowKey));
//            put.add("picinfo".getBytes(), temp.getBytes(), val.getBytes());
//            htable.put(put);
//        }
//        org.apache.hadoop.io.IOUtils.closeStream(reader);
    }
}
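
To sanity-check that the image bytes actually landed in HBase, here is a minimal sketch of reading one row back and dumping it to a local file you can open. The rowKey shown is a hypothetical example; use whatever full HDFS path was printed as a key during the load:

import java.io.FileOutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class ReadImageBack {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "172.16.11.221,172.16.11.222,172.16.11.223");
        conf.set("hbase.zookeeper.property.clientPort", "2800");
        HTable htable = new HTable(conf, "student");
        // the import code above uses the file's full HDFS path
        // as both rowkey and column qualifier
        String rowKey = "hdfs://172.16.11.222:9000/JpgSequence/1.jpg"; // hypothetical key
        Get get = new Get(Bytes.toBytes(rowKey));
        Result result = htable.get(get);
        byte[] img = result.getValue("info".getBytes(), rowKey.getBytes());
        // write the cell value to a local file so the image can be inspected
        FileOutputStream out = new FileOutputStream("/tmp/check.jpg");
        out.write(img);
        out.close();
        htable.close();
    }
}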

A screenshot of the result (the printed sequence is very long, so the original image only captured the last few lines). You can also verify the load from the HBase shell:

hbase(main)> scan 'student'