从 HDFS 上读取 SequenceFile 文件

来源:互联网 发布:淘宝商品自动被下架了 编辑:程序博客网 时间:2024/05/20 15:37

   本文承接上一篇文章,介绍对上一篇中上传的文件所做的读取操作。

  文章的链接地址是 http://blog.csdn.net/so_so_jiang/article/details/8484686

  这一篇读取的是在 MR 中采用 SequenceFileAsBinaryInputFormat 处理后产生的文件。与上一篇相比,主要的改动只是 key 和 value 类型的设置以及文件路径的变更。

package test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;

/**
 * Demo that reads a Hadoop {@link SequenceFile} from HDFS and dumps every
 * record's value to a local {@code .jpg} file.
 *
 * <p>Both key and value are read as {@link BytesWritable}; the input path is
 * hard-coded in {@link #readImage()}.
 */
public class SequenceFileReadDemo {

    /** Placeholder for a text-reading variant; intentionally does nothing. */
    public void readText() {
    }

    /**
     * Reads every key/value record of the hard-coded sequence file and writes
     * each value to a local file named {@code <key>.jpg}, where {@code <key>}
     * is the string form of the record key. The file name is relative, so the
     * files land in the process's current working directory.
     *
     * @throws IOException if the sequence file cannot be opened or read, or if
     *                     a local output file cannot be created or written
     */
    public void readImage() throws IOException {
        // Alternative input left by the original author for reference:
        // hdfs://192.168.50.28:8020/user/root/jyl/testByteImageSequenceFile
        String uri = "hdfs://192.168.50.28:8020/user/root/jyl/testByteImageSequenceFileMR/part-00000";
        Configuration con = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), con);
        Path path = new Path(uri);

        // The reader holds an open HDFS stream; close it even if a record fails.
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, con)) {
            BytesWritable key = new BytesWritable();
            BytesWritable value = new BytesWritable();

            while (reader.next(key, value)) {
                System.out.println("key:" + key);
                File file = new File(key + ".jpg");

                // One stream per record; closing it flushes the data and
                // releases the file handle before the next record is read.
                try (OutputStream out = new FileOutputStream(file)) {
                    // getBytes() exposes the reusable backing buffer, which may
                    // be longer than the record; only the first getLength()
                    // bytes belong to the current value.
                    int length = value.getLength();
                    System.out.println("b.length:" + length);
                    out.write(value.getBytes(), 0, length);
                }
            }
        }
    }

    /**
     * Entry point: runs {@link #readImage()} on a fresh instance.
     *
     * @param args ignored
     * @throws IOException propagated from {@link #readImage()}
     */
    public static void main(String[] args) throws IOException {
        SequenceFileReadDemo demo = new SequenceFileReadDemo();
        demo.readImage();
    }
}


原创粉丝点击