Hadoop 上传小文件 合成sequencefile 记录

来源:互联网 发布:2017年二季度经济数据 编辑:程序博客网 时间:2024/05/16 17:15

    Hadoop 支持对二进制文件的处理,其中 SequenceFile 是最重要的方式之一。

    以下是实践中的实现经验(本次只记录上传文件的部分):

package test;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStream;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.BytesWritable;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.SequenceFile;import org.apache.hadoop.io.Text;public class SequenceFileWriteDemo {private static String[] Data={"one,two show one","three,four show two","five,six show three","seven,eight show four","nine,ten show five"};public void writeText() throws IOException{String uri="hdfs://192.168.50.28:8020/user/root/jyl/testImageSequenceFile";Configuration con=new Configuration();FileSystem fs=FileSystem.get(URI.create(uri), con);Path path=new Path("uri");IntWritable key=new IntWritable();Text value=new Text();SequenceFile.Writer writer=null;writer=SequenceFile.createWriter(fs, con, path, key.getClass(), value.getClass());for(int i=0;i<100;i++){key.set(100-i);value.set(Data[i%Data.length]);System.out.printf("[%s]\t%s\t%s\n",writer.getLength(),key,value);writer.append(key, value);}IOUtils.closeStream(writer);}public void writeImage() throws IOException{String uri="hdfs://192.168.50.28:8020/user/root/jyl/testByteImageSequenceFile";Configuration con=new Configuration();FileSystem fs=FileSystem.get(URI.create(uri), con);Path path=new Path(uri);BytesWritable key=new BytesWritable();BytesWritable value=new BytesWritable();File file1=new File("/mnt/disk1/yl/images/zhouzhou.jpg");File file2=new File("/mnt/disk1/yl/images/gouhuo.jpg");InputStream in1=new FileInputStream(file1);InputStream in2=new FileInputStream(file2);byte[] byte1=new byte[(int) file1.length()];byte[] byte2=new byte[(int) file2.length()];in1.read(byte1);in2.read(byte2);SequenceFile.Writer writer=null;writer=SequenceFile.createWriter(fs, con, path, BytesWritable.class,value.getClass());byte[] b1=new 
byte[1];b1[0]=1;BytesWritable bw1 = new BytesWritable(b1);byte[] b2=new byte[1];b2[0]=2;BytesWritable bw2 = new BytesWritable(b2);value.set(byte1, 0, byte1.length);writer.append(bw1,value);value.set(byte2, 0, byte2.length);writer.append(bw2,value);IOUtils.closeStream(writer);}public static void main(String[] args) throws IOException {SequenceFileWriteDemo demo=new SequenceFileWriteDemo();demo.writeImage();//demo.writeText();}}