SequenceFile 小文件合并

来源:互联网 发布:新浪短网址api js 编辑:程序博客网 时间:2024/05/01 19:39

把本地目录下的多个小文件合并成一个 SequenceFile 大文件并上传到 HDFS

package sequenceFileText;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

/**
 * Merges the files of a local directory into a single SequenceFile on HDFS.
 *
 * <p>Each regular file becomes one record: the key is {@link NullWritable} and the value is a
 * {@link Text} holding the raw bytes of the file. File names are not stored in the records.
 *
 * @author 韩利鹏
 */
public class SequenceFileWriter {

    /**
     * Reads every regular file in the hard-coded local directory and appends its content as one
     * record to the hard-coded HDFS SequenceFile.
     *
     * @param args ignored
     * @throws Exception if the local directory cannot be listed, a file cannot be read
     *         completely, or writing to HDFS fails
     */
    public static void main(String[] args) throws Exception {
        // Output side: target SequenceFile on HDFS.
        Configuration conf = new Configuration();
        String seqFS = "hdfs://192.168.10.97:9000/han/sequenceFile";
        FileSystem fs = FileSystem.get(URI.create(seqFS), conf);
        Path seqPath = new Path(seqFS);

        // Input side: list the local directory BEFORE creating the writer, so that a missing
        // directory does not leave a freshly created, empty target file behind.
        String filePath = "D:/decstop/uploadToHaoop/";
        File gzPath = new File(filePath);
        String[] gzFiles = gzPath.list();
        if (gzFiles == null) {
            // File.list() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list local directory: " + filePath);
        }

        SequenceFile.Writer writer =
                SequenceFile.createWriter(fs, conf, seqPath, NullWritable.class, Text.class);
        try {
            Text value = new Text();
            // Walk the listing from the last entry to the first, as the original loop did.
            for (int i = gzFiles.length - 1; i >= 0; i--) {
                File file = new File(filePath + gzFiles[i]);
                if (!file.isFile()) {
                    // Sub-directories and other non-regular entries cannot be read as a stream.
                    continue;
                }
                value.set(readFully(file));
                writer.append(NullWritable.get(), value);
            }
        } finally {
            // Closing the writer is required for the appended records to be flushed to HDFS.
            writer.close();
        }
    }

    /**
     * Reads the complete content of a local file into memory.
     *
     * @param file the regular file to read
     * @return the bytes of the file; an empty array for an empty file
     * @throws IOException if the file is too large for a single array, ends before the length
     *         reported by {@link File#length()} was read, or cannot be read
     */
    private static byte[] readFully(File file) throws IOException {
        long len = file.length();
        if (len > Integer.MAX_VALUE) {
            throw new IOException("File too large for a single record: " + file + " (" + len + " bytes)");
        }
        byte[] buffer = new byte[(int) len];
        InputStream in = new BufferedInputStream(new FileInputStream(file));
        try {
            int offset = 0;
            // A single read() may return fewer bytes than requested, so loop until full.
            while (offset < buffer.length) {
                int n = in.read(buffer, offset, buffer.length - offset);
                if (n < 0) {
                    throw new IOException("Unexpected end of file after " + offset + " of "
                            + buffer.length + " bytes: " + file);
                }
                offset += n;
            }
        } finally {
            in.close();
        }
        return buffer;
    }
}

把 HDFS 上的 SequenceFile 大文件下载到本地(本程序待完善:目前所有记录的内容会被依次写入同一个本地文件,无法还原成原来的各个小文件)

package sequenceFileText;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Downloads the records of an HDFS SequenceFile to the local disk (work in progress).
 *
 * <p>The values of all records are written back to back into one local file; the records are
 * not split into separate files.
 *
 * @author 韩利鹏
 */
public class SequenceFileReader {

    /**
     * Reads every record of the hard-coded HDFS SequenceFile and appends the bytes of each
     * value to the hard-coded local output file.
     *
     * @param args ignored
     * @throws Exception if the SequenceFile cannot be opened or read, or the local file cannot
     *         be written
     */
    public static void main(String[] args) throws Exception {
        // Input side: the SequenceFile on HDFS.
        Configuration conf = new Configuration();
        String seqFS = "hdfs://192.168.10.97:9000/han/sequenceFile";
        FileSystem fs = FileSystem.get(URI.create(seqFS), conf);
        Path seqPath = new Path(seqFS);

        // Output side: a single local file.
        String localPath = "d:/a.txt";

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqPath, conf);
        try {
            // The key instance is created from the key class recorded in the file itself.
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = new Text();

            OutputStream out = new FileOutputStream(localPath);
            try {
                while (reader.next(key, value)) {
                    // Only the first getLength() bytes of the backing array are valid data.
                    out.write(value.getBytes(), 0, value.getLength());
                }
                out.flush();
            } finally {
                out.close();
            }
        } finally {
            // Release the HDFS stream even when reading or writing failed.
            reader.close();
        }
    }
}
0 0
原创粉丝点击