7、压缩与解压缩

来源:互联网 发布:招淘宝客服在家兼职 编辑:程序博客网 时间:2024/06/15 18:15

在Hadoop中有两个地方需要用到压缩:其一是在HDFS上存储数据,以节省存储空间;其二是集群间的通讯需要对数据进行压缩,以提高带宽的利用率。在Java中一切输入输出都是以流的方式进行的:可以从中读取字节序列的对象叫输入流,可以向其写入字节序列的对象叫输出流,文件、网络连接、内存等都可以作为输入流或输出流。

1、从文件到文件的压缩

package com.kevin.hadoop;import java.io.InputStream;import java.io.OutputStream;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.compress.CompressionCodec;import org.apache.hadoop.util.ReflectionUtils;import org.apache.hadoop.fs.Path;public class CprsF2F {public static void main(String[] args)throws Exception{if(args.length != 3){System.err.println("Usage: CprsF2F cmps_name_src_target");System.exit(2);}Class<?> codecClass = Class.forName(args[0]);//未知类Configuration conf = new Configuration();CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(codecClass, conf);InputStream in = null;OutputStream out = null;FileSystem fs = FileSystem.get(URI.create(args[1]), conf);try{in = fs.open(new Path(args[1]));out = codec.createOutputStream(fs.create(new Path(args[2])));IOUtils.copyBytes(in, out, conf);}finally{IOUtils.closeStream(in);IOUtils.closeStream(out);}}}

这里用到两个重要的类:ReflectionUtils(通过反射创建编解码器实例)和CompressionCodec(创建压缩输出流和解压输入流)。

2、从标准输入到压缩文件
package com.kevin.hadoop;

import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Compresses standard input into a file with a codec chosen on the command
 * line.
 *
 * <p>Arguments: {@code args[0]} codec class name, {@code args[1]} target
 * path/URI.
 */
public class CprsIn2F {

    /**
     * Copies {@code System.in} through the codec's output stream into the
     * target file.
     *
     * @param args codec class name, target path
     * @throws Exception if the codec class cannot be loaded, is not a
     *         {@link CompressionCodec}, or any I/O step fails (including a
     *         failure while closing the compressed stream)
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage:CprsIn2F cmps_name_target");
            System.exit(2);
        }

        // The codec class is only known at runtime; asSubclass rejects a
        // non-codec class before it is instantiated.
        Class<? extends CompressionCodec> codecClass =
                Class.forName(args[0]).asSubclass(CompressionCodec.class);
        Configuration conf = new Configuration();
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);

        FileSystem fs = FileSystem.get(URI.create(args[1]), conf);

        // try-with-resources lets an exception thrown while closing the
        // codec stream propagate to the caller instead of being ignored, so a
        // truncated compressed file is not reported as success. The raw
        // stream is its own resource so it is released even if wrapping it
        // fails. System.in is intentionally left open.
        try (OutputStream raw = fs.create(new Path(args[1]));
             OutputStream out = codec.createOutputStream(raw)) {
            IOUtils.copyBytes(System.in, out, 4096, false);
        }
    }
}

3、从文件到文件的解压

package com.kevin.DcprsF2F;import java.io.InputStream;import java.io.OutputStream;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.compress.CompressionCodec;import org.apache.hadoop.util.ReflectionUtils;public class DcprsF2F {public static void main(String[] args)throws Exception{if(args.length != 3){System.err.println("Usage: DcprsF2F cmps_name_src_target");System.exit(2);}Class<?> codecClass = Class.forName(args[0]);Configuration conf = new Configuration();CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(codecClass, conf);InputStream in = null;OutputStream out = null;FileSystem fs = FileSystem.get(URI.create(args[1]),conf);try{in = codec.createInputStream(fs.open(new Path(args[1])),codec.createDecompressor());out = fs.create(new Path(args[2]));IOUtils.copyBytes(in, out, conf);}finally{IOUtils.closeStream(in);IOUtils.closeStream(out);}}}




0 0
原创粉丝点击