读取 Hadoop SequenceFile 格式文件的示例代码

来源:互联网 发布:国泰安数据库的网址 编辑:程序博客网 时间:2024/06/05 14:09
public static void main(String[] args) {org.apache.hadoop.io.SequenceFile.Reader reader = null;java.io.FileOutputStream fos = null;try {String uri = "file:///D:/attempt_201212181734_2923950_r_000000_0";org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();FileSystem fs = FileSystem.get(conf);Path path = new Path(uri);reader = new org.apache.hadoop.io.SequenceFile.Reader(fs, path, conf);Writable key = (Writable) org.apache.hadoop.util.ReflectionUtils.newInstance(reader.getKeyClass(), conf);Writable value = (Writable) org.apache.hadoop.util.ReflectionUtils.newInstance(reader.getValueClass(), conf);int n=0;while(reader.next(key, value)){/* 如果解析出是乱码,尝试用户UTF8转码 *///String valueStr = new String(value.toString().getBytes("ISO8859_1"),"GB2312");System.out.println(value.toString());}} catch (Exception e) {e.printStackTrace();} finally {IOUtils.closeStream(reader);IOUtils.closeStream(fos);}}

原创粉丝点击