SequenceFileInputFormat用法
来源:互联网 发布:阿里云和华为云的区别 编辑:程序博客网 时间:2024/05/18 00:05
SequenceFileInputFormat只能处理SequenceFile类型的文件。
代码:
package inputformat;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;//用之前SequenceFile类型的文件作为处理数据,用那个for循环生成的数据,那个数据指定的类型是<LongWritable,Text>//SequenceFileInputFormat只能处理SequenceFile类型的数据public class SequenceFileInputFormatTest {public static class MyMapper extendsMapper<LongWritable, Text, Text, LongWritable> {final Text k2 = new Text();final LongWritable v2 = new LongWritable();protected void map(LongWritable key, Text value,Mapper<LongWritable, Text, Text, LongWritable>.Context context)throws InterruptedException, IOException {final String line = value.toString();final String[] splited = line.split("\\s");for (String word : splited) {k2.set(word);v2.set(1);context.write(k2, v2);}}}public static class MyReducer extendsReducer<Text, LongWritable, Text, LongWritable> {LongWritable v3 = new LongWritable();protected void reduce(Text k2, Iterable<LongWritable> v2s,Reducer<Text, LongWritable, Text, LongWritable>.Context context)throws IOException, InterruptedException {long count = 0L;for (LongWritable v2 : v2s) {count += v2.get();}v3.set(count);context.write(k2, v3);}}public static void main(String[] args) throws Exception {final Configuration conf = new Configuration();final Job job = Job.getInstance(conf, SequenceFileInputFormatTest.class.getSimpleName());// 
1.1FileInputFormat.setInputPaths(job,"hdfs://192.168.1.10:9000/sf1");//这里改了一下,把TextInputFormat改成了SequenceFileInputFormatjob.setInputFormatClass(SequenceFileInputFormat.class);// 1.2job.setMapperClass(MyMapper.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(LongWritable.class);// 1.3 默认只有一个分区job.setPartitionerClass(HashPartitioner.class);job.setNumReduceTasks(1);// 1.4省略不写// 1.5省略不写// 2.2job.setReducerClass(MyReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(LongWritable.class);// 2.3FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.10:9000/out1"));job.setOutputFormatClass(TextOutputFormat.class);// 执行打成jar包的程序时,必须调用下面的方法job.setJarByClass(SequenceFileInputFormatTest.class);job.waitForCompletion(true);}}
生成SequenceFile类型的文件,供上述SequenceFileInputFormat使用,作为输入数据:
package sequenceFile;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.SequenceFile;import org.apache.hadoop.io.Text;import org.apache.zookeeper.common.IOUtils;//for循环读写操作演示public class Forduxie {public static void main(String args[]) throws Exception {final Path path = new Path("/sf1");Configuration conf = new Configuration();final FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.10:9000/"), conf);@SuppressWarnings("deprecation")final SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,path, LongWritable.class,Text.class);for (int i = 0; i < 10; i++) {writer.append(new LongWritable(i), new Text(i+"=_="));}IOUtils.closeStream(writer);@SuppressWarnings({ "deprecation" })final SequenceFile.Reader reader = new SequenceFile.Reader(fs, path,conf);LongWritable key = new LongWritable();Text val = new Text();while (reader.next(key, val)) {System.out.println(key.get() + "\t" + val.toString());}IOUtils.closeStream(reader);}}
如果创建的是Maven项目,需要在pom.xml文件里添加以下依赖:
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.4</version>
</dependency>
0 0
- SequenceFileInputFormat用法
- SequenceFileInputFormat用法
- SequenceFileInputFormat区别TextInputFormat
- MapReduce 的格式输入----SequenceFileInputFormat ---源码分析
- MR-2.输入格式(InputFormat)TextInputFormat和SequenceFileInputFormat源码分析
- 用法
- ,, 用法
- 用法
- 用法
- #用法
- ??用法
- !!用法
- CustDialog用法 SimpleAdapter 用法
- hibernate_Restrictions用法 hibernate_Restrictions用法
- extern用法+assert用法
- getParameterValues用法
- rpm用法
- Log4j用法
- JVM调优总结(七)-典型配置举例(1)
- 嵌入式Linux要学哪些东西?你真的造吗?
- IOS用CGContextRef画各种图形(文字、圆、直线、弧线、矩形、扇形、椭圆、三角形、圆角矩形、贝塞尔曲线、图片)
- 集合框架六:ArrayList——将自定义对象作为元素存储到ArrayList中,并去除重复元素
- C-顺序栈
- SequenceFileInputFormat用法
- javabean
- jQuery选择器大全
- JVM调优总结(八)-典型配置举例(2)
- Entity Framework Code First关系映射约定 转载
- 黑马程序员_日记48_泛型基本使用
- 集合框架一:Collection基本应用(入门)
- android在代码中设置TextView的颜色
- 07模块-AngularJS基础教程