MapReduce's NLineInputFormat
Source: Internet  Editor: 程序博客网  Time: 2024/05/16 12:54
By default, an input file is split into multiple InputSplits along HDFS block boundaries, so the number of InputSplits depends on the file size and the block size. Each map task processes one InputSplit, and the map function is invoked once for every line (record) in that InputSplit.
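To make the default behavior concrete, here is a minimal sketch (the class and method names are illustrative, not part of Hadoop) of how the split count for a single file is roughly the file size divided by the block size, rounded up; the real FileInputFormat also applies a small "slop" tolerance to the last split, which this sketch ignores:

```java
public class DefaultSplitCountSketch {
    // Approximate number of InputSplits for one file under default
    // FileInputFormat behavior: one split per block (ceiling division).
    static long approxSplits(long fileSizeBytes, long blockSizeBytes) {
        return (fileSizeBytes + blockSizeBytes - 1) / blockSizeBytes;
    }

    public static void main(String[] args) {
        long block = 128L * 1024 * 1024;                         // 128 MB block size
        System.out.println(approxSplits(300L * 1024 * 1024, block)); // prints 3
    }
}
```

A 300 MB file with 128 MB blocks therefore yields 3 splits, hence 3 map tasks, regardless of how many lines it contains.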
With NLineInputFormat, each map task's InputSplit is no longer carved out by block size but by the line count N configured for NLineInputFormat: each InputSplit contains at most N records (the last split may hold fewer). As before, the map function is invoked once per record in the InputSplit.
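Under NLineInputFormat the split count therefore depends on the record count rather than the byte size: a file with L lines and N lines per split produces ceil(L / N) splits and the same number of map tasks. A minimal sketch (class name is illustrative):

```java
public class NLineSplitCountSketch {
    // With NLineInputFormat, each split holds up to N lines, so a file of
    // totalLines records produces ceil(totalLines / N) splits (= map tasks).
    static long splitsFor(long totalLines, long linesPerSplit) {
        return (totalLines + linesPerSplit - 1) / linesPerSplit;
    }

    public static void main(String[] args) {
        System.out.println(splitsFor(10, 3)); // prints 4 (splits of 3+3+3+1 lines)
    }
}
```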
Code example:
package com.bigdata.hadoop.mapred;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyNLineInputFormatApp {

    private static final String INPUT_PATH = "hdfs://hadoop1:9000/dir1/hello";
    private static final String OUTPUT_PATH = "hdfs://hadoop1:9000/dir1/out";

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Option 1: put three records into each InputSplit
        configuration.setInt("mapreduce.input.lineinputformat.linespermap", 3);
        Job job = new Job(configuration, MyNLineInputFormatApp.class.getSimpleName());
        // Option 2: put three records into each InputSplit
        // NLineInputFormat.setNumLinesPerSplit(job, 3);

        // Delete the output directory if it already exists
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), configuration);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        // Use NLineInputFormat so splits are built from record counts
        job.setInputFormatClass(NLineInputFormat.class);
        job.setJarByClass(MyNLineInputFormatApp.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.waitForCompletion(true);
    }

    // Emits (word, 1) for every tab-separated token on each input line
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            final String line = value.toString();
            final String[] splited = line.split("\t");
            for (int i = 0; i < splited.length; i++) {
                context.write(new Text(splited[i]), new LongWritable(1));
            }
        }
    }

    // Sums the counts for each word
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long count = 0L;
            for (LongWritable times : values) {
                count += times.get();
            }
            context.write(key, new LongWritable(count));
        }
    }
}