Custom Counters


Code:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class CounterTest {

    public static class MyMapper extends
            Mapper<LongWritable, Text, Text, LongWritable> {

        final Text k2 = new Text();
        final LongWritable v2 = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, LongWritable>.Context context)
                throws InterruptedException, IOException {
            // Custom counters: group "xiaobaozi", counters "startText" and "endText".
            Counter counterForhello = context.getCounter("xiaobaozi", "startText");
            Counter counterForyou = context.getCounter("xiaobaozi", "endText");
            final String line = value.toString();
            if (line != null) {
                if (line.contains("hello")) {
                    counterForhello.increment(1);
                }
                if (line.contains("you")) {
                    counterForyou.increment(1);
                }
            }
            // Split on whitespace (note: "\\s+" would collapse runs of whitespace).
            final String[] splited = line.split("\\s");
            for (String word : splited) {
                k2.set(word);
                v2.set(1);
                context.write(k2, v2);
            }
        }
    }

    public static class MyReducer extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        LongWritable v3 = new LongWritable();

        @Override
        protected void reduce(Text k2, Iterable<LongWritable> v2s,
                Reducer<Text, LongWritable, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            long count = 0L;
            for (LongWritable v2 : v2s) {
                count += v2.get();
            }
            v3.set(count);
            context.write(k2, v3);
        }
    }

    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        final Job job = Job.getInstance(conf, CounterTest.class.getSimpleName());
        // 1.1 input
        FileInputFormat.setInputPaths(job, "hdfs://192.168.1.100:9000/input/hehe");
        NLineInputFormat.setNumLinesPerSplit(job, Integer.parseInt("2"));
        // NLineInputFormat.setNumLinesPerSplit(job, Integer.parseInt(args[0]));
        job.setInputFormatClass(NLineInputFormat.class);
        // 1.2 mapper
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // 1.3 partitioner
        job.setPartitionerClass(HashPartitioner.class);
        job.setNumReduceTasks(1);
        // 1.4
        // 1.5
        // 2.2 reducer
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 2.3 output
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.100:9000/out1"));
        job.setOutputFormatClass(TextOutputFormat.class);
        // job.setJarByClass(CounterTest.class);
        job.waitForCompletion(true);
    }
}
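The counter values are printed by the framework at the end of the job, but they can also be read programmatically in the driver once waitForCompletion(true) returns, for example to branch on the result. A minimal sketch, assuming the job object from main() above (Counters lives in org.apache.hadoop.mapreduce):

import org.apache.hadoop.mapreduce.Counters;

// In main(), after job.waitForCompletion(true):
Counters counters = job.getCounters();
// findCounter takes the same group/name strings used with context.getCounter in the mapper.
long helloLines = counters.findCounter("xiaobaozi", "startText").getValue();
long youLines = counters.findCounter("xiaobaozi", "endText").getValue();
System.out.println("lines containing 'hello': " + helloLines);
System.out.println("lines containing 'you': " + youLines);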


Console output:

15/04/16 14:06:19 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/04/16 14:06:19 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/04/16 14:06:19 WARN mapred.JobClient: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/04/16 14:06:20 INFO input.FileInputFormat: Total input paths to process : 1
15/04/16 14:06:20 INFO mapred.JobClient: Running job: job_local535401905_0001
15/04/16 14:06:20 INFO mapred.LocalJobRunner: Waiting for map tasks
15/04/16 14:06:20 INFO mapred.LocalJobRunner: Starting task: attempt_local535401905_0001_m_000000_0
15/04/16 14:06:21 INFO mapred.Task:  Using ResourceCalculatorPlugin : null
15/04/16 14:06:21 INFO mapred.MapTask: Processing split: hdfs://192.168.1.100:9000/input/hehe:18+19
15/04/16 14:06:21 INFO mapred.MapTask: io.sort.mb = 100
15/04/16 14:06:21 INFO mapred.MapTask: data buffer = 79691776/99614720
15/04/16 14:06:21 INFO mapred.MapTask: record buffer = 262144/327680
15/04/16 14:06:21 WARN snappy.LoadSnappy: Snappy native library not loaded
15/04/16 14:06:21 INFO mapred.MapTask: Starting flush of map output
15/04/16 14:06:21 INFO mapred.MapTask: Finished spill 0
15/04/16 14:06:21 INFO mapred.Task: Task:attempt_local535401905_0001_m_000000_0 is done. And is in the process of commiting
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:21 INFO mapred.Task: Task 'attempt_local535401905_0001_m_000000_0' done.
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Finishing task: attempt_local535401905_0001_m_000000_0
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Starting task: attempt_local535401905_0001_m_000001_0
15/04/16 14:06:21 INFO mapred.Task:  Using ResourceCalculatorPlugin : null
15/04/16 14:06:21 INFO mapred.MapTask: Processing split: hdfs://192.168.1.100:9000/input/hehe:0+18
15/04/16 14:06:21 INFO mapred.MapTask: io.sort.mb = 100
15/04/16 14:06:21 INFO mapred.MapTask: data buffer = 79691776/99614720
15/04/16 14:06:21 INFO mapred.MapTask: record buffer = 262144/327680
15/04/16 14:06:21 INFO mapred.MapTask: Starting flush of map output
15/04/16 14:06:21 INFO mapred.MapTask: Finished spill 0
15/04/16 14:06:21 INFO mapred.Task: Task:attempt_local535401905_0001_m_000001_0 is done. And is in the process of commiting
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:21 INFO mapred.Task: Task 'attempt_local535401905_0001_m_000001_0' done.
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Finishing task: attempt_local535401905_0001_m_000001_0
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Map task executor complete.
15/04/16 14:06:21 INFO mapred.Task:  Using ResourceCalculatorPlugin : null
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:21 INFO mapred.Merger: Merging 2 sorted segments
15/04/16 14:06:21 INFO mapred.Merger: Down to the last merge-pass, with 2 segments left of total size: 122 bytes
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:22 INFO mapred.JobClient:  map 100% reduce 0%
15/04/16 14:06:23 INFO mapred.Task: Task:attempt_local535401905_0001_r_000000_0 is done. And is in the process of commiting
15/04/16 14:06:23 INFO mapred.LocalJobRunner: 
15/04/16 14:06:23 INFO mapred.Task: Task attempt_local535401905_0001_r_000000_0 is allowed to commit now
15/04/16 14:06:23 INFO output.FileOutputCommitter: Saved output of task 'attempt_local535401905_0001_r_000000_0' to hdfs://192.168.1.100:9000/out1
15/04/16 14:06:23 INFO mapred.LocalJobRunner: reduce > reduce
15/04/16 14:06:23 INFO mapred.Task: Task 'attempt_local535401905_0001_r_000000_0' done.
15/04/16 14:06:24 INFO mapred.JobClient:  map 100% reduce 100%
15/04/16 14:06:24 INFO mapred.JobClient: Job complete: job_local535401905_0001
15/04/16 14:06:24 INFO mapred.JobClient: Counters: 21
15/04/16 14:06:24 INFO mapred.JobClient:   File Output Format Counters 
15/04/16 14:06:24 INFO mapred.JobClient:     Bytes Written=19
15/04/16 14:06:24 INFO mapred.JobClient:   xiaobaozi
15/04/16 14:06:24 INFO mapred.JobClient:     endText=2
15/04/16 14:06:24 INFO mapred.JobClient:     startText=4
15/04/16 14:06:24 INFO mapred.JobClient:   FileSystemCounters
15/04/16 14:06:24 INFO mapred.JobClient:     FILE_BYTES_READ=1319
15/04/16 14:06:24 INFO mapred.JobClient:     HDFS_BYTES_READ=250
15/04/16 14:06:24 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=209690
15/04/16 14:06:24 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=19
15/04/16 14:06:24 INFO mapred.JobClient:   File Input Format Counters 
15/04/16 14:06:24 INFO mapred.JobClient:     Bytes Read=58
15/04/16 14:06:24 INFO mapred.JobClient:   Map-Reduce Framework
15/04/16 14:06:24 INFO mapred.JobClient:     Map output materialized bytes=130
15/04/16 14:06:24 INFO mapred.JobClient:     Map input records=4
15/04/16 14:06:24 INFO mapred.JobClient:     Reduce shuffle bytes=0
15/04/16 14:06:24 INFO mapred.JobClient:     Spilled Records=16
15/04/16 14:06:24 INFO mapred.JobClient:     Map output bytes=102
15/04/16 14:06:24 INFO mapred.JobClient:     Total committed heap usage (bytes)=682819584
15/04/16 14:06:24 INFO mapred.JobClient:     SPLIT_RAW_BYTES=202
15/04/16 14:06:24 INFO mapred.JobClient:     Combine input records=0
15/04/16 14:06:24 INFO mapred.JobClient:     Reduce input records=8
15/04/16 14:06:24 INFO mapred.JobClient:     Reduce input groups=3
15/04/16 14:06:24 INFO mapred.JobClient:     Combine output records=0
15/04/16 14:06:24 INFO mapred.JobClient:     Reduce output records=3
15/04/16 14:06:24 INFO mapred.JobClient:     Map output records=8



In the counter summary at the end of the job you can see the custom group "xiaobaozi" with its two counters:

15/04/16 14:06:24 INFO mapred.JobClient: Counters: 21
15/04/16 14:06:24 INFO mapred.JobClient:   File Output Format Counters 
15/04/16 14:06:24 INFO mapred.JobClient:     Bytes Written=19
15/04/16 14:06:24 INFO mapred.JobClient:   xiaobaozi
15/04/16 14:06:24 INFO mapred.JobClient:     endText=2
15/04/16 14:06:24 INFO mapred.JobClient:     startText=4

Key code:

Counter counterForhello = context.getCounter("xiaobaozi", "startText");
Counter counterForyou = context.getCounter("xiaobaozi", "endText");
final String line = value.toString();
if (line != null) {
    if (line.contains("hello")) {
        counterForhello.increment(1);
    }
    if (line.contains("you")) {
        counterForyou.increment(1);
    }
}
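context.getCounter(groupName, counterName) creates the counter on first use, and the counter is identified purely by the two strings, so a typo silently creates a new counter. Hadoop's context.getCounter(Enum) overload avoids that: the enum class supplies the group name and each constant is a counter, so the compiler checks the names. A minimal sketch of the same two counts using an enum (EnumCounterMapper and LineCounter are illustrative names, not part of the original code):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class EnumCounterMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // The enum's class name becomes the counter group; the constants are the counter names.
    enum LineCounter { HELLO_LINES, YOU_LINES }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        final String line = value.toString();
        if (line.contains("hello")) {
            context.getCounter(LineCounter.HELLO_LINES).increment(1);
        }
        if (line.contains("you")) {
            context.getCounter(LineCounter.YOU_LINES).increment(1);
        }
    }
}

Either way, the framework sums the increments from every map and reduce task, so the values printed at the end of the job are job-wide totals.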



