028_Using Counters in MapReduce

I. Analyzing the console output of a wordcount run

## Run the wordcount word-frequency example against the given input and output paths.

[hadoop@hadoop-master hadoop-1.2.1]$ hadoop jar hadoop-examples-1.2.1.jar wordcount /wc/mininput /wc/minoutput

## Reports how many files under the input path need to be processed

16/03/14 05:08:59 INFO input.FileInputFormat: Total input paths to process : 2

## Reports that the native Hadoop library was loaded; in hadoop 1.x the 64-bit Linux build ships by default under $HADOOP_HOME/lib/native/Linux-amd64-64

16/03/14 05:08:59 INFO util.NativeCodeLoader: Loaded the native-hadoop library

## Warns that the native Snappy compression library could not be loaded; by default a Linux system does not ship this library

16/03/14 05:08:59 WARN snappy.LoadSnappy: Snappy native library not loaded

## Progress information for the running job

### The ID of the running job

16/03/14 05:08:59 INFO mapred.JobClient: Running job: job_201603140438_0001

### Progress of the map tasks and reduce tasks as the job runs

16/03/14 05:09:00 INFO mapred.JobClient:  map 0% reduce 0%

16/03/14 05:09:12 INFO mapred.JobClient:  map 100% reduce 0%

16/03/14 05:09:21 INFO mapred.JobClient:  map 100% reduce 33%

16/03/14 05:09:23 INFO mapred.JobClient:  map 100% reduce 100%

### The job has finished

16/03/14 05:09:24 INFO mapred.JobClient: Job complete: job_201603140438_0001

## Shows the values of all the counters accumulated during the job run

 

## There are 29 counters in total, across five groups

16/03/14 05:09:24 INFO mapred.JobClient: Counters: 29

16/03/14 05:09:24 INFO mapred.JobClient:   Map-Reduce Framework          ## 16 counters in this group

16/03/14 05:09:24 INFO mapred.JobClient:     Spilled Records=66

16/03/14 05:09:24 INFO mapred.JobClient:     Map output materialized bytes=440

16/03/14 05:09:24 INFO mapred.JobClient:     Reduce input records=33

16/03/14 05:09:24 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=5798617088

16/03/14 05:09:24 INFO mapred.JobClient:     Map input records=18

16/03/14 05:09:24 INFO mapred.JobClient:     SPLIT_RAW_BYTES=244

16/03/14 05:09:24 INFO mapred.JobClient:     Map output bytes=395

16/03/14 05:09:24 INFO mapred.JobClient:     Reduce shuffle bytes=440

16/03/14 05:09:24 INFO mapred.JobClient:     Physical memory (bytes) snapshot=411492352

16/03/14 05:09:24 INFO mapred.JobClient:     Reduce input groups=33

16/03/14 05:09:24 INFO mapred.JobClient:     Combine output records=33

16/03/14 05:09:24 INFO mapred.JobClient:     Reduce output records=33

16/03/14 05:09:24 INFO mapred.JobClient:     Map output records=37

16/03/14 05:09:24 INFO mapred.JobClient:     Combine input records=37

16/03/14 05:09:24 INFO mapred.JobClient:     CPU time spent (ms)=2100

16/03/14 05:09:24 INFO mapred.JobClient:     Total committed heap usage (bytes)=337780736

16/03/14 05:09:24 INFO mapred.JobClient:   File Input Format Counters               ## 1 counter in this group

16/03/14 05:09:24 INFO mapred.JobClient:     Bytes Read=261

16/03/14 05:09:24 INFO mapred.JobClient:   FileSystemCounters          ## 4 counters in this group

16/03/14 05:09:24 INFO mapred.JobClient:     HDFS_BYTES_READ=505

16/03/14 05:09:24 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=164155

16/03/14 05:09:24 INFO mapred.JobClient:     FILE_BYTES_READ=434

16/03/14 05:09:24 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=296

16/03/14 05:09:24 INFO mapred.JobClient:   Job Counters           ## 7 counters in this group

16/03/14 05:09:24 INFO mapred.JobClient:     Launched map tasks=2

16/03/14 05:09:24 INFO mapred.JobClient:     Launched reduce tasks=1

16/03/14 05:09:24 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10635

16/03/14 05:09:24 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0

16/03/14 05:09:24 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=18759

16/03/14 05:09:24 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0

16/03/14 05:09:24 INFO mapred.JobClient:     Data-local map tasks=2

16/03/14 05:09:24 INFO mapred.JobClient:   File Output Format Counters            ## 1 counter in this group

16/03/14 05:09:24 INFO mapred.JobClient:     Bytes Written=296
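
II. A custom-counter example in wordcount

The counters above come for free with every job. A job can also define its own: the example below extends wordcount with a custom group named DIYCOUTER_MAP_REDUCE containing three counters that track how many key/value pairs enter the map, how many key groups enter the reduce, and how many pairs the reduce emits. Each call to context.getCounter(group, name).increment(1L) bumps the named counter, and the totals appear in the same job-completion summary analyzed above.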

package org.dragon.hadoop.mapreduce.counter;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * function: custom-counter example built on wordcount
 * @author ZhuXY
 * @time   2016-3-14 10:48:29 AM
 */
public class DIYCounter {
    static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        private Text word = new Text();
        private final static IntWritable one = new IntWritable(1);

        // Override generated with the Eclipse shortcut Alt+Shift+S.
        // map() processes one line of input per call; in other words,
        // it is invoked once for every line of the file.
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {

            //###############################################################
            // Custom counter: how many key/value pairs the mapper receives
            context.getCounter("DIYCOUTER_MAP_REDUCE", "MAP_INPUT_KEYVALUES")
                    .increment(1L);
            //###############################################################

            // Get the text of the current line
            String lineValue = value.toString();

            // Split the line into tokens
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);

            // Iterate over the tokens
            while (stringTokenizer.hasMoreElements()) {

                // Take the next word
                String wordValue = stringTokenizer.nextToken();

                // Set the map output key and emit <word, 1>; a word that
                // appears several times in a line yields several pairs
                // with the same key
                word.set(wordValue);
                context.write(word, one);
            }
        }
    }

    // Reducer
    static class MyReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable resultIntWritable = new IntWritable();

        // The key is still a single key, but the values arrive as an Iterable
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {

            // Custom counter: how many key groups the reducer receives
            context.getCounter("DIYCOUTER_MAP_REDUCE", "REDUCE_INPUT_KEYVALUES")
                    .increment(1L);

            // Accumulator for this word's total
            int sum = 0;
            // Walk the Iterable and add up the counts
            for (IntWritable value : values) {
                sum += value.get();
            }

            // Record the total for this word
            resultIntWritable.set(sum);
            // Custom counter: how many key/value pairs the reducer emits
            context.getCounter("DIYCOUTER_MAP_REDUCE", "REDUCE_OUTPUT_KEYVALUES")
                    .increment(1L);
            context.write(key, resultIntWritable);
        }
    }

    // Client
    public static void main(String[] args) throws Exception {

        // Load the configuration
        Configuration configuration = new Configuration();

        // Hard-code the input and output paths
        args = new String[] {
                "hdfs://hadoop-master.dragon.org:9000/wc/mininput",
                "hdfs://hadoop-master.dragon.org:9000/wc/wcoutput" };

        // Print a usage hint when the arguments are malformed,
        // similar to the hints shell commands give
        String[] otherArgs = new GenericOptionsParser(configuration, args)
                .getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }

        // Create the job with the configuration and a job name
        Job job = new Job(configuration, "wc");

        // 1. Set the class that carries the job jar
        job.setJarByClass(DIYCounter.class);

        // 2. Set the Mapper and Reducer classes
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        // 3. Set the input and output directories
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 4. Set the key and value types of the output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 5. Submit the job, wait for completion, and print progress
        //    on the client
        boolean isSuccess = job.waitForCompletion(true);

        // 6. Exit with the job status
        System.exit(isSuccess ? 0 : 1);
    }
}
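
The custom counters also remain available to the client after the job finishes, through the Counters object. A minimal sketch of reading them back, assuming the job variable from main() above (the helper name printCustomCounters is made up for illustration):

import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical helper: print the custom counters after the job completes.
// Uses the same group/counter names defined in DIYCounter above.
static void printCustomCounters(Job job) throws Exception {
    Counters counters = job.getCounters();
    String group = "DIYCOUTER_MAP_REDUCE";
    for (String name : new String[] { "MAP_INPUT_KEYVALUES",
            "REDUCE_INPUT_KEYVALUES", "REDUCE_OUTPUT_KEYVALUES" }) {
        // Look up each counter by group and counter name and read its value
        long value = counters.findCounter(group, name).getValue();
        System.out.println(group + "." + name + " = " + value);
    }
}

Calling this right after job.waitForCompletion(true) returns would print the same three totals that appear in the console summary.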
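Besides the (group, name) string form used above, context.getCounter also accepts a Java enum constant, in which case the enum's class name serves as the group name. A short sketch of that style (the enum WcCounter and the empty-line check are made up for illustration; the class is meant to sit alongside MyMapper inside DIYCounter, reusing its imports):

// Hypothetical enum-based counters: each constant becomes a counter and
// the enum's class name is used as the counter group in the job output
enum WcCounter { EMPTY_LINES, TOTAL_WORDS }

static class EnumCounterMapper
        extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final Text word = new Text();
    private final static IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.trim().isEmpty()) {
            // Same increment(1L) call as before, keyed by an enum constant
            context.getCounter(WcCounter.EMPTY_LINES).increment(1L);
            return;
        }
        StringTokenizer tokens = new StringTokenizer(line);
        while (tokens.hasMoreTokens()) {
            context.getCounter(WcCounter.TOTAL_WORDS).increment(1L);
            word.set(tokens.nextToken());
            context.write(word, one);
        }
    }
}

Enum counters avoid typos in the group and counter strings, since the compiler checks the constant names.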