自定义计数器
来源:互联网 发布:天弘基金淘宝店首页 编辑:程序博客网 时间:2024/05/23 20:25
代码:
import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Counter;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;public class CounterTest {public static class MyMapper extendsMapper<LongWritable, Text, Text, LongWritable> {final Text k2 = new Text();final LongWritable v2 = new LongWritable();protected void map(LongWritable key, Text value,Mapper<LongWritable, Text, Text, LongWritable>.Context context)throws InterruptedException, IOException {Counter counterForhello=context.getCounter("xiaobaozi", "startText");Counter counterForyou=context.getCounter("xiaobaozi", "endText");final String line = value.toString();if(line!=null){if(line.contains("hello")){counterForhello.increment(1);}if(line.contains("you")){counterForyou.increment(1);}}final String[] splited = line.split("\\s");for (String word : splited) {k2.set(word);v2.set(1);context.write(k2, v2);}}}public static class MyReducer extendsReducer<Text, LongWritable, Text, LongWritable> {LongWritable v3 = new LongWritable();protected void reduce(Text k2, Iterable<LongWritable> v2s,Reducer<Text, LongWritable, Text, LongWritable>.Context context)throws IOException, InterruptedException {long count = 0L;for (LongWritable v2 : v2s) {count += v2.get();}v3.set(count);context.write(k2, v3);}}public static void main(String[] args) throws Exception {final Configuration conf = new Configuration();final Job job = Job.getInstance(conf, CounterTest.class.getSimpleName());// 
1.1FileInputFormat.setInputPaths(job,"hdfs://192.168.1.100:9000/input/hehe");NLineInputFormat.setNumLinesPerSplit(job, Integer.parseInt("2"));//NLineInputFormat.setNumLinesPerSplit(job, Integer.parseInt(args[0]));job.setInputFormatClass(NLineInputFormat.class);// 1.2job.setMapperClass(MyMapper.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(LongWritable.class);// 1.3job.setPartitionerClass(HashPartitioner.class);job.setNumReduceTasks(1);// 1.4// 1.5// 2.2job.setReducerClass(MyReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(LongWritable.class);// 2.3FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.100:9000/out1"));job.setOutputFormatClass(TextOutputFormat.class);// job.setJarByClass(CounterTest.class);job.waitForCompletion(true);}}
console输出结果:
15/04/16 14:06:19 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/04/16 14:06:19 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/04/16 14:06:19 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
15/04/16 14:06:20 INFO input.FileInputFormat: Total input paths to process : 1
15/04/16 14:06:20 INFO mapred.JobClient: Running job: job_local535401905_0001
15/04/16 14:06:20 INFO mapred.LocalJobRunner: Waiting for map tasks
15/04/16 14:06:20 INFO mapred.LocalJobRunner: Starting task: attempt_local535401905_0001_m_000000_0
15/04/16 14:06:21 INFO mapred.Task: Using ResourceCalculatorPlugin : null
15/04/16 14:06:21 INFO mapred.MapTask: Processing split: hdfs://192.168.1.100:9000/input/hehe:18+19
15/04/16 14:06:21 INFO mapred.MapTask: io.sort.mb = 100
15/04/16 14:06:21 INFO mapred.MapTask: data buffer = 79691776/99614720
15/04/16 14:06:21 INFO mapred.MapTask: record buffer = 262144/327680
15/04/16 14:06:21 WARN snappy.LoadSnappy: Snappy native library not loaded
15/04/16 14:06:21 INFO mapred.MapTask: Starting flush of map output
15/04/16 14:06:21 INFO mapred.MapTask: Finished spill 0
15/04/16 14:06:21 INFO mapred.Task: Task:attempt_local535401905_0001_m_000000_0 is done. And is in the process of commiting
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:21 INFO mapred.Task: Task 'attempt_local535401905_0001_m_000000_0' done.
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Finishing task: attempt_local535401905_0001_m_000000_0
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Starting task: attempt_local535401905_0001_m_000001_0
15/04/16 14:06:21 INFO mapred.Task: Using ResourceCalculatorPlugin : null
15/04/16 14:06:21 INFO mapred.MapTask: Processing split: hdfs://192.168.1.100:9000/input/hehe:0+18
15/04/16 14:06:21 INFO mapred.MapTask: io.sort.mb = 100
15/04/16 14:06:21 INFO mapred.MapTask: data buffer = 79691776/99614720
15/04/16 14:06:21 INFO mapred.MapTask: record buffer = 262144/327680
15/04/16 14:06:21 INFO mapred.MapTask: Starting flush of map output
15/04/16 14:06:21 INFO mapred.MapTask: Finished spill 0
15/04/16 14:06:21 INFO mapred.Task: Task:attempt_local535401905_0001_m_000001_0 is done. And is in the process of commiting
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:21 INFO mapred.Task: Task 'attempt_local535401905_0001_m_000001_0' done.
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Finishing task: attempt_local535401905_0001_m_000001_0
15/04/16 14:06:21 INFO mapred.LocalJobRunner: Map task executor complete.
15/04/16 14:06:21 INFO mapred.Task: Using ResourceCalculatorPlugin : null
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:21 INFO mapred.Merger: Merging 2 sorted segments
15/04/16 14:06:21 INFO mapred.Merger: Down to the last merge-pass, with 2 segments left of total size: 122 bytes
15/04/16 14:06:21 INFO mapred.LocalJobRunner: 
15/04/16 14:06:22 INFO mapred.JobClient: map 100% reduce 0%
15/04/16 14:06:23 INFO mapred.Task: Task:attempt_local535401905_0001_r_000000_0 is done. And is in the process of commiting
15/04/16 14:06:23 INFO mapred.LocalJobRunner: 
15/04/16 14:06:23 INFO mapred.Task: Task attempt_local535401905_0001_r_000000_0 is allowed to commit now
15/04/16 14:06:23 INFO output.FileOutputCommitter: Saved output of task 'attempt_local535401905_0001_r_000000_0' to hdfs://192.168.1.100:9000/out1
15/04/16 14:06:23 INFO mapred.LocalJobRunner: reduce > reduce
15/04/16 14:06:23 INFO mapred.Task: Task 'attempt_local535401905_0001_r_000000_0' done.
15/04/16 14:06:24 INFO mapred.JobClient: map 100% reduce 100%
15/04/16 14:06:24 INFO mapred.JobClient: Job complete: job_local535401905_0001
15/04/16 14:06:24 INFO mapred.JobClient: Counters: 21
15/04/16 14:06:24 INFO mapred.JobClient: File Output Format Counters 
15/04/16 14:06:24 INFO mapred.JobClient: Bytes Written=19
15/04/16 14:06:24 INFO mapred.JobClient: xiaobaozi
15/04/16 14:06:24 INFO mapred.JobClient: endText=2
15/04/16 14:06:24 INFO mapred.JobClient: startText=4
15/04/16 14:06:24 INFO mapred.JobClient: FileSystemCounters
15/04/16 14:06:24 INFO mapred.JobClient: FILE_BYTES_READ=1319
15/04/16 14:06:24 INFO mapred.JobClient: HDFS_BYTES_READ=250
15/04/16 14:06:24 INFO mapred.JobClient: FILE_BYTES_WRITTEN=209690
15/04/16 14:06:24 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=19
15/04/16 14:06:24 INFO mapred.JobClient: File Input Format Counters 
15/04/16 14:06:24 INFO mapred.JobClient: Bytes Read=58
15/04/16 14:06:24 INFO mapred.JobClient: Map-Reduce Framework
15/04/16 14:06:24 INFO mapred.JobClient: Map output materialized bytes=130
15/04/16 14:06:24 INFO mapred.JobClient: Map input records=4
15/04/16 14:06:24 INFO mapred.JobClient: Reduce shuffle bytes=0
15/04/16 14:06:24 INFO mapred.JobClient: Spilled Records=16
15/04/16 14:06:24 INFO mapred.JobClient: Map output bytes=102
15/04/16 14:06:24 INFO mapred.JobClient: Total committed heap usage (bytes)=682819584
15/04/16 14:06:24 INFO mapred.JobClient: SPLIT_RAW_BYTES=202
15/04/16 14:06:24 INFO mapred.JobClient: Combine input records=0
15/04/16 14:06:24 INFO mapred.JobClient: Reduce input records=8
15/04/16 14:06:24 INFO mapred.JobClient: Reduce input groups=3
15/04/16 14:06:24 INFO mapred.JobClient: Combine output records=0
15/04/16 14:06:24 INFO mapred.JobClient: Reduce output records=3
15/04/16 14:06:24 INFO mapred.JobClient: Map output records=8
可以看到输出结果:
15/04/16 14:06:24 INFO mapred.JobClient: Counters: 21
15/04/16 14:06:24 INFO mapred.JobClient: File Output Format Counters 
15/04/16 14:06:24 INFO mapred.JobClient: Bytes Written=19
15/04/16 14:06:24 INFO mapred.JobClient: xiaobaozi
15/04/16 14:06:24 INFO mapred.JobClient: endText=2
15/04/16 14:06:24 INFO mapred.JobClient: startText=4
关键代码:
// Excerpt from the mapper's map() method: the part that maintains the custom counters.
// Both counters belong to the "xiaobaozi" group.
Counter counterForhello=context.getCounter("xiaobaozi", "startText");
Counter counterForyou=context.getCounter("xiaobaozi", "endText");
final String line = value.toString();
if(line!=null){
    // "startText" goes up by one for each input line that contains "hello".
    if(line.contains("hello")){
        counterForhello.increment(1);
    }
    // "endText" goes up by one for each input line that contains "you".
    if(line.contains("you")){
        counterForyou.increment(1);
    }
}
0 0
- 自定义计数器
- oracle添加自定义计数器
- Hadoop中自定义计数器
- 【hadoop】用户自定义计数器
- Hadoop中自定义计数器
- Hadoop自定义计数器Counter
- Hadoop中自定义计数器
- Hadoop中自定义计数器
- hadoop命令行中自定义计数器
- hadoop命令行中自定义计数器
- Hadoop自定义计数器的使用
- swift 自定义购物车计数器
- Hadoop学习笔记—7.计数器与自定义计数器
- Hadoop计数器与自定义计数器(笔记7)
- 一个自定义位数的多用户计数器
- Hadoop之——自定义计数器
- Zabbix 自定义监控Windows性能计数器
- 自定义控件——加减计数器
- 拟牛顿法/Quasi-Newton,DFP算法/Davidon-Fletcher-Powell,及BFGS算法/Broyden-Fletcher-Goldfarb-Shanno
- myecilipse如何修改servlet模板
- (转载)详解KMP算法
- jquery时间戳转换为日期格式
- IOS中Json解析的四种方法
- 自定义计数器
- Android 开源框架Universal-Image-Loader完全解析(二)--- 图片缓存策略详解
- ZOJ 3868 GCD Expectation 莫比乌斯反演
- django 同步数据库
- Java(二) this 和super 详解
- UIScrollView的分页与缩放
- opencv-2.4.10 linux环境搭建
- 安装 xdebug centos 5.8
- Libgdx 多场景适配