Hadoop自定义数据类型编程练习
来源:互联网 发布:vb连接oracle 编辑:程序博客网 时间:2024/05/22 09:47
Hadoop自定义数据类型编程练习
代码:
package zidongyi;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**
 * MapReduce exercise: aggregate per-phone-number traffic KPIs
 * (up/down packet counts and up/down payload bytes) from a
 * tab-separated HDFS input file, using the custom {@link KpiWritable}
 * value type.
 *
 * <p>Input record layout (tab-separated): timestamp, msisdn (phone
 * number), AP mac, AC mac, [url], [url category], upPackNum,
 * downPackNum, upPayLoad, downPayLoad, HTTP status.
 */
public class KpiApp {
    static final String INPUT_PATH =
            "hdfs://192.168.1.100:9000/input/HTTP_20130313143750.dat";
    static final String OUT_PATH = "hdfs://192.168.1.100:9000/output/out02";

    public static void main(String[] args) throws Exception {
        final Job job = new Job(new Configuration(), KpiApp.class.getSimpleName());

        // 1.1 Input path and input format (one text line per record).
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        job.setInputFormatClass(TextInputFormat.class);

        // 1.2 Mapper and its output <k2, v2> types.
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(KpiWritable.class);

        // 1.3 Partitioner; a single reduce task keeps all keys together.
        job.setPartitionerClass(HashPartitioner.class);
        job.setNumReduceTasks(1);

        // 1.4 TODO sort/group customization
        // 1.5 TODO (optional) combiner

        // 2.2 Reducer and its output <k3, v3> types.
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(KpiWritable.class);

        // 2.3 Output path and output format.
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
        job.setOutputFormatClass(TextOutputFormat.class);

        // Submit the job and block until it finishes.
        job.waitForCompletion(true);
    }

    /**
     * Emits <msisdn, KpiWritable> for each input line.
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            final String[] splited = value.toString().split("\t");
            final String msisdn = splited[1];
            final Text k2 = new Text(msisdn);
            // NOTE(review): columns 6..9 assume every record carries the
            // optional url/category fields; the sample data shows lines
            // without them — confirm the real input is fully populated,
            // otherwise this throws ArrayIndexOutOfBoundsException.
            final KpiWritable v2 =
                    new KpiWritable(splited[6], splited[7], splited[8], splited[9]);
            context.write(k2, v2);
        }
    }

    /**
     * Sums all KPI counters per phone number.
     */
    static class MyReducer extends Reducer<Text, KpiWritable, Text, KpiWritable> {
        /**
         * @param k2  a distinct phone number from the whole input
         * @param v2s the traffic records of that phone number across time
         */
        @Override
        protected void reduce(Text k2, Iterable<KpiWritable> v2s, Context context)
                throws IOException, InterruptedException {
            long upPackNum = 0L;
            long downPackNum = 0L;
            long upPayLoad = 0L;
            long downPayLoad = 0L;
            for (KpiWritable kpiWritable : v2s) {
                upPackNum += kpiWritable.upPackNum;
                downPackNum += kpiWritable.downPackNum;
                upPayLoad += kpiWritable.upPayLoad;
                downPayLoad += kpiWritable.downPayLoad;
            }
            final KpiWritable v3 = new KpiWritable(
                    upPackNum + "", downPackNum + "", upPayLoad + "", downPayLoad + "");
            context.write(k2, v3);
        }
    }
}

/**
 * Custom Hadoop value type holding four traffic counters.
 *
 * <p>{@link #write(DataOutput)} and {@link #readFields(DataInput)} must
 * serialize the fields in the same order (upPackNum, downPackNum,
 * upPayLoad, downPayLoad) — they do. The no-arg constructor is required
 * by Hadoop's reflective instantiation during deserialization.
 */
class KpiWritable implements Writable {
    long upPackNum;
    long downPackNum;
    long upPayLoad;
    long downPayLoad;

    public KpiWritable() {
    }

    /**
     * @throws NumberFormatException if any argument is not a valid long
     */
    public KpiWritable(String upPackNum, String downPackNum,
                       String upPayLoad, String downPayLoad) {
        this.upPackNum = Long.parseLong(upPackNum);
        this.downPackNum = Long.parseLong(downPackNum);
        this.upPayLoad = Long.parseLong(upPayLoad);
        this.downPayLoad = Long.parseLong(downPayLoad);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.upPackNum = in.readLong();
        this.downPackNum = in.readLong();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upPackNum);
        out.writeLong(downPackNum);
        out.writeLong(upPayLoad);
        out.writeLong(downPayLoad);
    }

    /** Tab-separated counters, matching TextOutputFormat's value column. */
    @Override
    public String toString() {
        return upPackNum + "\t" + downPackNum + "\t" + upPayLoad + "\t" + downPayLoad;
    }
}
数据上传到HDFS上面:
运行过程Console:
15/02/2200:04:22 WARN util.NativeCodeLoader: Unable to load native-hadoop library foryour platform... using builtin-java classes where applicable15/02/2200:04:22 WARN mapred.JobClient: Use GenericOptionsParser for parsing thearguments. Applications should implement Tool for the same.15/02/2200:04:22 WARN mapred.JobClient: No job jar file set. User classes may not be found. SeeJobConf(Class) or JobConf#setJar(String).15/02/2200:04:22 INFO input.FileInputFormat: Total input paths to process : 115/02/2200:04:22 WARN snappy.LoadSnappy: Snappy native library not loaded15/02/2200:04:23 INFO mapred.JobClient: Running job: job_local1887351217_000115/02/2200:04:23 INFO mapred.LocalJobRunner: Waiting for map tasks15/02/2200:04:23 INFO mapred.LocalJobRunner: Starting task:attempt_local1887351217_0001_m_000000_015/02/2200:04:23 INFO mapred.Task: UsingResourceCalculatorPlugin : null15/02/2200:04:23 INFO mapred.MapTask: Processing split: hdfs://192.168.1.100:9000/input/HTTP_20130313143750.dat:0+221415/02/2200:04:23 INFO mapred.MapTask: io.sort.mb = 10015/02/2200:04:23 INFO mapred.MapTask: data buffer = 79691776/9961472015/02/2200:04:23 INFO mapred.MapTask: record buffer = 262144/32768015/02/2200:04:23 INFO mapred.MapTask: Starting flush of map output15/02/2200:04:23 INFO mapred.MapTask: Finished spill 015/02/2200:04:23 INFO mapred.Task: Task:attempt_local1887351217_0001_m_000000_0 isdone. 
And is in the process of commiting15/02/2200:04:23 INFO mapred.LocalJobRunner:15/02/2200:04:23 INFO mapred.Task: Task 'attempt_local1887351217_0001_m_000000_0' done.15/02/2200:04:23 INFO mapred.LocalJobRunner: Finishing task:attempt_local1887351217_0001_m_000000_015/02/2200:04:23 INFO mapred.LocalJobRunner: Map task executor complete.15/02/2200:04:23 INFO mapred.Task: UsingResourceCalculatorPlugin : null15/02/2200:04:23 INFO mapred.LocalJobRunner:15/02/2200:04:23 INFO mapred.Merger: Merging 1 sorted segments15/02/2200:04:23 INFO mapred.Merger: Down to the last merge-pass, with 1 segments leftof total size: 1011 bytes15/02/2200:04:23 INFO mapred.LocalJobRunner:15/02/2200:04:24 INFO mapred.Task: Task:attempt_local1887351217_0001_r_000000_0 isdone. And is in the process of commiting15/02/2200:04:24 INFO mapred.LocalJobRunner:15/02/2200:04:24 INFO mapred.Task: Task attempt_local1887351217_0001_r_000000_0 isallowed to commit now15/02/2200:04:24 INFO mapred.JobClient: map 100%reduce 0%15/02/2200:04:24 INFO output.FileOutputCommitter: Saved output of task'attempt_local1887351217_0001_r_000000_0' tohdfs://192.168.1.100:9000/output/out0215/02/2200:04:24 INFO mapred.LocalJobRunner: reduce > reduce15/02/2200:04:24 INFO mapred.Task: Task 'attempt_local1887351217_0001_r_000000_0' done.15/02/2200:04:25 INFO mapred.JobClient: map 100%reduce 100%15/02/2200:04:25 INFO mapred.JobClient: Job complete: job_local1887351217_000115/02/2200:04:25 INFO mapred.JobClient: Counters: 1915/02/2200:04:25 INFO mapred.JobClient: FileOutput Format Counters15/02/2200:04:25 INFO mapred.JobClient: BytesWritten=55615/02/2200:04:25 INFO mapred.JobClient: FileInput Format Counters15/02/2200:04:25 INFO mapred.JobClient: BytesRead=221415/02/2200:04:25 INFO mapred.JobClient: FileSystemCounters15/02/2200:04:25 INFO mapred.JobClient: FILE_BYTES_READ=136515/02/2200:04:25 INFO mapred.JobClient: HDFS_BYTES_READ=442815/02/2200:04:25 INFO mapred.JobClient: FILE_BYTES_WRITTEN=14105415/02/2200:04:25 INFO 
mapred.JobClient: HDFS_BYTES_WRITTEN=55615/02/2200:04:25 INFO mapred.JobClient: Map-Reduce Framework15/02/2200:04:25 INFO mapred.JobClient: Mapoutput materialized bytes=101515/02/2200:04:25 INFO mapred.JobClient: Mapinput records=2215/02/2200:04:25 INFO mapred.JobClient: Reduce shuffle bytes=015/02/2200:04:25 INFO mapred.JobClient: Spilled Records=4415/02/2200:04:25 INFO mapred.JobClient: Mapoutput bytes=96515/02/2200:04:25 INFO mapred.JobClient: Totalcommitted heap usage (bytes)=32387891215/02/2200:04:25 INFO mapred.JobClient: Combine input records=015/02/2200:04:25 INFO mapred.JobClient: SPLIT_RAW_BYTES=12015/02/2200:04:25 INFO mapred.JobClient: Reduce input records=2215/02/2200:04:25 INFO mapred.JobClient: Reduce input groups=2115/02/2200:04:25 INFO mapred.JobClient: Combine output records=015/02/2200:04:25 INFO mapred.JobClient: Reduce output records=2115/02/2200:04:25 INFO mapred.JobClient: Mapoutput records=22
HDFS显示运行结果:
附数据:
格式为:记录报告时间戳、手机号码、AP mac、AC mac、访问的网址、网址种类、上行数据包数、下行数据包数、上行总流量、下行总流量、HTTP Response的状态。
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 2001363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 4 0 264 0 2001363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 2 4 132 1512 2001363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 4 0 240 0 2001363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站 15 12 1527 2106 2001363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 122.72.52.12 20 16 4116 1432 2001363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 18 15 1116 954 2001363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 sug.so.360.cn 信息安全 20 20 3156 2936 2001363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 4 0 240 0 2001363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计 24 9 6960 690 2001363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎 28 27 3659 3538 2001363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 www.umeng.com 站点统计 3 3 1938 180 2001363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 15 9 918 4938 2001363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 3 3 180 180 2001363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 2052.flash2-http.qq.com 综合门户 15 12 1938 2910 2001363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 img.qfc.cn 12 12 3008 3720 2001363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户 57 102 7335 110349 2001363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎 21 18 9531 2412 2001363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 t3.baidu.com 搜索引擎 69 63 11058 48243 2001363157988072 13760778710 00-FD-07-A4-7B-08:CMCC 120.196.100.82 2 2 120 120 2001363157985079 13823070001 20-7C-8F-70-68-1F:CMCC 120.196.100.99 6 3 360 180 2001363157985069 13600217502 
00-1F-64-E2-E8-B1:CMCC 120.196.100.55 18 138 1080 186852 200
0 0
- Hadoop自定义数据类型编程练习
- Hadoop编程-自定义Hadoop数据类型报错:NoSuchMethodException
- hadoop自定义数据类型
- hadoop自定义数据类型
- 【转】自定义Hadoop数据类型
- hadoop 自定义数据类型
- hadoop自定义数据类型
- hadoop自定义数据类型
- hadoop-自定义数据类型
- Hadoop自定义数据类型
- Hadoop自定义数据类型
- Hadoop 自定义数据类型
- hadoop自定义数据类型
- Hadoop 学习自定义数据类型
- hadoop自定义数据类型
- Hadoop 1.x自定义数据类型
- Hadoop 自定义数据类型和自定义排序
- hadoop数据类型,编程作为参考
- MapReduce编程8步骤
- 【Java加密解密】AES加密算法
- 03-3. 12-24小时制
- 03-4. 成绩转换
- android拍照获得图片及获得图片后剪切设置到ImageView
- Hadoop自定义数据类型编程练习
- 懒人日记 之 自己到底有多懒
- Hadoop到底能做什么?怎么用hadoop?
- iOS 5中UIViewController
- vijos 1042 捕风捉影
- 【Java加密解密】PBE算法
- 【JavaScript】JavaScript的对象-对象专门语句
- Windows下QT Creator配置protobuf
- RMQ问题的Sparse-Table算法