hadoop的InputFormat简单demo
来源:互联网 发布:喷神james 知乎 编辑:程序博客网 时间:2024/06/05 07:59
1.序列化对象
package com.lijie.inutformat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Hadoop-serializable value object holding five float scores (a..e).
 *
 * <p>Used as the map-output value of the score demo job. Implements
 * {@link WritableComparable} so it can also serve as a key: the ordering
 * compares the five fields lexicographically (a first, e last) and is
 * consistent with {@link #equals(Object)}, which both use the
 * {@code floatToIntBits} semantics (NaN equals NaN, +0.0 differs from -0.0).
 *
 * <p>Field order in {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must stay identical (a, b, c, d, e) —
 * Hadoop serialization is purely positional.
 */
public class ScorePair implements WritableComparable<ScorePair> {

    private float a;
    private float b;
    private float c;
    private float d;
    private float e;

    /** No-arg constructor required by Hadoop's reflective deserialization. */
    public ScorePair() {
        super();
    }

    public ScorePair(float a, float b, float c, float d, float e) {
        super();
        this.a = a;
        this.b = b;
        this.c = c;
        this.d = d;
        this.e = e;
    }

    public float getA() {
        return a;
    }

    public void setA(float a) {
        this.a = a;
    }

    public float getB() {
        return b;
    }

    public void setB(float b) {
        this.b = b;
    }

    public float getC() {
        return c;
    }

    public void setC(float c) {
        this.c = c;
    }

    public float getD() {
        return d;
    }

    public void setD(float d) {
        this.d = d;
    }

    public float getE() {
        return e;
    }

    public void setE(float e) {
        this.e = e;
    }

    /**
     * Resets all five fields at once; lets the RecordReader reuse a single
     * instance per split instead of allocating one per record.
     */
    public void set(float a, float b, float c, float d, float e) {
        this.a = a;
        this.b = b;
        this.c = c;
        this.d = d;
        this.e = e;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + Float.floatToIntBits(a);
        result = prime * result + Float.floatToIntBits(b);
        result = prime * result + Float.floatToIntBits(c);
        result = prime * result + Float.floatToIntBits(d);
        result = prime * result + Float.floatToIntBits(e);
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        ScorePair other = (ScorePair) obj;
        if (Float.floatToIntBits(a) != Float.floatToIntBits(other.a))
            return false;
        if (Float.floatToIntBits(b) != Float.floatToIntBits(other.b))
            return false;
        if (Float.floatToIntBits(c) != Float.floatToIntBits(other.c))
            return false;
        if (Float.floatToIntBits(d) != Float.floatToIntBits(other.d))
            return false;
        if (Float.floatToIntBits(e) != Float.floatToIntBits(other.e))
            return false;
        return true;
    }

    /** Deserializes the five fields in the exact order written by {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        a = in.readFloat();
        b = in.readFloat();
        c = in.readFloat();
        d = in.readFloat();
        e = in.readFloat();
    }

    /** Serializes the five fields in a fixed order (a, b, c, d, e). */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(a);
        out.writeFloat(b);
        out.writeFloat(c);
        out.writeFloat(d);
        out.writeFloat(e);
    }

    /**
     * Lexicographic field-by-field comparison, consistent with equals.
     *
     * <p>Bug fix: the original always returned 0, which breaks the
     * {@code Comparable} contract ("consistent with equals") and would make
     * every instance compare equal when used as a Hadoop key or in sorted
     * collections. {@link Float#compare(float, float)} matches the
     * {@code floatToIntBits}-based equality used above for NaN and signed zero.
     */
    @Override
    public int compareTo(ScorePair o) {
        int cmp = Float.compare(a, o.a);
        if (cmp != 0)
            return cmp;
        cmp = Float.compare(b, o.b);
        if (cmp != 0)
            return cmp;
        cmp = Float.compare(c, o.c);
        if (cmp != 0)
            return cmp;
        cmp = Float.compare(d, o.d);
        if (cmp != 0)
            return cmp;
        return Float.compare(e, o.e);
    }
}
2.InputFormat类
package com.lijie.inutformat;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;

/**
 * Custom InputFormat producing ({@code Text} "name\tid", {@link ScorePair})
 * pairs from whitespace-separated 7-column score files.
 *
 * <p>Splitting is disabled so each file is read by exactly one mapper —
 * the simple line reader below has no logic to resync at a mid-file offset.
 */
public class ScoreInputFormat extends FileInputFormat<Text, ScorePair> {

    /** Files are never split; see class comment. */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }

    @Override
    public RecordReader<Text, ScorePair> createRecordReader(
            InputSplit arg0, TaskAttemptContext arg1)
            throws IOException, InterruptedException {
        return new ScoreRecordReader();
    }
}

/**
 * Reads one text line per record. Expects exactly 7 whitespace-separated
 * columns: two key columns (presumably name and student id — confirm with the
 * data files) followed by five float scores.
 */
class ScoreRecordReader extends RecordReader<Text, ScorePair> {

    private LineReader in;
    private FSDataInputStream fileIn;
    private Text lineKey;
    private ScorePair lineValue;
    private Text line;
    /** Byte offset where this split starts (0 for unsplittable files). */
    private long start;
    /** Byte offset one past the end of this split. */
    private long end;

    @Override
    public void close() throws IOException {
        // Closing the LineReader also closes the wrapped FSDataInputStream.
        if (in != null) {
            in.close();
        }
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return lineKey;
    }

    @Override
    public ScorePair getCurrentValue() throws IOException, InterruptedException {
        return lineValue;
    }

    /**
     * Fraction of the split consumed so far.
     *
     * <p>Bug fix: the original always returned 0, so the framework/UI showed
     * no map progress. Progress is now derived from the underlying stream
     * position relative to the split boundaries.
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        if (start == end) {
            return 0.0f;
        }
        return Math.min(1.0f, (fileIn.getPos() - start) / (float) (end - start));
    }

    @Override
    public void initialize(InputSplit arg0, TaskAttemptContext arg1)
            throws IOException, InterruptedException {
        FileSplit split = (FileSplit) arg0;
        Configuration conf = arg1.getConfiguration();
        Path path = split.getPath();
        FileSystem fs = path.getFileSystem(conf);
        start = split.getStart();
        end = start + split.getLength();
        fileIn = fs.open(path);
        in = new LineReader(fileIn, conf);
        // Reused across records to avoid per-line allocations.
        line = new Text();
        lineKey = new Text();
        lineValue = new ScorePair();
    }

    /**
     * Advances to the next line; returns false at end of input.
     *
     * @throws IOException if a line does not have exactly 7 columns
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        // LineReader.readLine returns the number of bytes consumed; 0 means EOF.
        int lineSize = in.readLine(line);
        if (lineSize == 0)
            return false;
        String[] split = line.toString().split("\\s+");
        if (split.length != 7) {
            throw new IOException("数据错误!");
        }
        float a, b, c, d, e;
        a = Float.parseFloat(split[2].trim());
        b = Float.parseFloat(split[3].trim());
        c = Float.parseFloat(split[4].trim());
        d = Float.parseFloat(split[5].trim());
        e = Float.parseFloat(split[6].trim());
        // Key = first two columns joined by a tab; value = the five scores.
        lineKey.set(split[0] + "\t" + split[1]);
        lineValue.set(a, b, c, d, e);
        return true;
    }
}
3.mapreduce程序
package com.lijie.inutformat;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver for the score demo job: reads score files via
 * {@link ScoreInputFormat} and emits, per key, the sum and average of the
 * five float scores.
 */
public class ScoreMapReduce extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        String[] path = {"hdfs://lijie:9000/score/*", "hdfs://lijie:9000/score/out"};
        int run = ToolRunner.run(new Configuration(), new ScoreMapReduce(), path);
        System.exit(run);
    }

    /** Identity mapper: the InputFormat already produces the desired pairs. */
    public static class ScoreMap extends Mapper<Text, ScorePair, Text, ScorePair> {
        @Override
        protected void map(Text key, ScorePair value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    /** Emits "sum:<s>\tavg:<a>" per key. */
    public static class ScoreReduce extends Reducer<Text, ScorePair, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<ScorePair> values, Context context)
                throws IOException, InterruptedException {
            // NOTE(review): only the first value is used — assumes each key
            // occurs on exactly one input line; verify against the data files.
            ScorePair value = values.iterator().next();
            // sum of the five scores
            float sum = value.getA() + value.getB() + value.getC()
                    + value.getD() + value.getE();
            // average over the five scores
            float avg = sum / 5;
            context.write(key, new Text("sum:" + sum + "\t" + "avg:" + avg));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param arg arg[0] = input glob, arg[1] = output directory
     * @return 0 on success, 1 on failure (was previously always 0 — fixed)
     */
    @Override
    public int run(String[] arg) throws Exception {
        // Bug fix: use the Configuration injected by ToolRunner (picks up -D
        // options) instead of building a fresh one, per the Tool contract.
        Configuration conf = getConf();
        Path path = new Path(arg[1]);
        FileSystem fs = path.getFileSystem(conf);
        // Bug fix: check existence, not isDirectory — a stale output *file*
        // would previously survive and fail the job at submit time.
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        // Job.getInstance replaces the deprecated new Job(conf, name).
        Job job = Job.getInstance(conf, "score");
        job.setJarByClass(ScoreMapReduce.class);
        job.setMapperClass(ScoreMap.class);
        job.setReducerClass(ScoreReduce.class);
        job.setInputFormatClass(ScoreInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(arg[0]));
        FileOutputFormat.setOutputPath(job, path);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ScorePair.class);
        // Bug fix: propagate job success/failure to the caller / exit code.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
0 0
- hadoop的InputFormat简单demo
- 自定义hadoop的inputformat
- 自定义hadoop的InputFormat
- 关于Hadoop的InputFormat类
- 关于Hadoop的InputFormat类
- Hadoop的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop中常用的InputFormat,OutPutFormat类
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop中常用的InputFormat、OutputFormat
- hadoop的序列化简单demo
- Hadoop InputFormat浅析
- 沙场9
- 常见排序算法之归并排序
- Android App的架构设计:从VM、MVC、MVP到MVVM
- spark streaming programming guide 快速开始(二)
- 第七周项目4——队列数组
- hadoop的InputFormat简单demo
- 沙场12
- android 项目常用工具方法
- oracle数据库对象
- 单片机按键去抖原理
- 欢迎使用CSDN-markdown编辑器
- 简单的 websocket
- 实现把一个音频文件的视频抽取出来
- SPSS-数据处理-数据变量