Hadoop Custom Data Types
1. A custom data type is defined by implementing the WritableComparable interface.
2. Input data format: each record has three fields: class ID, name, student ID. The sample lines below carry two tab-separated records each:
1 小明 10 3 大明 11
1 小王 12 4 大王 13
1 小张 14 6 大张 15
3. Output data format: all students of the same class appear together on one line. The Mapper emits the class ID as the key, so the shuffle delivers every record of a class to the same reduce call:
1	id:14 name:小张	id:12 name:小王	id:10 name:小明
3	id:11 name:大明
4	id:13 name:大王
6	id:15 name:大张
4. Define the User class
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * A custom Hadoop data type: serialized via write()/readFields(),
 * sortable via compareTo().
 * @author DELL_pc
 * @date March 15, 2017
 */
public class User implements WritableComparable<User> {
    int id;
    String name;
    int classid;

    // Hadoop instantiates Writables reflectively, so the no-arg
    // constructor is required.
    public User() {
    }

    public User(int classid, String name, int id) {
        this.id = id;
        this.name = name;
        this.classid = classid;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getClassid() {
        return classid;
    }

    public void setClassid(int classid) {
        this.classid = classid;
    }

    // Serialization: the field order here must mirror readFields() exactly.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(name);
        out.writeInt(classid);
    }

    // Deserialization: read the fields back in the order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readInt();
        name = in.readUTF();
        classid = in.readInt();
    }

    // Order by class ID first, then by student ID; a consistent ordering
    // is required by the compareTo contract for sorting to work.
    @Override
    public int compareTo(User o) {
        int cmp = Integer.compare(classid, o.classid);
        return cmp != 0 ? cmp : Integer.compare(id, o.id);
    }

    @Override
    public String toString() {
        return "User [id=" + id + ", name=" + name + ", classid=" + classid + "]";
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + classid;
        result = prime * result + id;
        result = prime * result + ((name == null) ? 0 : name.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null || getClass() != obj.getClass())
            return false;
        User other = (User) obj;
        return classid == other.classid && id == other.id
                && (name == null ? other.name == null : name.equals(other.name));
    }
}
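Because write() and readFields() must mirror each other, a quick round-trip check is worth doing before wiring the type into a job. Below is a minimal sketch using Hadoop's DataOutputBuffer/DataInputBuffer utilities; the UserRoundTrip class name is my own, not part of the original post.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class UserRoundTrip {
    public static void main(String[] args) throws Exception {
        User original = new User(1, "小明", 10);

        // Serialize with write(), exactly as the framework would.
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        // Deserialize into a fresh instance with readFields().
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        User copy = new User();
        copy.readFields(in);

        // The copy must equal the original; this only holds if the
        // field order in write() and readFields() matches.
        System.out.println(original.equals(copy));  // expected: true
    }
}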
5. Define the Mapper
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Emits (class ID, User) pairs so the shuffle groups students by class.
 * @author DELL_pc
 * @date March 15, 2017
 */
public class MyMapper extends Mapper<LongWritable, Text, IntWritable, User> {
    IntWritable cla = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Records on a line are tab-separated; fields within a record
        // are whitespace-separated: classid, name, id.
        StringTokenizer records = new StringTokenizer(value.toString(), "\t");
        while (records.hasMoreTokens()) {
            StringTokenizer fields = new StringTokenizer(records.nextToken());
            while (fields.hasMoreTokens()) {
                int classid = Integer.valueOf(fields.nextToken());
                String name = fields.nextToken();
                int id = Integer.valueOf(fields.nextToken());
                User user = new User(classid, name, id);
                cla.set(classid);
                context.write(cla, user);
            }
        }
    }
}
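To see how the tokenizing in map() carves up one input line, here is a standalone sketch that runs without Hadoop (ParseDemo is a hypothetical name introduced for illustration):

import java.util.StringTokenizer;

public class ParseDemo {
    public static void main(String[] args) {
        // One input line: two records separated by a tab,
        // fields within a record separated by spaces.
        String line = "1 小明 10\t3 大明 11";
        StringTokenizer records = new StringTokenizer(line, "\t");
        while (records.hasMoreTokens()) {
            StringTokenizer fields = new StringTokenizer(records.nextToken());
            int classid = Integer.parseInt(fields.nextToken());
            String name = fields.nextToken();
            int id = Integer.parseInt(fields.nextToken());
            System.out.printf("classid=%d name=%s id=%d%n", classid, name, id);
        }
    }
}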
6. Define the Reducer
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Concatenates all students of one class into a single output line.
 * @author DELL_pc
 * @date March 15, 2017
 */
public class MyReducer extends Reducer<IntWritable, User, IntWritable, Text> {
    Text text = new Text();

    @Override
    protected void reduce(IntWritable key, Iterable<User> users, Context context)
            throws IOException, InterruptedException {
        StringBuilder buffer = new StringBuilder();
        int classid = 0;
        IntWritable intWrit = new IntWritable();
        // Hadoop reuses one User instance across this loop, so we only
        // read its fields here rather than keeping references to it.
        for (User user : users) {
            classid = user.getClassid();
            buffer.append("id:" + user.getId() + " name:" + user.getName() + "\t");
        }
        intWrit.set(classid);
        text.set(buffer.toString());
        context.write(intWrit, text);
    }
}
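One caveat worth spelling out for custom value types: inside the reduce loop, Hadoop reuses a single User instance for every value, so storing references to it leaves you with N copies of the last record. The sketch below simulates that reuse outside Hadoop (ReuseDemo is a hypothetical name); if the values must outlive the loop, copy the fields into a fresh object.

import java.util.ArrayList;
import java.util.List;

public class ReuseDemo {
    public static void main(String[] args) {
        // Simulate the framework reusing one User instance across
        // the values of a single reduce() call.
        User shared = new User();
        int[][] rows = { {1, 10}, {1, 12}, {1, 14} };
        String[] names = { "小明", "小王", "小张" };

        List<User> aliased = new ArrayList<>();
        List<User> copied = new ArrayList<>();
        for (int i = 0; i < rows.length; i++) {
            shared.setClassid(rows[i][0]);
            shared.setName(names[i]);
            shared.setId(rows[i][1]);
            aliased.add(shared);  // every element points at the same object
            copied.add(new User(shared.getClassid(), shared.getName(), shared.getId()));
        }
        System.out.println(aliased); // three identical rows: the last record, repeated
        System.out.println(copied);  // the three distinct records
    }
}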
7. Define the driver (main function)
/**
 * Job driver: wires the custom User type into the MapReduce job.
 * @author DELL_pc
 * @date March 15, 2017
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class UserDemo implements Tool {
    private Configuration conf = null;

    @Override
    public Configuration getConf() {
        return this.conf;
    }

    @Override
    public void setConf(Configuration that) {
        this.conf = that;
        this.conf.set("fs.defaultFS", "hdfs://192.168.59.130:8020");
    }

    @Override
    public int run(String[] arg0) throws Exception {
        Configuration conf = this.conf;
        Job job = Job.getInstance(conf, "usercount");

        // Input
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.addInputPath(job, new Path("/user/lei/wordcount/"));

        // Map: key is the class ID, value is the custom User type
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(User.class);

        // Reduce
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        // Output (the directory must not already exist)
        FileOutputFormat.setOutputPath(job, new Path("/user/lei/wordcount/out"));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Run the job and report wall-clock time
        long startdate = System.currentTimeMillis();
        ToolRunner.run(new UserDemo(), args);
        long enddate = System.currentTimeMillis();
        long costdate = enddate - startdate;
        System.out.println("Elapsed time: " + costdate + "ms");
    }
}
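Hardcoding the paths and fs.defaultFS is fine for a fixed test cluster, but ToolRunner already passes the leftover command-line arguments into run(), so the paths can come from the caller. A hedged variant under that assumption (the setJarByClass call is my addition; it matters once the job is submitted to a real cluster from a jar):

// A variant of run() that takes the input and output paths from the
// command line instead of hardcoding them.
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "usercount");
    job.setJarByClass(UserDemo.class);  // ships the containing jar to the cluster

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.addInputPath(job, new Path(args[0]));    // e.g. /user/lei/wordcount/

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(User.class);

    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));  // must not already exist

    return job.waitForCompletion(true) ? 0 : 1;
}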
8. Counter output. Note Map input records=3 (the three input lines), Map output records=6 (the six parsed student records), and Reduce input groups=4 (the four distinct class IDs):
INFO - Counters: 38
	File System Counters
		FILE: Number of bytes read=656
		FILE: Number of bytes written=470578
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=162
		HDFS: Number of bytes written=144
		HDFS: Number of read operations=15
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=4
	Map-Reduce Framework
		Map input records=3
		Map output records=6
		Map output bytes=120
		Map output materialized bytes=138
		Input split bytes=116
		Combine input records=0
		Combine output records=0
		Reduce input groups=4
		Reduce shuffle bytes=138
		Reduce input records=6
		Reduce output records=4
		Spilled Records=12
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=45
		CPU time spent (ms)=0
		Physical memory (bytes) snapshot=0
		Virtual memory (bytes) snapshot=0
		Total committed heap usage (bytes)=473956352
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters
		Bytes Read=81
	File Output Format Counters
		Bytes Written=144