hadoop--单表关联
来源:互联网 发布:气象观测数据怎么画图 编辑:程序博客网 时间:2024/05/17 23:54
给出child-parent表,输出grandchild-grandparent表:

child parent
Tom Lucy
Tom Jack
Jone Lucy
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma
代码:
import java.io.IOException;import java.util.Iterator;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class STjoin { public static int time=0; public static class Map extends Mapper<LongWritable, Text, Text, Text>{ @Override protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException { String childname=new String(); String parentname=new String(); String relationtype=new String(); String line=value.toString(); int i=0; while(line.charAt(i)!=' '){ i++; } String[] values={line.substring(0, i),line.substring(i+1)}; if(values[0].compareTo("child")!=0){ childname=values[0]; parentname=values[1]; relationtype="1";//左右表区分标志 context.write(new Text(values[1]), new Text(relationtype+"+"+childname+"+"+parentname)); //右表 relationtype="2"; context.write(new Text(values[0]), new Text(relationtype+"+"+childname+"+"+parentname)); } } } public static class Reduce extends Reducer<Text, Text, Text, Text>{ @Override protected void reduce(Text key, Iterable<Text> values,Context context) throws IOException, InterruptedException { if(time==0){ //输出表头 context.write(new Text("grandchild"), new Text("grandparent")); time++; } int grandchildnum=0; String grandchild[]=new String[10]; int grandparentnum=0; String grandparent[]=new String[10]; Iterator iterator=values.iterator(); while(iterator.hasNext()){ String record=iterator.next().toString(); int len=record.length(); int i=2; if(len==0) continue; char relationtype=record.charAt(0); String childname=new String(); String parentname=new String(); //获取value-list中value的child while(record.charAt(i)!='+'){ childname=childname+record.charAt(i); i++; } 
i=i+1;//越过加号 //获取value-list中value的parent while(i<len){ parentname=parentname+record.charAt(i); i++; } System.out.println("childname="+childname+" parentname="+parentname); //左表,取出child放入grandchild if(relationtype=='1'){ grandchild[grandchildnum]=childname; grandchildnum++; }else{//右表,取出parent放入 grandparent grandparent[grandparentnum]=parentname; grandparentnum++; } } //grandchild和grandparent数组求笛卡儿积 if(grandparentnum!=0&&grandchildnum!=0){ System.out.println("******执行成功************"); for(int m=0;m<grandchildnum;m++){ for(int n=0;n<grandparentnum;n++){ context.write(new Text(grandchild[m]), new Text(grandparent[n])); } } } } } public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf=new Configuration(); Job job=new Job(conf, "STJOIN"); job.setJarByClass(STjoin.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path("/input/st")); FileOutputFormat.setOutputPath(job, new Path("/output/st")); System.out.println(job.waitForCompletion(true) ? 0:1); }}
结果:
0 0
- hadoop单表关联
- hadoop单表关联
- hadoop--单表关联
- hadoop学习--单表关联
- hadoop学习--单表关联
- Hadoop 单表关联 多表关联
- Hadoop单表与多表关联
- hadoop编程实例--单表关联
- Hadoop MapReduce单表关联程序
- Hadoop 2.x 单表关联
- 【Hadoop基础教程】6、Hadoop之单表关联查询
- hadoop实现单表和多表关联
- hadoop实现单表和多表关联
- Hadoop案例之单表关联输出祖孙关系
- MapReduce单表关联
- 单表关联
- 单表关联
- hadoop多表关联
- removeDuplicates
- 经典软件体系结构风格(三)
- 转载:Unit Test图示
- 关于MemStore的刷新方式
- PAT 1004. Counting Leaves (30)
- hadoop--单表关联
- eclipse+pydev 官网下载+安装(win7)
- Ajax Jquery 提交验证页面数据
- c++第五次实验—项目一
- 绘制余弦函数图像
- C语言常用库函数(含详细用法)
- 接口类和抽象类的区别
- 不想使用Externalizable,但想拥有像writeExternal,readExternal两方法。(长时间保存对象方法三)
- [leetcode108]Convert Sorted Array to Binary Search Tree