MapReduce 杂记
来源:互联网 发布:网络黑白txt下载 编辑:程序博客网 时间:2024/05/19 09:13
集群版本:Hadoop 2.7.2
JAVA版本:1.8.0_91
编辑器:Eclipse 4.4.2
本文默认读者已了解 MapReduce 的运行机制和基本编程方法。
import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.RawComparator;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class MapReduceEx { // Map public static class Map extends Mapper<LongWritable, Text, Text, Text> { public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException { context.write(ivalue, new Text()); } } // Reduce public static class Reduce extends Reducer<Text, Text, Text, Text> { public void reduce(Text key, Iterable<Text> value, Context context) throws IOException, InterruptedException { for (Text text : value) { context.write(text, new Text()); } } } // 重构Writable类 public static class myWritableComparable implements WritableComparable<myWritableComparable> { private Text first; private Text second; public void set(Text first, Text second) { this.first = first; this.second = second; } public Text getFirst() { return this.first; } public Text getSecond() { return this.second; } @Override public void readFields(DataInput in) throws IOException { // TODO Auto-generated method stub this.first.readFields(in); this.second.readFields(in); // INT = in.readInt() // STRING = in.readLine() } @Override public void write(DataOutput out) throws IOException { // TODO Auto-generated method stub this.first.write(out); this.second.write(out); // out.writeInt(INT); // out.writeChars(STRING); } @Override public int compareTo(myWritableComparable o) { // TODO Auto-generated method stub 
int result = this.first.compareTo(o.first); if (result == 0) { result = this.second.compareTo(o.second); } return result; } } // 自定义分组 public static class myRawComparator implements RawComparator<myWritableComparable> { @Override public int compare(myWritableComparable o1, myWritableComparable o2) { // TODO Auto-generated method stub Text a = o1.getFirst(); Text b = o2.getFirst(); return a.compareTo(b); } @Override public int compare(byte[] arg0, int arg1, int arg2, byte[] arg3, int arg4, int arg5) { // TODO Auto-generated method stub /** * arg0 表示第1个参与比较的字节数组 arg1 表示第1个字节数组中开始比较的位置 arg2 * 表示第1个字节数组参与比较的字节长度 arg3 表示第2个参与比较的字节数组 arg4 表示第2个字节数组中开始比较的位置 * arg5 表示第2个字节数组参与比较的字节长度 */ return WritableComparator .compareBytes(arg0, arg1, 8, arg3, arg4, 8); } } // 自定义分区 public class myPartitioner extends Partitioner<Text, Text> { @Override public int getPartition(Text key, Text value, int numReduceTasks) { // TODO Auto-generated method stub return (key.toString().hashCode() & Integer.MAX_VALUE) % numReduceTasks; } } // 参数设定 public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); @SuppressWarnings("deprecation") Job job = new Job(conf, "First_Cleaning"); // 指定main函数所子类 job.setJarByClass(MapReduceEx.class); // 指定Map类 job.setMapperClass(Map.class); // 指定map输出key的类 job.setMapOutputKeyClass(Text.class); // 指定map输出value的类 job.setMapOutputValueClass(Text.class); // 指定Reduce类 job.setReducerClass(Reduce.class); // 指定reduce输出key的类 job.setOutputKeyClass(Text.class); // 指定reduce输出value的类 job.setOutputValueClass(Text.class); // 指定分组类 job.setGroupingComparatorClass(myRawComparator.class); // 指定分区类 job.setPartitionerClass(myPartitioner.class); // 指定Combiner类,对map输出进行归约 job.setCombinerClass(null); // 指定CombinerKey分组类 job.setCombinerKeyGroupingComparatorClass(null); // 指定排序时候所使用的比较器 job.setSortComparatorClass(null); // 指定输入路径 FileInputFormat.addInputPath(job, new Path("")); 
// 指定输出路径 FileOutputFormat.setOutputPath(job, new Path("")); // 提交任务 System.exit(job.waitForCompletion(true) ? 0 : 1); }}
1 0
- MapReduce 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 杂记
- 数据库中索引的优缺点和分类
- Weex项目初始化weex-iOS集成
- Android Studio NDK 入门教程(3)--Java与C++之间的类型签名
- 使用CooCox的CoIDE打开mbed项目失败:提示 The chip:STM32F103RB information of current project has sonething wrong
- 猎豹浏览器访问webapp报HTTP405错误
- MapReduce 杂记
- 面试题29:找到数组中出现次数超过一半的数字(java)
- 【数据结构】堆、堆排序笔记
- Java基础之(三十五)输入输出<一>
- JAVA 网络编程(6) Netty TCP 示例
- Qt中QLabel的字符串连接
- js判断浏览器 微信
- jdbc连接oracle数据库
- C compiler cannot create executables checking whether the C compiler works... no