mapreduce自定义输入
来源:互联网 发布:南京三宝科技 知乎 编辑:程序博客网 时间:2024/06/02 00:07
驱动类
- import java.io.IOException;
- import java.net.URI;
- import javax.xml.soap.Text;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
- import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- /**
- * desc:Custom Data Types <code>TestPoint3DInputFormat</code>
- *
- * @author chenwq
- */
- public class TestPoint3DInputFormat {
- /**
- * @param args
- * @throws IOException
- * @throws ClassNotFoundException
- * @throws InterruptedException
- */
- public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
- // TODO Auto-generated method stub
- System.out.println("hello,chenwq!");
- Job job=new Job();
- Configuration conf=new Configuration();
- FileSystem fs=FileSystem.get(URI.create(args[1]), conf);
- fs.delete(new Path(args[1]));
- job.setJobName("测试MyInputFormat程序。。。。。");
- FileInputFormat.addInputPath(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
- job.setInputFormatClass(Point3DinputFormat.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(Point3D.class);
- job.setMapperClass(Point3DMapper.class);
- job.setNumReduceTasks(0);
- job.waitForCompletion(false);
- }
- }
2. 自定义类型 Point3D 必须实现 WritableComparable 接口，才能在 Hadoop 环境中序列化传输。
- import java.io.DataInput;
- import java.io.DataOutput;
- import java.io.IOException;
- import org.apache.hadoop.io.WritableComparable;
- /**
- * desc:Custom Data Types <code>Point</code>
- *
- * @author chenwq
- */
- public class Point3D implements WritableComparable {
- public float x;
- public float y;
- public float z;
- public Point3D(float x, float y, float z) {
- this.x = x;
- this.y = y;
- this.z = z;
- }
- public Point3D() {
- this(0.0f, 0.0f, 0.0f);
- }
- public void set(float x, float y, float z) {
- this.x = x;
- this.y = y;
- this.z = z;
- }
- public void write(DataOutput out) throws IOException {
- out.writeFloat(x);
- out.writeFloat(y);
- out.writeFloat(z);
- }
- public void readFields(DataInput in) throws IOException {
- x = in.readFloat();
- y = in.readFloat();
- z = in.readFloat();
- }
- public String toString() {
- return Float.toString(x) + ", " + Float.toString(y) + ", "
- + Float.toString(z);
- }
- public float distanceFromOrigin() {
- return (float) Math.sqrt(x * x + y * y + z * z);
- }
- public int compareTo(Object other) {
- float myDistance = this.distanceFromOrigin();
- float otherDistance = ((Point3D) other).distanceFromOrigin();
- return Float.compare(myDistance, otherDistance);
- }
- public boolean equals(Object o) {
- Point3D other = (Point3D) o;
- if (!(other instanceof Point3D)) {
- return false;
- }
- return this.x == other.x && this.y == other.y && this.z == other.z;
- }
- public int hashCode() {
- return Float.floatToIntBits(x) ^ Float.floatToIntBits(y)
- ^ Float.floatToIntBits(z);
- }
- }
- import java.io.IOException;
- import java.util.StringTokenizer;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FSDataInputStream;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.InputSplit;
- import org.apache.hadoop.mapreduce.JobContext;
- import org.apache.hadoop.mapreduce.RecordReader;
- import org.apache.hadoop.mapreduce.TaskAttemptContext;
- import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
- import org.apache.hadoop.mapreduce.lib.input.FileSplit;
- import org.apache.hadoop.util.LineReader;
- public class Point3DinputFormat extends FileInputFormat<Text, Point3D> {
- @Override
- protected boolean isSplitable(JobContext context, Path filename) {
- // TODO Auto-generated method stub
- return false;
- }
- @Override
- public RecordReader<Text, Point3D> createRecordReader(InputSplit inputsplit,
- TaskAttemptContext context) throws IOException, InterruptedException {
- // TODO Auto-generated method stub
- return new objPosRecordReader();
- }
- public static class objPosRecordReader extends RecordReader<Text,Point3D>{
- public LineReader in;
- public Text lineKey;
- public Point3D lineValue;
- public StringTokenizer token=null;
- public Text line;
- @Override
- public void close() throws IOException {
- // TODO Auto-generated method stub
- }
- @Override
- public Text getCurrentKey() throws IOException, InterruptedException {
- //lineKey.set(token.nextToken());
- return lineKey;
- }
- @Override
- public Point3D getCurrentValue() throws IOException,
- InterruptedException {
- // TODO Auto-generated method stub
- return lineValue;
- }
- @Override
- public float getProgress() throws IOException, InterruptedException {
- // TODO Auto-generated method stub
- return 0;
- }
- @Override
- public void initialize(InputSplit input, TaskAttemptContext context)
- throws IOException, InterruptedException {
- // TODO Auto-generated method stub
- FileSplit split=(FileSplit)input;
- Configuration job=context.getConfiguration();
- Path file=split.getPath();
- FileSystem fs=file.getFileSystem(job);
- FSDataInputStream filein=fs.open(file);
- in=new LineReader(filein,job);
- line=new Text();
- lineKey=new Text();
- lineValue=new Point3D();
- }
- @Override
- public boolean nextKeyValue() throws IOException, InterruptedException {
- // TODO Auto-generated method stub
- int linesize=in.readLine(line);
- if(linesize==0)
- return false;
- String[] pieces = line.toString().split(",");
- if(pieces.length != 4){
- throw new IOException("Invalid record received");
- }
- // try to parse floating point components of value
- float fx, fy, fz;
- try{
- fx = Float.parseFloat(pieces[1].trim());
- fy = Float.parseFloat(pieces[2].trim());
- fz = Float.parseFloat(pieces[3].trim());
- }catch(NumberFormatException nfe){
- throw new IOException("Error parsing floating poing value in record");
- }
- lineKey.set(pieces[0]);
- lineValue.set(fx, fy, fz);
- return true;
- }
- }
- }
- import java.io.IOException;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapreduce.Mapper;
- public class Point3DMapper extends Mapper<Text, Point3D, Text, Point3D>{
- protected void map(Text key, Point3D value, Context context) throws IOException, InterruptedException{
- context.write(key, value);
- }
- }
0 0
- mapreduce自定义输入
- MapReduce自定义输入格式
- 自定义 hadoop MapReduce InputFormat 切分输入文件
- 自定义 hadoop MapReduce InputFormat 切分输入文件
- 自定义 hadoop MapReduce InputFormat 切分输入文件
- 自定义 hadoop MapReduce InputFormat 切分输入文件
- 自定义 hadoop MapReduce InputFormat 切分输入文件
- Hadoop中MapReduce自定义输入格式
- Mapreduce的输入格式
- mapreduce 多种输入
- Mapreduce的输入格式
- mapreduce 多种输入
- MapReduce输入路径
- mapreduce 多种输入
- MapReduce多种输入格式
- MapReduce之多数据源输入
- 2.1MapReduce输入
- MapReduce对输入多文件的处理2自定义FileInputFormat类
- Linux export命令
- Pinyin4j 详解及使用
- 矩阵交换行
- gconf-editor配置CentOS文件夹侧窗格显示
- VC 纯资源DLL制作及调用
- mapreduce自定义输入
- 文件操作函数 C语言 (FILE fputc fgetc fputs fgets fscanf fprintf)
- 异常细胞检测
- gco-v3.0使用
- Android ApiDemo解析-AnimationCloning类
- ADB server didn't ACK 解决方法
- 古剑奇谭——手中执剑 方能保护所珍惜之人
- JAVCC语法文件 官方文档翻译
- 第10周项目4-大奖赛计分