hadoop自定义数据类型

来源:互联网 发布:淘宝学校排名 编辑:程序博客网 时间:2024/05/22 06:12
测试类(MapReduce 作业驱动):统计日志中 HTTP 状态码为 404 的记录条数
package com.Datatype;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataLogs1 {

    /**
     * Mapper: reads raw log lines and, for every line whose second-to-last
     * token is the HTTP status "404", emits a {@link Log1} key with value "1".
     */
    public static class Mappeer extends Mapper<LongWritable, Text, Log1, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String logs = value.toString();
            String[] datas = logs.split(" ");
            // Guard against short/malformed lines: we read datas[3] and
            // datas[length - 2] below, so at least 4 tokens are required.
            // (The original code threw ArrayIndexOutOfBoundsException here.)
            if (datas.length < 4) {
                return;
            }
            // Constant-first equals: safe even if the token were null-like.
            if ("404".equals(datas[datas.length - 2])) {
                // Key fields: the whole line, the 4th token (presumably the
                // timestamp field — confirm against the log format), and the
                // last token.
                Log1 log = new Log1(logs, datas[3], datas[datas.length - 1]);
                context.write(log, new Text("1"));
            }
        }
    }

    /**
     * Reducer: sums the "1" values emitted per key, producing the occurrence
     * count of each distinct Log1 record.
     */
    public static class Reduceer extends Reducer<Log1, Text, Log1, IntWritable> {
        @Override
        protected void reduce(Log1 log, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (Text text : value) {
                sum += Integer.parseInt(text.toString());
            }
            context.write(log, new IntWritable(sum));
        }
    }

    /**
     * Job driver: configures mapper/reducer, key/value classes and HDFS
     * input/output paths, then submits the job and waits for completion.
     * Exits with 0 on success, 1 on job failure.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.97:9000/");
        try {
            // Job.getInstance replaces the deprecated new Job(conf) constructor.
            Job job = Job.getInstance(conf);
            job.setJarByClass(DataLogs1.class);
            job.setMapperClass(Mappeer.class);
            job.setReducerClass(Reduceer.class);
            job.setMapOutputKeyClass(Log1.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Log1.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path("/root/log.txt"));
            // NOTE(review): the output directory must not already exist,
            // otherwise the job fails at submission time.
            FileOutputFormat.setOutputPath(job, new Path("/file/test4111"));
            try {
                System.exit(job.waitForCompletion(true) ? 0 : 1);
            } catch (ClassNotFoundException e) {
                System.out.println("任务提交出了问题 ClassNotFoundException");
                e.printStackTrace();
            } catch (InterruptedException e) {
                System.out.println("任务提交出了问题 InterruptedException");
                // Restore the interrupt flag so callers can observe it.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
        } catch (IOException e) {
            System.out.println("job初始化出了问题");
            e.printStackTrace();
        }
    }

}

自定义数据类型(实现 WritableComparable 接口)
注意:必须提供无参构造方法——Hadoop 反序列化时先通过反射创建空实例,再调用 readFields() 填充字段。
package com.Datatype;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

import org.apache.hadoop.io.WritableComparable;

public class Log1 implements WritableComparable<Log1> {

    private String data; // the complete raw log line
    private String time; // the 4th token of the log line
    private String flag; // the last token of the log line

    /**
     * No-arg constructor is mandatory: Hadoop creates key instances via
     * reflection and then calls readFields() during deserialization.
     */
    public Log1() {
    }

    public Log1(String data, String time, String flag) {
        this.data = data;
        this.time = time;
        this.flag = flag;
    }

    public String getData() {
        return data;
    }

    public void setData(String data) {
        this.data = data;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }

    /** Only the raw line is printed; this is what appears in the job output. */
    @Override
    public String toString() {
        return data;
    }

    /** Deserialization: field order must match write() exactly. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.data = in.readUTF();
        this.time = in.readUTF();
        this.flag = in.readUTF();
    }

    /** Serialization: field order must match readFields() exactly. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(data);
        out.writeUTF(time);
        out.writeUTF(flag);
    }

    /**
     * Sort order: descending by flag, then descending by time. Keys that
     * compare equal here are grouped into a single reduce() call.
     */
    @Override
    public int compareTo(Log1 log) {
        int byFlag = this.flag.compareTo(log.flag);
        if (byFlag != 0) {
            return byFlag > 0 ? -1 : 1;
        }
        int byTime = this.time.compareTo(log.time);
        if (byTime == 0) {
            return 0;
        }
        return byTime > 0 ? -1 : 1;
    }

    /**
     * equals/hashCode must be consistent with compareTo: without these
     * overrides the default identity hashCode can send keys that compare
     * equal to different reducers under HashPartitioner. The fields mirror
     * those used by compareTo (flag, then time).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Log1)) {
            return false;
        }
        Log1 other = (Log1) obj;
        return Objects.equals(flag, other.flag) && Objects.equals(time, other.time);
    }

    @Override
    public int hashCode() {
        return Objects.hash(flag, time);
    }

}