hadoop自定义数据类型

来源:互联网 发布:淘宝学校排名 编辑:程序博客网 时间:2024/05/22 06:12
测试类(MapReduce 作业驱动):统计日志中 HTTP 状态码为 404 的记录条数
package com.Datatype;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataLogs1 {

    /**
     * Mapper: reads raw log lines and, for every line whose second-to-last
     * token is the HTTP status "404", emits a {@link Log1} key with value "1".
     */
    public static class Mappeer extends Mapper<LongWritable, Text, Log1, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String logs = value.toString();
            String[] datas = logs.split(" ");
            // Guard against short/malformed lines: we read datas[3] and
            // datas[length - 2] below, so at least 4 tokens are required.
            // (The original code threw ArrayIndexOutOfBoundsException here.)
            if (datas.length < 4) {
                return;
            }
            // Constant-first equals: safe even if the token were null-like.
            if ("404".equals(datas[datas.length - 2])) {
                // Key fields: the whole line, the 4th token (presumably the
                // timestamp field — confirm against the log format), and the
                // last token.
                Log1 log = new Log1(logs, datas[3], datas[datas.length - 1]);
                context.write(log, new Text("1"));
            }
        }
    }

    /**
     * Reducer: sums the "1" values emitted per key, producing the occurrence
     * count of each distinct Log1 record.
     */
    public static class Reduceer extends Reducer<Log1, Text, Log1, IntWritable> {
        @Override
        protected void reduce(Log1 log, Iterable<Text> value, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (Text text : value) {
                sum += Integer.parseInt(text.toString());
            }
            context.write(log, new IntWritable(sum));
        }
    }

    /**
     * Job driver: configures mapper/reducer, key/value classes and HDFS
     * input/output paths, then submits the job and waits for completion.
     * Exits with 0 on success, 1 on job failure.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.97:9000/");
        try {
            // Job.getInstance replaces the deprecated new Job(conf) constructor.
            Job job = Job.getInstance(conf);
            job.setJarByClass(DataLogs1.class);
            job.setMapperClass(Mappeer.class);
            job.setReducerClass(Reduceer.class);
            job.setMapOutputKeyClass(Log1.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Log1.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path("/root/log.txt"));
            // NOTE(review): the output directory must not already exist,
            // otherwise the job fails at submission time.
            FileOutputFormat.setOutputPath(job, new Path("/file/test4111"));
            try {
                System.exit(job.waitForCompletion(true) ? 0 : 1);
            } catch (ClassNotFoundException e) {
                System.out.println("任务提交出了问题 ClassNotFoundException");
                e.printStackTrace();
            } catch (InterruptedException e) {
                System.out.println("任务提交出了问题 InterruptedException");
                // Restore the interrupt flag so callers can observe it.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
        } catch (IOException e) {
            System.out.println("job初始化出了问题");
            e.printStackTrace();
        }
    }

}

自定义数据类型(实现 WritableComparable 接口)
注意:必须提供无参构造方法——Hadoop 反序列化时先通过反射创建空实例,再调用 readFields() 填充字段。
package com.Datatype;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

import org.apache.hadoop.io.WritableComparable;

public class Log1 implements WritableComparable<Log1> {

    private String data; // the complete raw log line
    private String time; // the 4th token of the log line
    private String flag; // the last token of the log line

    /**
     * No-arg constructor is mandatory: Hadoop creates key instances via
     * reflection and then calls readFields() during deserialization.
     */
    public Log1() {
    }

    public Log1(String data, String time, String flag) {
        this.data = data;
        this.time = time;
        this.flag = flag;
    }

    public String getData() {
        return data;
    }

    public void setData(String data) {
        this.data = data;
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }

    /** Only the raw line is printed; this is what appears in the job output. */
    @Override
    public String toString() {
        return data;
    }

    /** Deserialization: field order must match write() exactly. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.data = in.readUTF();
        this.time = in.readUTF();
        this.flag = in.readUTF();
    }

    /** Serialization: field order must match readFields() exactly. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(data);
        out.writeUTF(time);
        out.writeUTF(flag);
    }

    /**
     * Sort order: descending by flag, then descending by time. Keys that
     * compare equal here are grouped into a single reduce() call.
     */
    @Override
    public int compareTo(Log1 log) {
        int byFlag = this.flag.compareTo(log.flag);
        if (byFlag != 0) {
            return byFlag > 0 ? -1 : 1;
        }
        int byTime = this.time.compareTo(log.time);
        if (byTime == 0) {
            return 0;
        }
        return byTime > 0 ? -1 : 1;
    }

    /**
     * equals/hashCode must be consistent with compareTo: without these
     * overrides the default identity hashCode can send keys that compare
     * equal to different reducers under HashPartitioner. The fields mirror
     * those used by compareTo (flag, then time).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Log1)) {
            return false;
        }
        Log1 other = (Log1) obj;
        return Objects.equals(flag, other.flag) && Objects.equals(time, other.time);
    }

    @Override
    public int hashCode() {
        return Objects.hash(flag, time);
    }

}