Hadoop Example 1: Finding Each Year's Maximum Temperature from Collected Weather Data


1. Examining the raw data

0067011990999991950051507004888888889999999N9+00001+9999999999999999999999
0067011990999991950051512004888888889999999N9+00221+9999999999999999999999
0067011990999991950051518004888888889999999N9-00111+9999999999999999999999
0067011990999991949032412004888888889999999N9+01111+9999999999999999999999
0067011990999991950032418004888888880500001N9+00001+9999999999999999999999
0067011990999991950051507004888888880500001N9+00781+9999999999999999999999

Notes on the data (positions are the 0-based indices used with String.substring):
1. Characters 15-19 hold the year.
2. Characters 45-50 hold the temperature: position 45 is the sign ('+' for above zero, '-' for below zero), and a reading of 9999 marks abnormal data that must be filtered out. The character at position 50 is a quality code and may only be 0, 1, 4, 5, or 9.
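
To make the offsets concrete, the throwaway snippet below parses one of the sample records by hand (the ParseCheck class name is illustrative, not part of the project):

package cn.edu.bjut.temperautre;

public class ParseCheck {

    public static void main(String[] args) {
        // the second sample record from above
        String line = "0067011990999991950051512004888888889999999N9+00221+9999999999999999999999";
        String year = line.substring(15, 19);                  // "1950"
        int temperature = ('+' == line.charAt(45))
                ? Integer.parseInt(line.substring(46, 50))     // drop the leading '+'
                : Integer.parseInt(line.substring(45, 50));    // keep the '-' sign
        String quality = line.substring(50, 51);               // "1"
        System.out.println(year + " " + temperature + " quality=" + quality);
        // prints: 1950 22 quality=1
    }
}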

2. Write the Mapper:

package cn.edu.bjut.temperautre;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // sentinel value that marks an abnormal (missing) reading
    private static final int ERROR_TEMPER = 9999;

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String content = value.toString();
        String year = content.substring(15, 19);    // characters 15-19 hold the year

        int temperature;
        if ('+' == content.charAt(45)) {
            temperature = Integer.parseInt(content.substring(46, 50));    // drop the leading '+'
        } else {
            temperature = Integer.parseInt(content.substring(45, 50));    // keep the '-' sign
        }

        // keep only valid readings: not the 9999 sentinel, with a quality code of 0, 1, 4, 5 or 9
        if (temperature != ERROR_TEMPER && content.substring(50, 51).matches("[01459]")) {
            context.write(new Text(year), new IntWritable(temperature));
        }
    }
}
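
The mapper can also be exercised in isolation with a unit test. The sketch below assumes MRUnit (the now-retired Apache test harness for MapReduce) and JUnit 4 are on the classpath; the test class name is illustrative, not part of the original post:

package cn.edu.bjut.temperautre;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class TemperatureMapperTest {

    @Test
    public void emitsYearAndTemperatureForValidRecord() throws IOException {
        // second sample record: year 1950, temperature +0022, quality code 1
        String line = "0067011990999991950051512004888888889999999N9+00221+9999999999999999999999";
        new MapDriver<LongWritable, Text, Text, IntWritable>()
                .withMapper(new TemperatureMapper())
                .withInput(new LongWritable(0), new Text(line))
                .withOutput(new Text("1950"), new IntWritable(22))
                .runTest();
    }
}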

3. Write the Reducer:

package cn.edu.bjut.temperautre;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class TemperatureReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // scan every temperature recorded for this year and keep the maximum
        int maxTemperature = Integer.MIN_VALUE;
        for (IntWritable intWritable : values) {
            maxTemperature = Math.max(maxTemperature, intWritable.get());
        }
        context.write(key, new IntWritable(maxTemperature));
    }
}
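
Because a maximum is associative and commutative, the same reducer class can also run map-side as a combiner, pre-aggregating values before the shuffle to cut network traffic. This is an optional optimization not in the original code; enabling it takes one extra line in the driver of the next step:

// fragment of the driver configuration with the optional combiner enabled
job.setMapperClass(TemperatureMapper.class);
job.setCombinerClass(TemperatureReducer.class);    // pre-aggregate per-year maxima on the map side
job.setReducerClass(TemperatureReducer.class);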

4. Write the driver:

package cn.edu.bjut.temperautre;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MainJob {

    public static void main(String[] args) throws Exception {
        if (2 != args.length) {
            System.err.println("Usage: MaxTemperature <input path> <output path>");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "temperature");
        job.setJarByClass(MainJob.class);

        job.setMapperClass(TemperatureMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(TemperatureReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        // delete the output directory if it already exists, otherwise the job refuses to start
        Path outPath = new Path(args[1]);
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        // exit with 0 on success, 1 on failure
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
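
An optional variant, not in the original post: letting the driver extend Configured and implement Tool makes Hadoop parse generic options such as -D key=value from the command line. The MaxTemperatureDriver class name below is my own; treat this as a sketch rather than the original author's code:

package cn.edu.bjut.temperautre;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MaxTemperatureDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: MaxTemperatureDriver <input path> <output path>");
            return -1;
        }
        // getConf() carries any -D options passed on the command line
        Job job = Job.getInstance(getConf(), "temperature");
        job.setJarByClass(MaxTemperatureDriver.class);
        job.setMapperClass(TemperatureMapper.class);
        job.setReducerClass(TemperatureReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MaxTemperatureDriver(), args));
    }
}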

5. Package the classes above into a jar (only these classes are needed) and upload it to the Linux server.
6. Create a file source.txt, copy the raw data to be analyzed into it, and load it into HDFS with: hadoop fs -put source.txt /user/root/data/1/source.txt (the directory used here is only an example; pick your own as needed).
7. Run the job: hadoop jar one.jar /user/root/data/1 /result_one (if the jar's manifest does not declare a main class, add cn.edu.bjut.temperautre.MainJob after one.jar).
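8. Verify the result with: hadoop fs -cat /result_one/part-r-00000 (part-r-00000 is the usual name of the first reduce output file). With the six sample records above, the valid readings are 111 for 1949 and 0, 22, -11, 0, 78 for 1950, so the expected output is:

1949	111
1950	78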
