Cloud Computing Lab Exercise


  1. Install xshell, xftp, and VMware. The VMware installation serial number is: 5A02H-AU243-TZJ49-GTC7K-3C61N

  2. In VMware, raise the virtual machine's maximum memory limit and start HDP. Once startup completes, follow the prompt and connect over ssh with xshell. The first login requires a password change. Username: root, old password: hadoop
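     The connection itself is a plain ssh login from xshell; the address below is only a placeholder, substitute the one shown on the HDP boot screen:

        ssh root@192.168.56.101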

  3. Use xftp to replace the hdfs-site.xml file in the /etc/hadoop/2.3.2.0-2950/0 directory. When the transfer is done, enter "shutdown -r 0" in xshell to reboot the system.
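     Before rebooting, you can confirm in xshell that the new file is in place:

        cat /etc/hadoop/2.3.2.0-2950/0/hdfs-site.xml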

  4. Unzip cloudMR.zip and fill in the TODO sections of TitleCount.java as required. When finished, use xftp to transfer the entire cloudMR folder to the HDP system.

  5. Enter the following commands in xshell, in order, to start the program:

    1. cd cloudMR/

    2. chmod 777 submit.sh

    3. ./submit.sh

  6. When the run finishes, output matching the expected result confirms that TitleCount executed successfully.

     (screenshot of the expected output appeared here in the original post)

    (For a closer look at how jobs are submitted to hadoop, see submit.sh.)
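    As a rough sketch only (the authoritative commands live in cloudMR/submit.sh), a submission script for this job would compile the class, package it into a jar, and hand the stop-word and delimiter file locations to the job through -D options, which ToolRunner copies into the Configuration that setup() later reads. All paths below are placeholders:

        export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar
        hadoop com.sun.tools.javac.Main TitleCount.java    # compile against the Hadoop classpath
        jar cf TitleCount.jar TitleCount*.class            # package the compiled classes
        hadoop jar TitleCount.jar TitleCount \
            -D stopwords=/path/to/stopwords.txt \
            -D delimiters=/path/to/delimiters.txt \
            input output                                   # args[0] = input dir, args[1] = output dir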

  7. The completed TitleCount.java, for reference:

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.util.Arrays;
    import java.util.List;
    import java.util.StringTokenizer;

    // >>> Don't Change
    public class TitleCount extends Configured implements Tool {
        public static void main(String[] args) throws Exception {
            int res = ToolRunner.run(new Configuration(), new TitleCount(), args);
            System.exit(res);
        }

        @Override
        public int run(String[] args) throws Exception {
            Job job = Job.getInstance(this.getConf(), "Title Count");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setMapperClass(TitleCountMap.class);
            job.setReducerClass(TitleCountReduce.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            job.setJarByClass(TitleCount.class);
            return job.waitForCompletion(true) ? 0 : 1;
        }

        // Reads a whole file from HDFS into one newline-joined string.
        public static String readHDFSFile(String path, Configuration conf) throws IOException {
            Path pt = new Path(path);
            FileSystem fs = FileSystem.get(pt.toUri(), conf);
            FSDataInputStream file = fs.open(pt);
            BufferedReader buffIn = new BufferedReader(new InputStreamReader(file));

            StringBuilder everything = new StringBuilder();
            String line;
            while ((line = buffIn.readLine()) != null) {
                everything.append(line);
                everything.append("\n");
            }
            return everything.toString();
        }
    // <<< Don't Change

        public static class TitleCountMap extends Mapper<Object, Text, Text, IntWritable> {
            List<String> stopWords;
            String delimiters;

            @Override
            protected void setup(Context context) throws IOException, InterruptedException {
                Configuration conf = context.getConfiguration();

                // The stop-word and delimiter file locations are read from the job
                // Configuration (e.g., passed on the command line via -D options).
                String stopWordsPath = conf.get("stopwords");
                String delimitersPath = conf.get("delimiters");

                // Load both files from HDFS once per mapper, before any map() calls.
                this.stopWords = Arrays.asList(readHDFSFile(stopWordsPath, conf).split("\n"));
                this.delimiters = readHDFSFile(delimitersPath, conf);
            }

            @Override
            public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
                // Split each title on the delimiter characters, normalize tokens
                // to lower case, and emit (word, 1) for every non-stop-word.
                StringTokenizer stk = new StringTokenizer(value.toString(), delimiters);
                while (stk.hasMoreTokens()) {
                    String e = stk.nextToken().trim().toLowerCase();
                    if (!stopWords.contains(e)) {
                        context.write(new Text(e), new IntWritable(1));
                    }
                }
            }
        }

        public static class TitleCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
            @Override
            public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
                // Sum the 1s emitted for each word to get its total count.
                int sum = 0;
                for (IntWritable e : values) {
                    sum += e.get();
                }
                context.write(key, new IntWritable(sum));
            }
        }
    }
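    When the job succeeds, the reducer's output lands in the HDFS output directory as tab-separated word/count pairs, one line per title word that survived the stop-word filter. Assuming the output directory is named output (a placeholder; submit.sh decides the real path), it can be inspected with:

        hdfs dfs -cat output/part-r-00000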
