在windows远程提交任务给Hadoop集群(Hadoop 2.6)

来源:互联网 发布:mac改了用户名进不去 编辑:程序博客网 时间:2024/05/29 19:17

我使用3台CentOS虚拟机搭建了一个Hadoop 2.6的集群。希望在Windows 7上面使用IDEA开发MapReduce程序,然后提交到远程的Hadoop集群上执行。经过不懈的Google终于搞定

 
 
开始我使用hadoop的eclipse插件来执行job,竟然成功了,后来发现mapreduce是在本地执行的,根本没有提交到集群上。我把hadoop的4个配置文件加上后就开始出现了问题。
 

1:org.apache.hadoop.util.Shell$ExitCodeException: /bin/bash: line 0: fg: no job control 

网上说要修改源码,但那个补丁在Hadoop 2.6中已经合并,不需要再改源码。这个错误一般是由于Windows客户端生成的启动命令在Linux上无法执行导致的,通过下文第2步中设置 mapreduce.app-submission.cross-platform=true 即可解决
 

2:Stack trace: ExitCodeException exitCode=1:

 

3:Error: Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster

 

4:Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class WordCount$Map not found

 

 
按照我的步骤走,这些问题都能解决,我使用的IDE是IDEA
1:复制Hadoop的4个配置文件(core-site.xml,hdfs-site.xml,mapred-site.xml,yarn-site.xml),连同log4j.properties一起放到src目录下面
 
2:配置mapred-site.xml
复制代码
<configuration>    <property>        <name>mapreduce.framework.name</name>        <value>yarn</value>    </property>    <property>        <name>mapred.remote.os</name>        <value>Linux</value>    </property>    <property>        <name>mapreduce.app-submission.cross-platform</name>        <value>true</value>    </property>    <property>    <name>mapreduce.application.classpath</name>    <value>        /opt/hadoop-2.6.0/etc/hadoop,        /opt/hadoop-2.6.0/share/hadoop/common/*,        /opt/hadoop-2.6.0/share/hadoop/common/lib/*,        /opt/hadoop-2.6.0/share/hadoop/hdfs/*,        /opt/hadoop-2.6.0/share/hadoop/hdfs/lib/*,        /opt/hadoop-2.6.0/share/hadoop/mapreduce/*,        /opt/hadoop-2.6.0/share/hadoop/mapreduce/lib/*,        /opt/hadoop-2.6.0/share/hadoop/yarn/*,        /opt/hadoop-2.6.0/share/hadoop/yarn/lib/*    </value></property>        <property>        <name>mapreduce.jobhistory.address</name>        <value>master:10020</value>    </property>       <property>                <name>mapreduce.jobhistory.webapp.address</name>                <value>master:19888</value>        </property></configuration>
复制代码

 

注意mapreduce.application.classpath一定是绝对路径,不要搞什么$HADOOP_HOME,我这里反正是报错的
 
3:修改yarn-site.xml
  1. 复制代码
    <configuration><!-- Site specific YARN configuration properties -->  <property>        <name>yarn.nodemanager.aux-services</name>        <value>mapreduce_shuffle</value>    </property>    <property>        <name>yarn.resourcemanager.address</name>        <value>master:8032</value>    </property><property>    <name>yarn.application.classpath</name>    <value>        /opt/hadoop-2.6.0/etc/hadoop,        /opt/hadoop-2.6.0/share/hadoop/common/*,        /opt/hadoop-2.6.0/share/hadoop/common/lib/*,        /opt/hadoop-2.6.0/share/hadoop/hdfs/*,        /opt/hadoop-2.6.0/share/hadoop/hdfs/lib/*,        /opt/hadoop-2.6.0/share/hadoop/mapreduce/*,        /opt/hadoop-2.6.0/share/hadoop/mapreduce/lib/*,        /opt/hadoop-2.6.0/share/hadoop/yarn/*,        /opt/hadoop-2.6.0/share/hadoop/yarn/lib/*    </value>  </property></configuration>
    复制代码

     


注意yarn.application.classpath一定是绝对路径,不要搞什么$HADOOP_HOME
 
4:看下我的代码
  1. 复制代码
    package com.gaoxing.hadoop;import java.io.IOException;import java.security.PrivilegedExceptionAction;import java.util.StringTokenizer;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.security.UserGroupInformation;import org.apache.hadoop.util.GenericOptionsParser;public class WordCount {    //继承mapper接口,设置map的输入类型为<Object,Text>    //输出类型为<Text,IntWritable>    public static class Map extends Mapper<Object,Text,Text,IntWritable>{        //one表示单词出现一次        private static IntWritable one = new IntWritable(1);        //word存储切下的单词        private Text word = new Text();        public void map(Object key,Text value,Context context) throws IOException,InterruptedException{            //对输入的行切词            StringTokenizer st = new StringTokenizer(value.toString());            while(st.hasMoreTokens()){                word.set(st.nextToken());//切下的单词存入word                context.write(word, one);            }        }    }    //继承reducer接口,设置reduce的输入类型<Text,IntWritable>    //输出类型为<Text,IntWritable>    public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{        //result记录单词的频数        private static IntWritable result = new IntWritable();        public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{            int sum = 0;            //对获取的<key,value-list>计算value的和            for(IntWritable val:values){                sum += val.get();            }            //将频数设置到result            result.set(sum);            //收集结果            context.write(key, result);        }    }    /**     * @param args     */    public static 
void main(String[] args) throws Exception{        Configuration conf = new Configuration();       // conf.set("mapred.remote.os","Linux");       // conf.set("yarn.resourcemanager.address","master:8032");       // conf.set("mapreduce.framework.name","yarn");        conf.set("mapred.jar","D:\\IdeaProjects\\hadooplearn\\out\\artifacts\\hadoo.jar");        //conf.set("mapreduce.app-submission.cross-platform","true");        Job job = Job.getInstance(conf);        job.setJobName("test");        //配置作业各个类        job.setJarByClass(WordCount.class);        job.setMapperClass(Map.class);        job.setCombinerClass(Reduce.class);        job.setReducerClass(Reduce.class);        job.setOutputKeyClass(Text.class);        job.setOutputValueClass(IntWritable.class);        FileInputFormat.addInputPath(job, new Path("hdfs://master:9000/tmp/hbase-env.sh"));        FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/tmp/out11"));        System.exit(job.waitForCompletion(true) ? 0 : 1);    }}
    复制代码

     


conf.set("mapred.jar","D:\\IdeaProjects\\hadooplearn\\out\\artifacts\\hadoo.jar");这是最重要的一句,不然会报上面第4个问题
 
IDEA中有个功能就是编译的时候打包:

 
下班了。

 





转载至高兴的博客

0 0
原创粉丝点击