Hadoop 2 Single-Node Setup


Hadoop 2.2.0 environment configuration guide (multi-node configuration is at the end)

 

1. Disable the firewall: # chkconfig iptables off

2. Check the status: # chkconfig --list | grep iptables (every runlevel should show off)
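Note that chkconfig only changes what happens at boot; a firewall that is already running stays up until the next reboot. To stop it immediately on the SysV-init CentOS this guide assumes:

# service iptables stop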

3. Copy hadoop-2.2.0.tar.gz into /usr/local

4. Extract it: # tar -zxvf hadoop-2.2.0.tar.gz

5. Rename the directory: # mv hadoop-2.2.0 hadoop2.2

6. Edit the environment variables: # vim /etc/profile

7. Add: export HADOOP_HOME=/usr/local/hadoop2.2

     export HADOOP_MAPRED_HOME=$HADOOP_HOME

     export HADOOP_COMMON_HOME=$HADOOP_HOME

     export HADOOP_HDFS_HOME=$HADOOP_HOME 

     export YARN_HOME=$HADOOP_HOME

     export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

And append to PATH: $HADOOP_HOME/bin:$HADOOP_HOME/sbin (see the consolidated sketch below).
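Taken together, the /etc/profile addition looks like the sketch below; the exact PATH line is an assumption about your profile layout, so adapt it if PATH is assembled elsewhere:

export HADOOP_HOME=/usr/local/hadoop2.2
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin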

8. Reload it: # source /etc/profile
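A quick sanity check that the variables took effect (not part of the original steps):

# echo $HADOOP_HOME    (should print /usr/local/hadoop2.2)
# hadoop version       (should report Hadoop 2.2.0)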

9. Change into the configuration directory: # cd /usr/local/hadoop2.2/etc/hadoop

10. Add the corresponding content to the following files.

11. In hadoop-env.sh, change line 27 to:

export JAVA_HOME=/usr/local/jdk1.6

12. In yarn-env.sh, change line 23 to:

export JAVA_HOME=/usr/local/jdk1.6
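If you are unsure where the JDK actually lives, resolving the java binary is a common trick (this assumes java is on the PATH and GNU readlink is available):

# readlink -f $(which java)
The output minus the trailing /bin/java is the value to use for JAVA_HOME.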

13. Copy mapred-site.xml.template to mapred-site.xml:

# cp mapred-site.xml.template mapred-site.xml

14. In mapred-site.xml, add the following around line 20 (inside the configuration tag); the commented-out properties below are optional examples and can be left as-is:

<property>

    <name>mapreduce.framework.name</name> 

    <value>yarn</value>

</property>

<!--

<property>

    <name>mapreduce.cluster.temp.dir</name>

    <value></value>

    <description>No description</description>

    <final>true</final>

  </property>

  <property>

    <name>mapreduce.cluster.local.dir</name>

    <value></value>

    <description>No description</description>

    <final>true</final>

  </property>

-->

15. In yarn-site.xml, add the following around line 18 (inside the configuration tag):

<property>

<name>yarn.resourcemanager.hostname</name>

    <value>localhost</value>

    <description>hostname of RM</description>

</property>

<property>

<name>yarn.resourcemanager.resource-tracker.address</name>

<value>localhost:5274</value>

<description>host is the hostname of the resource manager and

    port is the port on which the NodeManagers contact the Resource Manager.

</description>

</property>

<property>

    <name>yarn.resourcemanager.scheduler.address</name>

    <value>localhost:5273</value>

    <description>host is the hostname of the resourcemanager and port is the port

    on which the Applications in the cluster talk to the Resource Manager.

</description>

</property>

 <property>

    <name>yarn.resourcemanager.scheduler.class</name>

<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>

    <description>In case you do not want to use the default scheduler</description>

</property>

<property>

    <name>yarn.resourcemanager.address</name>

    <value>localhost:5271</value>

    <description>the host is the hostname of the ResourceManager and the port is the port on which the clients can talk to the Resource Manager. </description>

</property> 

<property>

    <name>yarn.nodemanager.local-dirs</name>

    <value></value>

    <description>the local directories used by the nodemanager</description>

</property>

<property>

   <name>yarn.nodemanager.address</name>

   <value>localhost:5272</value>

   <description>the nodemanagers bind to this port</description>

</property>  

<property>

   <name>yarn.nodemanager.resource.memory-mb</name>

   <value>10240</value>

  <description>the amount of memory available to the NodeManager, in MB</description>

</property>

<property>

   <name>yarn.nodemanager.remote-app-log-dir</name>

    <value>/app-logs</value>

    <description>directory on hdfs where the application logs are moved to </description>

</property> 

<property>

    <name>yarn.nodemanager.log-dirs</name>

    <value>/usr/log</value>

    <description>the directories used by Nodemanagers as log directories</description>

</property>

<property>

   <name>yarn.nodemanager.aux-services</name>

    <value>mapreduce_shuffle</value>

    <description>shuffle service that needs to be set for Map Reduce to run </description>

</property>

16. In core-site.xml, add the following around line 20 (inside the configuration tag):

<property>

     <name>hadoop.tmp.dir</name>

     <value>/usr/local/hadoop2.2/tmp</value>

</property>

<property> 

     <name>fs.defaultFS</name> 

     <value>hdfs://localhost:9000</value> 

     <final>true</final> 

</property>
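The hadoop.tmp.dir directory configured above does not exist yet; creating it up front is a defensive step (Hadoop can usually create it on its own):

# mkdir -p /usr/local/hadoop2.2/tmp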

17. In hdfs-site.xml, add the following around line 20 (inside the configuration tag):

<property>

<name>dfs.namenode.name.dir</name>

<value>file:///dfs/name</value>

<final>true</final>

</property>

<property>

 <name>dfs.datanode.data.dir</name>

  <value>file:///dfs/data</value>

 <final>true</final>

 </property>

<property>

   <name>dfs.replication</name>

   <value>1</value>

</property>

<property>

  <name>dfs.permissions.enabled</name>

  <value>false</value>

</property>
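The NameNode and DataNode directories configured above sit directly under the filesystem root; creating them before formatting avoids a failure on a missing path (again defensive; change the paths if you placed them elsewhere):

# mkdir -p /dfs/name /dfs/data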

 

(Reminder: hadoop-env.sh must point JAVA_HOME at your actual JDK directory, as set in step 11.)

 

18. Set up SSH:

# ssh-keygen -t rsa (press Enter at every prompt to accept the defaults)

Check the result (there should be a key pair: id_rsa and id_rsa.pub):

# cd ~/.ssh

# ls 

Copy the public key into the authorized keys file:

# cp id_rsa.pub authorized_keys

Check again: # ls (there should now be three files)

Verify passwordless login:

# ssh localhost (type yes at the host-key prompt)

# exit

# ssh localhost
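If ssh localhost still prompts for a password, overly permissive modes on ~/.ssh are the usual cause; sshd generally requires something like the following (a common requirement, not part of the original steps):

# chmod 700 ~/.ssh
# chmod 600 ~/.ssh/authorized_keys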

19. Format the NameNode:

# hadoop namenode -format
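On Hadoop 2.x this form still works but prints a deprecation warning; the current equivalent is:

# hdfs namenode -format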

20. Start the daemons:

# start-dfs.sh

# start-yarn.sh

21. Check with # jps; there should be 6 processes.
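PIDs will differ, but a healthy single-node listing looks roughly like this (the process IDs are invented for illustration; the names are the standard Hadoop 2 daemons plus jps itself):

# jps
2287 NameNode
2387 DataNode
2554 SecondaryNameNode
2684 ResourceManager
2785 NodeManager
3012 Jps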

22. Using the built-in browser, confirm that both http://localhost:50070/ (HDFS web UI) and http://localhost:8088/cluster (YARN web UI) open.

(Multi-node environment configuration)

23. DNS configuration (rebooting the virtual machine after these changes is recommended)

24. Change the hostname to hadoop2:

# vim /etc/sysconfig/network

On line 2, change localhost.localdomain to hadoop2.
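After the edit, /etc/sysconfig/network on a CentOS 6-style system typically reads (the NETWORKING line is assumed to be present already):

NETWORKING=yes
HOSTNAME=hadoop2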

25. Update the address mapping:

# vim /etc/hosts

On the third line, add 192.168.100.11 hadoop2 (substitute your machine's actual IP address).
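The resulting /etc/hosts then looks something like the sketch below; the loopback lines are the stock entries and may differ slightly on your system:

127.0.0.1       localhost localhost.localdomain
::1             localhost localhost.localdomain
192.168.100.11  hadoop2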
