Hadoop集群配置详细版1.0.3

来源:互联网 发布:郭嘉怎么死的知乎 编辑:程序博客网 时间:2024/05/17 18:19

之前配置Hadoop的很多步骤不太详细,配置文件的许多优化点也没太搞清楚,重新写一下。

2012.06.22更新:Hadoop版本兼容到1.0.3。

0、ssh免密码登录

view source
print?
# Generate a passwordless RSA key pair, authorize it for password-free local
# login, and disable the interactive host-key confirmation prompt.
ssh-keygen -t rsa -P ""
cat $HOME/.ssh/id_rsa.pub >> $HOME/.ssh/authorized_keys
echo "StrictHostKeyChecking no" >> ~/.ssh/config

1、安装JDK7

view source
print?
# Download && unpack && install JDK 7
wget http://download.oracle.com/otn-pub/java/jdk/7u2-b13/jdk-7u2-linux-i586.tar.gz
tar -xzf jdk-7u2-linux-i586.tar.gz
mv ./jdk1.7.0_02 ~/jdk

# Configure the JAVA_HOME environment variables (append these lines to ~/.bashrc)
vim ~/.bashrc
export JAVA_HOME=/home/hadoop/jdk/
export JAVA_BIN=/home/hadoop/jdk/bin
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

2、安装Hadoop(1.0.3)

view source
print?
# Download and unpack Hadoop 1.0.3
wget http://labs.mop.com/apache-mirror/hadoop/common/hadoop-1.0.3/hadoop-1.0.3-bin.tar.gz
tar -xzvf hadoop-1.0.3-bin.tar.gz
mv ./hadoop-1.0.3 ~/hadoop_home

# Create the runtime directories referenced by the XML configs below
# (var/tmp, var/mapred, var/hdfs/name, var/hdfs/data)
cd ~/hadoop_home
mkdir var
cd var
mkdir tmp mapred hdfs
cd hdfs
mkdir name data

# Export JAVA_HOME inside conf/hadoop-env.sh (add the export line to the file)
cd ~/hadoop_home/conf/
vim ./hadoop-env.sh
export JAVA_HOME=/home/hadoop/jdk/

更新:注意权限,新版本中,所有HDFS目录权限务必是755,不能是775。

view source
print?
1chmod755 data name

3、准备环境变量

主要是HADOOP_HOME,在1.0之后,还要这个参数

view source
print?
# HADOOP_HOME plus the 1.0+ flag that suppresses the "HADOOP_HOME is deprecated" warning
export HADOOP_HOME=/home/hadoop/hadoop_home/
export HADOOP_HOME_WARN_SUPPRESS=1

4、配置hosts(Linux和Hadoop)

view source
print?
# Edit the hosts file on EVERY node so hostnames resolve cluster-wide
sudo vim /etc/hosts
# Hosts for hadoop
10.70.0.101 hadoop1
10.70.0.102 hadoop2
# ... one line per node ...

# Configure masters and slaves
# (original said "vim ~/hadoop_home/conf" — conf is a directory; enter it with cd)
cd ~/hadoop_home/conf
vim masters
hadoop1
vim slaves
hadoop1
hadoop2
# ... one hostname per line ...

5、配置文件:

参数详细配置参考:http://hadoop.apache.org/common/docs/current/cluster_setup.html

core-site.xml

view source
print?
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>fs.default.name</name>
  <value>hdfs://hadoop1:54310</value>
</property>
<property>
  <name>hadoop.tmp.dir</name>
  <value>/home/hadoop/hadoop_home/var/tmp</value>
</property>
<!-- The following settings use more memory but speed things up -->
<property>
  <name>fs.inmemory.size.mb</name>
  <value>200</value>
</property>
<property>
  <name>io.sort.factor</name>
  <value>100</value>
</property>
<property>
  <name>io.sort.mb</name>
  <value>200</value>
</property>
</configuration>

hdfs-site.xml

view source
print?
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>dfs.replication</name>
  <value>3</value>
</property>
<property>
  <name>dfs.data.dir</name>
  <value>/home/hadoop/hadoop_home/var/hdfs/data</value>
</property>
<property>
  <name>dfs.name.dir</name>
  <value>/home/hadoop/hadoop_home/var/hdfs/name</value>
</property>
<!-- Block size: 134217728 bytes = 128MB -->
<property>
  <name>dfs.block.size</name>
  <value>134217728</value>
</property>
<!-- Parallel RPC handler threads for the namenode -->
<property>
    <name>dfs.namenode.handler.count</name>
    <value>40</value>
</property>
</configuration>

mapred-site.xml

view source
print?
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
  <name>mapred.job.tracker</name>
  <value>hadoop1:54311</value>
</property>
<property>
    <name>mapred.reduce.parallel.copies</name>
    <value>20</value>
</property>
<property>
  <name>mapred.local.dir</name>
  <value>/home/hadoop/hadoop_home/var/mapred</value>
</property>
<property>
  <name>mapred.tasktracker.map.tasks.maximum</name>
  <value>12</value>
</property>
<property>
  <name>mapred.tasktracker.reduce.tasks.maximum</name>
  <value>6</value>
</property>
<!-- The following settings use more memory but speed things up -->
<property>
  <name>mapred.map.child.java.opts</name>
  <value>-Xmx512M</value>
</property>
<property>
  <name>mapred.reduce.child.java.opts</name>
  <value>-Xmx512M</value>
</property>
</configuration>

6、格式化namenode

view source
print?
# Format the HDFS namenode (run once, before first start; uses absolute path
# so the step does not depend on the current directory)
cd ~/hadoop_home/bin
./hadoop namenode -format

7、启动Hadoop

view source
print?
# Start all Hadoop daemons
cd ~/hadoop_home/bin
./start-all.sh

# Verify the daemons are up; expected jps output (PIDs will differ):
jps
#   7532 SecondaryNameNode
#   7346 NameNode
#   7433 DataNode
#   7605 JobTracker
#   7759 Jps
#   7701 TaskTracker

GUI:http://hadoop1:50030(MapReduce JobTracker,集群作业状态)
GUI:http://hadoop1:50070(HDFS)

8、其他说明:

mapred.tasktracker.map.tasks.maximum 每个结点最多运行多少个map
mapred.tasktracker.reduce.tasks.maximum 每个结点最多运行多少个reduce

原创粉丝点击