From OS Install to Fully Distributed Hadoop -- System Configuration



Ubuntu 13.04 download:

http://releases.ubuntu.com/13.04/

Create a hadoop group and user:

sudo addgroup hadoop

sudo adduser --ingroup hadoop hadoop

Grant the user sudo privileges:

sudo gedit /etc/sudoers

Add the line below (using sudo visudo instead of gedit is safer, since it syntax-checks the file before saving):

hadoop  ALL=(ALL:ALL) ALL
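An optional sanity check of the new account (run from an admin account; this verification step is an addition, not part of the original walkthrough):

id hadoop              # should list the hadoop group
sudo -l -U hadoop      # should include (ALL : ALL) ALL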

===========================

/etc/hosts:

127.0.0.1       localhost
192.168.18.220  master
192.168.66.130  node0
192.168.66.129  ubuntu
# The following lines are desirable for IPv6 capable hosts
::1     ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
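
To confirm the names resolve, ping each entry once (assumes the hosts above are reachable on your network):

ping -c 1 master
ping -c 1 node0
ping -c 1 ubuntu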

===========================

Append the following to /etc/profile (paths assume the JDK, ZooKeeper, and Hadoop are unpacked under /home/hadoop):

export JAVA_HOME=/home/hadoop/jdk1.7.0_45
export JRE_HOME=${JAVA_HOME}/jre
export ZOOKEEPER_HOME=/home/hadoop/hadoop-2.2.0/zookeeper-3.4.5
export HADOOP_HOME=/home/hadoop/hadoop-2.2.0
export HADOOP_2_HOME=/home/hadoop/hadoop-0.20.2
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:${ZOOKEEPER_HOME}/lib

export PATH="$PATH:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:${JAVA_HOME}/bin:${ZOOKEEPER_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_2_HOME}/bin"

 

source /etc/profile
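
Verify the variables are visible in the current shell:

echo $JAVA_HOME
echo $PATH | tr ':' '\n' | grep hadoop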

sudo update-alternatives --install /usr/bin/java java /home/hadoop/jdk1.7.0_45/bin/java 300

sudo update-alternatives --install /usr/bin/javac javac /home/hadoop/jdk1.7.0_45/bin/javac 300

.bashrc: to load the variables in every new shell, add this line to ~/.bashrc (running source under sudo does not work, since source is a shell builtin):

source /etc/profile

====================================================================================================================================

#Update package sources
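
On Ubuntu this step amounts to refreshing the package index (and optionally upgrading installed packages):

sudo apt-get update
sudo apt-get upgrade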

====================================================================================

#Post-install recommendations

6 things to do after installing Ubuntu

20 things to do after installing Ubuntu

====================================================================================

#Install QQ


====================================================================================

#Install VM

VM download: http://download.pchome.net/system/sysenhance/detail-75584.html

VM installation reference:

http://www.th7.cn/system/lin/201210/32989.shtml

====================================================================================

#Install the JDK

sudo apt-get install openjdk-7-jdk


Official download: http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html

Or install a manually downloaded JDK:

tar zxvf jdk.tar.gz -C /home/hadoop/

sudo update-alternatives --install /usr/bin/java java /home/hadoop/jdk1.7.0_45/bin/java 300

sudo update-alternatives --install /usr/bin/javac javac /home/hadoop/jdk1.7.0_45/bin/javac 300

sudo update-alternatives --config java
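
A quick check that the alternative took effect:

java -version     # should report java version "1.7.0_45"
which java        # /usr/bin/java, managed by update-alternatives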


 

====================================================================================

#Install Eclipse

Eclipse download: http://www.eclipse.org/downloads/packages/release/europa/winter

cd ~

mkdir java

#Configure

tar xvfz  ~/download/eclipse-jee-europa-winter-linux-gtk-x86_64.tar.gz  -C ~/java


cd ~/java/eclipse

sudo gedit eclipse.desktop

#Add the following (adjust the /home/xiaomao paths to your own home directory)

[Desktop Entry]
Name=eclipse
Name[zh_CN]=eclipse
Comment=eclipse Client
Exec=/home/xiaomao/java/eclipse/eclipse
Icon=/home/xiaomao/java/eclipse/icon.xpm
Terminal=false
Type=Application
Categories=Application;
Encoding=UTF-8
StartupNotify=true

sudo cp ~/java/eclipse/eclipse.desktop /usr/share/applications
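
Optionally, the launcher file can be sanity-checked with desktop-file-validate from the desktop-file-utils package (an optional extra step):

sudo apt-get install desktop-file-utils
desktop-file-validate ~/java/eclipse/eclipse.desktop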

====================================================================================

#Install MySQL

sudo apt-get install mysql-client-core-5.5

MySQL Connector (JDBC driver): http://www.mysql.com/products/connector/
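
Note that mysql-client-core-5.5 installs only the client tools; if this machine should also host the database (for example as a Hive metastore), the server package is needed as well:

sudo apt-get install mysql-server-5.5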

====================================================================================

#Change the hostname

sudo gedit /etc/hosts

sudo gedit /etc/hostname (when not running as root)

#Allow other users to access the X display

xhost +
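
Be aware that xhost + disables X access control entirely; a narrower alternative is to allow a single local user:

xhost +si:localuser:hadoop    # allow only the local hadoop user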

====================================================================================

SSH reference article:

http://blog.lizhigang.net/archives/249



#Configure SSH

Check the SSH service status:

service ssh status

Start or restart SSH:

sudo service ssh start

or

sudo /etc/init.d/ssh restart


ssh-keygen -t rsa

cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

chmod 600 ~/.ssh/authorized_keys
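
A quick local test that the key works:

ssh localhost    # should log in without asking for a password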

#Remove SSH

sudo apt-get remove ssh

sudo apt-get remove openssh-server

sudo apt-get remove openssh-client

#Install SSH

sudo apt-get install ssh

#Node login (run on each slave node, so the master can log in to it)

scp hadoop@master:~/.ssh/id_rsa.pub ~/.ssh/master_rsa.pub

cat ~/.ssh/master_rsa.pub >>  ~/.ssh/authorized_keys

#Self login (the same steps run on the master itself, so it can ssh to itself)

scp hadoop@master:~/.ssh/id_rsa.pub ~/.ssh/master_rsa.pub

cat ~/.ssh/master_rsa.pub >>  ~/.ssh/authorized_keys

#Master login (run on the master, so the node "ubuntu" can log in to it)

scp hadoop@ubuntu:~/.ssh/id_rsa.pub ~/.ssh/ubuntu_rsa.pub

cat ~/.ssh/ubuntu_rsa.pub >>  ~/.ssh/authorized_keys
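
Once the keys have been exchanged, each direction can be tested (hostnames as in the /etc/hosts example above):

ssh hadoop@master hostname
ssh hadoop@ubuntu hostname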

====================================================================================

#Download hadoop, hbase, and hive

Unpack hadoop, hbase, and hive,

and edit their configuration files.

HBase configuration reference: http://www.linuxidc.com/Linux/2013-02/79661.htm

====================================================================================

#hadoop 0.20.2 CDH download

hadoop 0.20.2 CDH: http://www.cnblogs.com/L-aho/archive/2012/12/07/2807366.html

#sqoop download

sqoop-1.2.0: http://archive.cloudera.com/cdh/3/sqoop-1.2.0-CDH3B4.tar.gz

#sqoop installation

sqoop installation guide: http://www.cnblogs.com/L-aho/archive/2012/12/07/2807366.html

====================================================================================

Distribute the files to the nodes:

scp -r ~/hadoop hadoop@node1:~
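
To copy to several nodes at once, a small loop helps (the node names here are assumptions taken from the /etc/hosts example above; substitute your own):

for h in node0 ubuntu; do
    scp -r ~/hadoop hadoop@$h:~
done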

====================================================================================

Hadoop configuration:

1. Edit the hadoop-env.sh file:

export JAVA_HOME=/home/hadoop/jdk1.7.0_45

2. Edit the yarn-env.sh file:

export JAVA_HOME=/home/hadoop/jdk1.7.0_45

3. Edit the core-site.xml file:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="cmt.xml"/>
  <!-- Default path prefix: the ViewFS mount table defined in cmt.xml -->
  <property>
    <name>fs.defaultFS</name>
    <value>viewfs://ns1</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <!-- Directory on each JournalNode host for edit logs and other state.
       Only one directory may be configured; RAID can be used on the disk
       for better reliability. -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/home/hadoop/hadoop-2.2.0/journal</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/hadoop-2.2.0/tmp</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://master1:9000</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>master1</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
  </property>
</configuration>
4. Edit hdfs-site.xml:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <!-- Logical name of the HDFS nameservice; user-defined (e.g. mycluster).
       Note that systems built on HDFS, such as HBase, will use this name.
       To enable HDFS Federation, list several names separated by commas. -->
  <property>
    <name>dfs.nameservices</name>
    <value>ns1</value>
    <description>Logical name for this new nameservice</description>
  </property>
  <!-- dfs.ha.namenodes.[$nameservice ID]: the NameNodes in this nameservice.
       Each NameNode gets a custom ID; here nameservice ns1 has nn1 and nn2. -->
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>nn1,nn2</value>
    <description>Unique identifiers for each NameNode in the nameservice</description>
  </property>
  <!-- dfs.namenode.rpc-address.[$nameservice ID].[$namenode ID]:
       RPC address of each NameNode -->
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn1</name>
    <value>master1:9000</value>
  </property>
  <!-- dfs.namenode.http-address.[$nameservice ID].[$namenode ID]:
       external HTTP address of each NameNode -->
  <property>
    <name>dfs.namenode.http-address.ns1.nn1</name>
    <value>master1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn2</name>
    <value>master1-s:9000</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.ns1.nn2</name>
    <value>master1-s:50070</value>
  </property>
  <!-- URI of the JournalNode group. The active NameNode writes edit logs to
       these JournalNodes; the standby NameNode reads them and applies them to
       its in-memory namespace. Format:
       qjournal://host1:port1;host2:port2;host3:port3/journalId -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master1:8485;master1-s:8485;slave1:8485/ns1</value>
  </property>
  <!-- Failover mode -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
    <description>true enables automatic failover</description>
  </property>
  <!-- Java class that DFS clients use to locate the current active NameNode.
       May be user-implemented; the default is ConfiguredFailoverProxyProvider. -->
  <property>
    <name>dfs.client.failover.proxy.provider.ns1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- Fencing, to prevent split-brain: sshfence logs in to the previous active
       NameNode over ssh and kills it. This requires passwordless ssh, so
       dfs.ha.fencing.ssh.private-key-files points at a private key file. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <!--
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence([[username][:port]])</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
  -->
  <!-- Shell-based fencing -->
  <!--
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/path/to/my/script.sh arg1 arg2 ...)</value>
  </property>
  -->
  <!-- ZooKeeper -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>master1:2181,master1-s:2181,slave1:2181</value>
    <description>ZooKeeper quorum used for HA</description>
  </property>
  <!--
  <property>
    <name>ha.zookeeper.session-timeout.ms</name>
    <value>5000</value>
    <description>ZooKeeper session timeout in milliseconds</description>
  </property>
  -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
</configuration>
5. Edit yarn-site.xml:

<?xml version="1.0"?>
<configuration>
  <!-- Address the ResourceManager exposes to clients; used to submit and
       kill applications. -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master1:18032</value>
  </property>
  <!-- Address the ResourceManager exposes to ApplicationMasters; used to
       request and release resources. -->
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master1:18030</value>
  </property>
  <!-- Address the ResourceManager exposes to NodeManagers; used for
       heartbeats and task assignment. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master1:18033</value>
  </property>
  <!-- Number of handlers for RPC requests from NodeManagers -->
  <!--
  <property>
    <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
    <value>50</value>
  </property>
  -->
  <!-- Number of handlers for RPC requests from ApplicationMasters -->
  <!--
  <property>
    <name>yarn.resourcemanager.scheduler.client.thread-count</name>
    <value>50</value>
  </property>
  -->
  <!-- Address the ResourceManager exposes to administrators for admin commands -->
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master1:18087</value>
  </property>
  <!-- ResourceManager web UI address; cluster information can be viewed
       here in a browser -->
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master1:18088</value>
  </property>
  <!-- Scheduler implementation: currently FIFO, Capacity Scheduler,
       or Fair Scheduler -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>
  <!-- NodeManager heartbeat interval -->
  <!--
  <property>
    <name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
    <value>1000</value>
  </property>
  -->
  <!-- NodeManager settings -->
  <!-- Auxiliary services run on the NodeManager -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
6. Edit cmt.xml:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Client mount table: maps virtual paths to a concrete nameservice and a
       physical subdirectory within it. The mount table name in the property
       keys must match the authority of fs.defaultFS (viewfs://ns1),
       hence "ns1". -->
  <property>
    <name>fs.viewfs.mounttable.ns1.link./user</name>
    <value>hdfs://ns1/user</value>
  </property>
  <property>
    <name>fs.viewfs.mounttable.ns1.link./fee</name>
    <value>hdfs://ns1/fee</value>
  </property>
</configuration>

7. Edit the slaves file:

slave1
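
With the config files in place, the first start of a Hadoop 2.2 HA cluster set up this way roughly follows the sketch below. This is a hedged outline, assuming ZooKeeper is already running on the quorum hosts and that $HADOOP_HOME/sbin (where the daemon scripts live) is on the PATH:

# on each JournalNode host (master1, master1-s, slave1):
hadoop-daemon.sh start journalnode

# on the first NameNode (master1):
hdfs namenode -format
hdfs zkfc -formatZK                  # initialise the HA state in ZooKeeper
hadoop-daemon.sh start namenode

# on the standby NameNode (master1-s):
hdfs namenode -bootstrapStandby

# back on master1:
start-dfs.sh
start-yarn.sh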

