从装机到hadoop完全分布式
来源:互联网 发布:悟空助手mac电脑版 编辑:程序博客网 时间:2024/06/05 08:15
ubuntu13.04下载地址
http://releases.ubuntu.com/13.04/
新建用户组和用户:
sudo addgroup hadoop
sudo adduser --ingroup hadoop hadoop
授权:
sudo visudo
添加下面代码
hadoop ALL=(ALL:ALL) ALL
===========================
配置hosts文件
hosts:
127.0.0.1 localhost
192.168.18.220 master
192.168.66.130 node0
192.168.66.129 ubuntu
# The following lines are desirable for IPv6 capable hosts
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
===========================
配置环境变量:
export JRE_HOME=${JAVA_HOME}/jre
export ZOOKEEPER_HOME=/home/hadoop/hadoop-2.2.0/zookeeper-3.4.5
export HADOOP_HOME=/home/hadoop/hadoop-2.2.0
export HADOOP_2_HOME=/home/hadoop/hadoop-0.20.2
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:${ZOOKEEPER_HOME}/lib:
export PATH="$PATH:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:${JAVA_HOME}/bin:${ZOOKEEPER_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_2_HOME}/bin"
source /etc/profile
sudo update-alternatives --install /usr/bin/java java /usr/lib/java/jdk7/bin/java 300
sudo update-alternatives --install /usr/bin/javac javac /usr/lib/java/jdk7/bin/javac 300
更新配置
.bashrc
source /etc/profile
====================================================================================================================================
#更新下载源
更新下载源
====================================================================================
#装机后的推荐
装机后该做的6件事
装机后该做的20件事
====================================================================================
#安装QQ
====================================================================================
#安装VM
http://download.pchome.net/system/sysenhance/detail-75584.html VM下载地址
安装VM参考链接
http://www.th7.cn/system/lin/201210/32989.shtml
====================================================================================
#安装jdk
sudo apt-get install openjdk-7-jdk
安装自己的JDK:
http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html 官方地址
自己下载JDK:
tar zxvf jdk.tar.gz -C /home/hadoop/
sudo update-alternatives --install /usr/bin/java java /home/hadoop/jdk1.7.0_45/bin/java 300
sudo update-alternatives --install /usr/bin/javac javac /home/hadoop/jdk1.7.0_45/bin/javac 300
sudo update-alternatives --config java
====================================================================================
#安装eclipse
http://www.eclipse.org/downloads/packages/release/europa/winter eclipse地址
cd ~
mkdir java
#配置
tar xvfz ~/download/eclipse-jee-europa-winter-linux-gtk-x86_64.tar.gz -C ~/java
给eclipse做图标
cd ~/java/eclipse
sudo gedit eclipse.desktop
#加入下面信息
[Desktop Entry]
Name=eclipse
Name[zh_CN]=eclipse
Comment=eclipse Client
Exec=/home/xiaomao/java/eclipse/eclipse
Icon=/home/xiaomao/java/eclipse/icon.xpm
Terminal=false
Type=Application
Categories=Application;
Encoding=UTF-8
StartupNotify=true
sudo cp ~/java/eclipse/eclipse.desktop /usr/share/applications
====================================================================================
#安装mysql
sudo apt-get install mysql-client-core-5.5
http://www.mysql.com/products/connector/mysql 驱动
====================================================================================
#修改主机名
sudo gedit /etc/hosts
sudo gedit /etc/hostname (非Root权限环境下)
#允许其他用户访问图形界面
xhost +
====================================================================================
SSH参考文章
http://blog.lizhigang.net/archives/249 点击打开链接
#配置ssh
查看ssh运行状态
service ssh status
启动或重启 SSH
sudo service ssh start
或者
sudo /etc/init.d/ssh restart
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
#删除ssh
sudo apt-get remove ssh
sudo apt-get remove openssh-server
sudo apt-get remove openssh-client
#安装ssh
sudo apt-get install ssh
#节点登陆
scp hadoop@master:~/.ssh/id_rsa.pub ~/.ssh/master_rsa.pub
cat ~/.ssh/master_rsa.pub >> ~/.ssh/authorized_keys
#自我登陆
scp hadoop@master:~/.ssh/id_rsa.pub ~/.ssh/master_rsa.pub
cat ~/.ssh/master_rsa.pub >> ~/.ssh/authorized_keys
#主点登陆
scp hadoop@ubuntu:~/.ssh/id_rsa.pub ~/.ssh/ubuntu_rsa.pub
cat ~/.ssh/ubuntu_rsa.pub >> ~/.ssh/authorized_keys
====================================================================================
#下载hadoop,hbase,hive
解压 hadoop,hbase,hive
并修改配置文件
http://www.linuxidc.com/Linux/2013-02/79661.htmHBASE配置
====================================================================================
#hadoop0.20.2CDH下载
http://www.cnblogs.com/L-aho/archive/2012/12/07/2807366.htmlhadoop0.20.2CDH
#sqoop下载
http://archive.cloudera.com/cdh/3/sqoop-1.2.0-CDH3B4.tar.gz sqoop-1.2.0下载
#sqoop配置安装
http://www.cnblogs.com/L-aho/archive/2012/12/07/2807366.htmlsqoop安装
====================================================================================
分发文件
scp -r ~/hadoop hadoop@node1:~
====================================================================================
hadoop配置:
1,修改hadoop-env.sh文件:
export JAVA_HOME=/home/hadoop/jdk1.7.0_45
2,修改yarn-env.sh文件:
export JAVA_HOME=/home/hadoop/jdk1.7.0_45
3,修改core-site.xml文件:
- <?xml version="1.0"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <configuration xmlns:xi="http://www.w3.org/2001/XInclude">
- <xi:include href="cmt.xml"/>
- <!--设置缺省的目录前缀-->
- <property>
- <name>fs.defaultFS</name>
- <value>viewfs://ns1</value>
- </property>
- <property>
- <name>io.file.buffer.size</name>
- <value>131072</value>
- </property>
- <!--JournalNode 所在节点上的一个目录,用于存放 editlog 和其他状态信息。该参数只能设
- 置一个目录,你可以对磁盘做 RAID 提高数据可靠性。-->
- <property>
- <name>dfs.journalnode.edits.dir</name>
- <value>/home/hadoop/hadoop-2.2.0/journal</value>
- </property>
- <property>
- <name>hadoop.tmp.dir</name>
- <value>/home/hadoop/hadoop-2.2.0/tmp</value>
- </property>
- <property>
- <name>fs.default.name</name>
- <value>hdfs://master1:9000</value>
- </property>
- <property>
- <name>hadoop.proxyuser.hadoop.hosts</name>
- <value>master1</value>
- </property>
- <property>
- <name>hadoop.proxyuser.hadoop.groups</name>
- <value>*</value>
- </property>
- </configuration>
- <?xml version="1.0"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <configuration>
- <property>
- <name>dfs.replication</name>
- <value>1</value>
- </property>
- <property>
- <name>dfs.permissions</name>
- <value>false</value>
- </property>
- <!--HDFS 命名服务的逻辑名称,可用户自己定义,比如 mycluster,注意,该名称将被基
- 于 HDFS 的系统使用,比如 Hbase 等,此外,如果你想启用 HDFS Federation,可以通过该
- 参数指定多个逻辑名称,并用“,”分割。-->
- <property>
- <name>dfs.nameservices</name>
- <value>ns1</value>
- <description>Logical name for this new nameservice</description>
- </property>
- <!--dfs.ha.namenodes.[$nameservice ID]:
- 某个命名服务下包含的 NameNode 列表,可为每个 NameNode 指定一个自定义的 ID 名
- 称,比如命名服务 mycluster 下有两个 NameNode,分别命名为 nn1 和 nn2,-->
- <property>
- <name>dfs.ha.namenodes.ns1</name>
- <value>nn1,nn2</value>
- <description>Unique identifiers for each NameNode in the nameservice
- </description>
- </property>
- <!--dfs.namenode.rpc-address.[$nameservice ID].[$name node ID]
- 为每个 NameNode 设置 RPC 地址-->
- <property>
- <name>dfs.namenode.rpc-address.ns1.nn1</name>
- <value>master1:9000</value>
- </property>
- <!--dfs.namenode.http-address.[$nameservice ID].[$name node ID]
- 为每个 NameNode 设置对外的 HTTP 地址-->
- <property>
- <name>dfs.namenode.http-address.ns1.nn1</name>
- <value>master1:50070</value>
- </property>
- <property>
- <name>dfs.namenode.rpc-address.ns1.nn2</name>
- <value>master1-s:9000</value>
- </property>
- <property>
- <name>dfs.namenode.http-address.ns1.nn2</name>
- <value>master1-s:50070</value>
- </property>
- <!--设置一组 journalNode 的 URI 地址,active NameNode 将 edit log 写入这些
- JournalNode,而 standby NameNode 读取这些 edit log,并作用在内存中的目录树中,该属性
- 值应符合以下格式:
- qjournal://host1:port1;host2:port2;host3:port3/journalId -->
- <property>
- <name>dfs.namenode.shared.edits.dir</name>
- <value>qjournal://master1:8485;master1-s:8485;slave1:8485/ns1</value>
- </property>
- <!--设置切换模式-->
- <property>
- <name>dfs.ha.automatic-failover.enabled</name>
- <value>true</value>
- <description>设置为true时,表示自动切换</description>
- </property>
- <!--设置客户端与 active NameNode 进行交互的 Java 实现类,DFS 客户端通过该类寻找当前的
- active NameNode。
- 该类可由用户自己实现,
- 默认实现为 ConfiguredFailoverProxyProvider。-->
- <property>
- <name>dfs.client.failover.proxy.provider.ns1</name>
- <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
- </property>
- <!--为了解决脑裂的问题:sshfence 通过 ssh 登录到前一个 active NameNode 并将其杀死。
- 为了让该机制成功执行,
- 需配置免密码 ssh 登陆,
- 这可通过参数 dfs.ha.fencing.ssh.private-key-files 设置一个私钥文件。-->
- <property>
- <name>dfs.ha.fencing.methods</name><value>sshfence</value>
- </property>
- <property>
- <name>dfs.ha.fencing.ssh.private-key-files</name>
- <value>/home/hadoop/.ssh/id_rsa</value>
- </property>
- <!--
- <property>
- <name>dfs.ha.fencing.methods</name>
- <value>sshfence([[username][:port]])</value>
- </property>
- <property>
- <name>dfs.ha.fencing.ssh.connect-timeout</name>
- <value>30000</value>
- </property>
- -->
- <!--Shell方式-->
- <!--
- <property>
- <name>dfs.ha.fencing.methods</name>
- <value>shell(/path/to/my/script.sh arg1 arg2 ...)</value>
- </property>
- -->
- <!--zookeeper-->
- <property>
- <name>ha.zookeeper.quorum</name>
- <value>master1:2181,master1-s:2181,slave1:2181</value>
- <description>指定用于HA的ZooKeeper集群机器列表</description>
- </property>
- <!--
- <property>
- <name>ha.zookeeper.session-timeout.ms</name>
- <value>5000</value>
- <description>指定ZooKeeper超时间隔,单位毫秒</description>
- </property>
- -->
- <property>
- <name>dfs.webhdfs.enabled</name>
- <value>true</value>
- </property>
- </configuration>
- <?xml version="1.0"?>
- <configuration>
- <!--resourcemanager对客户端暴露的地址。客户端通过该地址向RM提交应用程序,杀死应用程序等-->
- <property>
- <name>yarn.resourcemanager.address</name>
- <value>master1:18032</value>
- </property>
- <!--resourcemanager对application暴露的访问地址。applicationMaster通过该地址向RM申请资源,释放资源等-->
- <property>
- <name>yarn.resourcemanager.scheduler.address</name>
- <value>master1:18030</value>
- </property>
- <!--resourcemanager对NodeManager暴露的地址。NodeManager通过该地址向RM汇报心跳,领取任务等-->
- <property>
- <name>yarn.resourcemanager.resource-tracker.address</name>
- <value>master1:18033</value>
- </property>
- <!--处理来自NodeManager的RPC请求的Handler数目-->
- <!--
- <property>
- <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
- <value>50</value>
- </property>
- -->
- <!--处理来自ApplicationMaster的RPC请求的Handler数目-->
- <!--
- <property>
- <name>yarn.resourcemanager.scheduler.client.thread-count</name>
- <value>50</value>
- </property>
- -->
- <!--resourcemanager对管理员暴露的访问地址,管理员通过该地址向RM发送管理命令-->
- <property>
- <name>yarn.resourcemanager.admin.address</name>
- <value>master1:18087</value>
- </property>
- <!--resourcemanager对外web ui地址。用户可通过该地址在浏览器中查看集群各类信息-->
- <property>
- <name>yarn.resourcemanager.webapp.address</name>
- <value>master1:18088</value>
- </property>
- <!--启用的资源调度器主类,目前可用的有FIFO,Capacity Scheduler 和 Fair Scheduler-->
- <property>
- <name>yarn.resourcemanager.scheduler.class</name>
- <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
- </property>
- <!--NodeManager心跳间隔-->
- <!--
- <property>
- <name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
- <value>1000</value>
- </property>
- -->
- <!--NodeManager相关参数配置-->
- <!--NodeManager上运行的附属服务-->
- <property>
- <name>yarn.nodemanager.aux-services</name>
- <value>mapreduce_shuffle</value>
- </property>
- </configuration>
- <?xml version="1.0"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <configuration>
- <!--客户端挂载表,其内容就是虚拟路径到具体某个NS及其物理子目录的映射关系,-->
- <property>
- <name>fs.viewfs.mounttable.cmt.link./user</name>
- <value>hdfs://ns1/user</value>
- </property>
- <property>
- <name>fs.viewfs.mounttable.cmt.link./fee</name>
- <value>hdfs://ns1/fee</value>
- </property>
- </configuration>
7,修改slaves文件:
slave1
- 从装机到hadoop完全分布式--系统配置
- 从装机到hadoop完全分布式
- hadoop从入门到精通第一节Hadoop完全分布式安装部署
- Java之美[从菜鸟到高手演练]之Linux下Hadoop的完全分布式安装
- Hadoop完全分布式配置
- Hadoop完全分布式配置
- hadoop完全分布式配置
- hadoop完全分布式实践
- hadoop完全分布式配置
- Hadoop完全分布式配置
- Hadoop完全分布式配置
- Hadoop完全分布式配置
- hadoop完全分布式安装
- Hadoop完全分布式配置
- Hadoop完全分布式配置
- hadoop完全分布式
- Hadoop完全分布式配置
- 完全分布式安全hadoop
- C语言中空格和\0的区别
- windows环境变量的备份与恢复
- hdu1401 Solitaire
- 用prctl给线程命名 【源码实现】
- PL/SQL Developer登录不进去,总是报用户名密码有误
- 从装机到hadoop完全分布式
- Sql Server 生成32位uuid
- C++回顾之运算符重载
- JVM调优总结
- JBPM4环境搭建
- axis 部署及获取webService接口
- Cron Expressions——Cron 表达式
- 关于元素定位的问题
- django和mezzanine搭建博客小站1