Big Data - Hadoop Study Notes 03


8. Hadoop Script Analysis

The Hadoop scripts live under ../hadoop/sbin; you can also find a script's path with which cmd, as in the example below.
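
For example, assuming $HADOOP_HOME/sbin is on the PATH (the installation path shown is only illustrative):

    $ which start-all.sh
    /usr/local/hadoop/sbin/start-all.sh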

1. start-all.sh

#!/usr/bin/env bash

echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"

bin=`dirname "${BASH_SOURCE-$0}"`        # get the script's directory from BASH_SOURCE, falling back to $0 (the script's own path)
bin=`cd "$bin"; pwd`                     # resolve it to an absolute path
DEFAULT_LIBEXEC_DIR="$bin"/../libexec    # default to the libexec directory one level up
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}   # ":-" substitution: use HADOOP_LIBEXEC_DIR if set, otherwise the default
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh   # source hadoop-config.sh to set up environment variables

# start hdfs daemons if hdfs is present
if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then
  "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR   # call ./sbin/start-dfs.sh to start HDFS
fi

# start yarn daemons if yarn is present
if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then
  "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR  # call ./sbin/start-yarn.sh to start YARN
fi

2. hadoop-config.sh

    Sets various environment variables (see the sketch below):
    HADOOP_CONF_DIR
    HEAP_SIZE=1000m
    CLASSPATH
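
A simplified sketch of the kind of defaults hadoop-config.sh applies; this is not a verbatim excerpt, and the exact contents vary across Hadoop versions:

    # default JVM heap; can be overridden by exporting HADOOP_HEAPSIZE before start-up
    JAVA_HEAP_MAX=-Xmx1000m
    if [ "$HADOOP_HEAPSIZE" != "" ]; then
      JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
    fi

    # the classpath starts with the configuration directory and grows with the Hadoop jars
    CLASSPATH="${HADOOP_CONF_DIR}"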

3. start-dfs.sh --config $HADOOP_CONF_DIR   # starts HDFS

    1. libexec/hdfs-config.sh           # set environment variables
    2. hdfs getconf -namenodes          # get the namenode hostnames
    3. "./sbin/hadoop-daemons.sh" \     # start the namenode
           --config "$HADOOP_CONF_DIR" \
           --hostnames "$NAMENODES" \
           --script "$bin/hdfs" start namenode $nameStartOpt
    4. "./sbin/hadoop-daemons.sh" \     # start the datanodes
           --config "$HADOOP_CONF_DIR" \
           --script "$bin/hdfs" start datanode $dataStartOpt
    5. "./sbin/hadoop-daemons.sh" \     # start the secondary namenode
           --config "$HADOOP_CONF_DIR" \
           --hostnames "$SECONDARY_NAMENODES" \
           --script "$bin/hdfs" start secondarynamenode

4. hdfs-config.sh

    Ultimately it just sources hadoop-config.sh (see the sketch below).
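
A simplified sketch of how hdfs-config.sh locates and sources hadoop-config.sh (not a verbatim excerpt):

    # try the usual candidate locations and source the first hadoop-config.sh found
    if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
      . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
    elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
      . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
    else
      echo "Hadoop common not found." >&2
      exit 1
    fi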

5. ./sbin/hadoop-daemons.sh   # starts the daemon processes

    1. hadoop-config.sh
    2. Loops over the slaves file and logs in to each remote host via ssh to run the given command (see the sketch of slaves.sh below):
       exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
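
For reference, the ssh loop inside slaves.sh looks roughly like this (a simplified sketch, not a verbatim excerpt):

    # read the host list, skip comments and blank lines, and run the command on every slave in parallel
    for slave in `cat "$HOSTLIST" | sed "s/#.*$//;/^$/d"`; do
      ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" 2>&1 | sed "s/^/$slave: /" &
    done
    wait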

6. ./sbin/hadoop-daemon.sh

    1. hadoop-config.sh
    2. ./bin/hdfs    # invokes the concrete class to start the process; a sketch of how the daemon is launched follows
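
Roughly, hadoop-daemon.sh redirects output to a log file, starts the requested script in the background with nohup, and records the pid so the stop action can find the process later (a simplified sketch, not a verbatim excerpt):

    # launch the requested script (e.g. bin/hdfs) in the background and remember its pid
    nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
    echo $! > $pid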

7. ./bin/hadoop

    1. hadoop-config.sh
    2. Invokes the Java class

8. ./bin/hdfs

    1. hadoop-config.sh
    2. Invokes the Java class (see the sketch below)
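
A simplified sketch of how bin/hdfs maps a subcommand to the class it runs (not a verbatim excerpt; only two of the many subcommands are shown):

    if [ "$COMMAND" = "namenode" ] ; then
      CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
    elif [ "$COMMAND" = "datanode" ] ; then
      CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
    fi
    # finally the JVM is started with the classpath and options assembled by hadoop-config.sh
    exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"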

9. Common Hadoop Commands and Operations

1. Basic cluster commands:
    1. Format the file system
        hadoop namenode -format
    2. Start a datanode/namenode on a single node
        hadoop-daemon.sh start datanode
        hadoop-daemon.sh start namenode
    3. Start the datanodes/namenode across the cluster
        hadoop-daemons.sh start datanode
        hadoop-daemons.sh start namenode
    4. Start yarn
        start-yarn.sh
    5. Start hdfs
        start-dfs.sh
2. Basic HDFS commands (largely the same as their Linux counterparts; a short worked session follows the list)
    1. Upload a file
        hadoop fs -put  or  hdfs dfs -put
    2. Rename or move
        hadoop fs -mv  or  hdfs dfs -mv
    3. Download a file
        hadoop fs -get  or  hdfs dfs -get
    4. Delete files
        hadoop fs -rm -r  or  hdfs dfs -rm -r
    5. List files recursively
        hadoop fs -lsr  or  hdfs dfs -lsr
    6. Copy files within hdfs
        hadoop fs -cp  or  hdfs dfs -cp
    7. Change file permissions
        hadoop fs -chmod  or  hdfs dfs -chmod
    8. Change the owner/group
        hadoop fs -chown  or  hdfs dfs -chown
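
A short illustrative session tying a few of these together (the file and directory names are made up for the example; any paths will do):

    hadoop fs -put words.txt /data/words.txt       # upload a local file
    hadoop fs -lsr /data                           # list the directory recursively
    hadoop fs -mv /data/words.txt /data/words.bak  # rename it inside hdfs
    hadoop fs -cp /data/words.bak /data/words.cp   # copy it inside hdfs
    hadoop fs -chmod 644 /data/words.bak           # change its permissions
    hadoop fs -get /data/words.bak ./words.bak     # download it
    hadoop fs -rm -r /data                         # remove the directory recursively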

10. Hadoop Configuration Notes

1. The namenode's local directory can be configured with multiple entries; each directory then holds an identical copy of the metadata
    【hdfs-site.xml】
    dfs.namenode.name.dir=dir1,dir2
2. The datanode's local directory can also be configured with multiple entries, but they are not backups of each other; block data is spread across them (an example hdfs-site.xml snippet follows)
    【hdfs-site.xml】
    dfs.datanode.data.dir=dir1,dir2
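
For example, in hdfs-site.xml the two settings look like this (the directory paths are placeholders; use paths that exist on your nodes):

    <configuration>
      <!-- two name directories: the namenode writes identical metadata to both -->
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop/name1,/data/hadoop/name2</value>
      </property>
      <!-- two data directories: the datanode spreads block files across them -->
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop/data1,/data/hadoop/data2</value>
      </property>
    </configuration>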

11. Working with HDFS

1. Operating on the HDFS file system through the API
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;

public class TestHDFS {
    // register the protocol handler factory so that Java can recognize the hdfs:// scheme
    static {
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws Exception {
        // the hdfs url of the file to read
        String url = "hdfs://localhost:8020/test.txt";
        // open a connection to that url
        URLConnection conn = new URL(url).openConnection();
        // open the input stream
        InputStream is = conn.getInputStream();
        FileOutputStream fos = new FileOutputStream("/Users/username/api.txt");
        byte[] buf = new byte[1024];
        int len = -1;
        while ((len = is.read(buf)) != -1) {
            fos.write(buf, 0, len);
        }
        fos.close();
        is.close();
        // quick look at whatever is left in the last buffer read
        System.out.println(new String(buf));
        System.out.println("----- over -----");
    }
}
2. Operating on HDFS with Hadoop's FileSystem API
import java.io.FileOutputStream;
import java.io.IOException;
import java.lang.reflect.Method;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;

/**
 * Reading and writing files with Hadoop's FileSystem API.
 */
public class TestFileSystem {

    private FileSystem fs;

    @Before
    public void iniConf() {
        try {
            // point the client at the hdfs cluster
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://localhost:8020/");
            fs = FileSystem.get(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // write a file to hdfs
    @Test
    public void writeFile() throws Exception {
        Path path = new Path("/write.txt");
        FSDataOutputStream dos = fs.create(path);
        dos.write("hello write!\n".getBytes());
        dos.close();
        System.out.println("----- over -----");
    }

    // download a file from hdfs
    @Test
    public void readFile() throws Exception {
        Path path = new Path("/read.txt");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/read.txt");
        IOUtils.copyBytes(fis, fos, 1024);
        IOUtils.closeStream(fos);
        IOUtils.closeStream(fis);
        System.out.println("----- over -----");
    }

    // write a file to hdfs with an explicit replication factor
    @Test
    public void writerFileInReplication() throws Exception {
        Path path = new Path("hdfs://localhost:8020/replication.txt");
        FSDataOutputStream dos = fs.create(path, (short) 2);
        dos.write("hello replication!\n".getBytes());
        dos.close();
        System.out.println("----- over -----");
    }

    // after uploading jdk-8u111-macosx-x64.dmg to hdfs, read its first block
    @Test
    public void readFileSeek128() throws Exception {
        Path path = new Path("hdfs://localhost:8020/jdk-8u111-macosx-x64.dmg");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/jdk-part0");
        byte[] buf = new byte[1024];
        // 128 * 1024 reads of 1 KB each = 128 MB, i.e. one full block
        for (int i = 0; i < 128 * 1024; i++) {
            int len = fis.read(buf);
            if (len == -1) {
                break;
            }
            fos.write(buf, 0, len);
        }
        fos.close();
        fis.close();
    }

    // after uploading jdk-8u111-macosx-x64.dmg to hdfs, read its second block
    @Test
    public void readFileSeek() throws Exception {
        Path path = new Path("hdfs://localhost:8020/jdk-8u111-macosx-x64.dmg");
        FSDataInputStream fis = fs.open(path);
        FileOutputStream fos = new FileOutputStream("/Users/username/jdk-part1");
        // position the stream at the 128 MB offset, i.e. the start of the second block
        fis.seek(1024 * 1024 * 128);
        IOUtils.copyBytes(fis, fos, 1024);
        IOUtils.closeStream(fos);
        IOUtils.closeStream(fis);
    }

    // create directories
    @Test
    public void mkdir() throws Exception {
        Path path = new Path("/zhao/zhe");
        // create the directory
        fs.mkdirs(path);
        // create the directory with explicit permissions
        fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    }

    // get the status of a path and dump its getters via reflection
    @Test
    public void fileStatus() throws Exception {
        Path path = new Path("/");
        FileStatus fstatus = fs.getFileStatus(path);
        Class clazz = FileStatus.class;
        Method[] ms = clazz.getDeclaredMethods();
        for (Method m : ms) {
            String mname = m.getName();
            Class[] ptype = m.getParameterTypes();
            if (mname.startsWith("get") && (ptype == null || ptype.length == 0)) {
                // getSymlink() throws when the path is not a symlink, so skip it
                if (!mname.equals("getSymlink")) {
                    Object ret = m.invoke(fstatus);
                    System.out.println(mname + "() = " + ret);
                }
            }
        }
    }

    // walk the directory tree recursively
    @Test
    public void recursiveFile() throws Exception {
        print(fs.getFileStatus(new Path("/")));
    }

    private void print(FileStatus fss) {
        try {
            Path path = fss.getPath();
            // print the path name
            System.out.println(path.toUri().getPath());
            if (fss.isDirectory()) {
                // list everything under this directory
                FileStatus[] fsarr = fs.listStatus(path);
                if (fsarr != null && fsarr.length > 0) {
                    for (FileStatus ff : fsarr) {
                        print(ff);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // delete a file or directory
    @Test
    public void deleteFile() throws Exception {
        Path path = new Path("/read.txt");
        // delete recursively
        fs.delete(path, true);
    }
}