Getting Started with Hadoop, Part 07: Operating HDFS with Java


Contents

  • 1. HDFS operations: querying
  •         1) Traversing HDFS files (hadoop-0.20.2)
  •         2) Run on Hadoop
  •         3) Traversing HDFS files (Hadoop-1.1.2, approach 1)
  •         4) Traversing HDFS files (Hadoop-1.1.2, approach 2)
  •         5) Testing for a directory or file with a given name in HDFS
  •         6) Reading the last-modified time of an HDFS file
  •         7) Reading the status of a given HDFS file
  •         8) Reading the contents of a txt file in HDFS
  • 2. HDFS operations: adding
  •         1) Uploading a file to HDFS (hadoop-0.20.2)
  •         2) Uploading a file to HDFS (hadoop-1.1.2, approach 1)
  •         3) Uploading a file to HDFS (hadoop-1.1.2, approach 2)
  •         4) Creating directories and files in HDFS
  • 3. HDFS operations: modifying
  •         1) Renaming a file
  •         2) Deleting a file

    1. HDFS operations: querying

            1) Traversing HDFS files (hadoop-0.20.2):

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Query {

        private static FileSystem hdfs;

        public static void main(String[] args) throws Exception {
            // 1. Create the configuration
            Configuration conf = new Configuration();
            // 2. Create the file system
            hdfs = FileSystem.get(conf);
            // 3. Traverse the files and directories on HDFS
            FileStatus[] fs = hdfs.listStatus(new Path("/home"));
            if (fs.length > 0) {
                for (FileStatus f : fs) {
                    showDir(f);
                }
            }
        }

        private static void showDir(FileStatus fs) throws Exception {
            Path path = fs.getPath();
            System.out.println(path);
            // If this is a directory, recurse into its children
            if (fs.isDir()) {
                FileStatus[] f = hdfs.listStatus(path);
                if (f.length > 0) {
                    for (FileStatus file : f) {
                        showDir(file);
                    }
                }
            }
        }
    }


            Error 1:
                   The Hadoop classes and packages cannot be found.

            Solution:
                  Because this is a plain Java Project, the jars are not imported automatically even if the Hadoop install directory is configured under Window -> Preferences -> Hadoop Map/Reduce.
                  You have to add the following jars manually via Build Path -> Add to Build Path:
                  hadoop-core-1.1.2.jar
                  commons-lang-2.4.jar
                  jackson-core-asl-1.8.8.jar
                  jackson-mapper-asl-1.8.8.jar
                  commons-logging-1.1.1.jar
                  commons-configuration-1.6.jar

            2) Run on Hadoop:

            Error 2:
                    At this point the program is actually searching Ubuntu's local directories, not HDFS, and special characters in the local directory hierarchy can produce URI errors.

            Solution:
                    The code above was written for hadoop-0.20.2 and no longer works on hadoop-1.1.2. It has to be changed, as shown next.
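
            A quick way to confirm which file system FileSystem.get(conf) actually resolved to is to print its URI (a minimal sketch; file:/// means the local file system, hdfs://... means HDFS):

    // Sketch: inspect the file system an empty Configuration resolves to
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    System.out.println(fs.getUri()); // file:/// here explains the error above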

            3) Traversing HDFS files (Hadoop-1.1.2, approach 1):

    package cn.cvu.hdfs;
    // imports omitted
    public class Query {
        private static FileSystem hdfs;

        public static void main(String[] args) throws Exception {
            // 1. Create the configuration
            Configuration conf = new Configuration();
            // 2. Create the file system (pass the HDFS file system URI explicitly)
            hdfs = FileSystem.get(URI.create("hdfs://localhost:9000/"), conf);
            // 3. Traverse the files and directories on HDFS
            FileStatus[] fs = hdfs.listStatus(new Path("/home"));
            if (fs.length > 0) {
                for (FileStatus f : fs) {
                    showDir(f);
                }
            }
        }

        private static void showDir(FileStatus fs) throws Exception {
            // recursion omitted (same as above)
        }
    }
       

            4) Traversing HDFS files (Hadoop-1.1.2, approach 2):

    public static void main(String[] args) throws Exception {
        // 1. Create the configuration
        Configuration conf = new Configuration();
        // 2. Load the specified configuration file
        conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
        // 3. Create the file system
        hdfs = FileSystem.get(conf);
        // 4. Traverse the files and directories on HDFS
        FileStatus[] fs = hdfs.listStatus(new Path("/home"));
        if (fs.length > 0) {
            for (FileStatus f : fs) {
                showDir(f);
            }
        }
    }
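
            A third option, if you would rather not hard-code the URI or load core-site.xml from disk, is to set the default file system key on the Configuration yourself (a sketch; fs.default.name is the Hadoop 1.x key, and hdfs://localhost:9000 is the address assumed throughout this article):

    Configuration conf = new Configuration();
    // fs.default.name is the Hadoop 1.x key for the default file system
    conf.set("fs.default.name", "hdfs://localhost:9000");
    FileSystem hdfs = FileSystem.get(conf);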

      

            5) Testing for a directory or file with a given name in HDFS:

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Query {
        private static FileSystem hdfs;

        public static void main(String[] args) throws Exception {
            // 1. Configuration
            Configuration conf = new Configuration();
            conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
            // 2. File system
            hdfs = FileSystem.get(conf);
            // 3. Traverse the files and directories currently on HDFS
            FileStatus[] fs = hdfs.listStatus(new Path("/home"));
            if (fs.length > 0) {
                for (FileStatus f : fs) {
                    showDir(f);
                }
            }
        }

        private static void showDir(FileStatus fs) throws Exception {
            Path path = fs.getPath();
            // If it is a directory
            if (fs.isDir()) {
                if (path.getName().equals("system")) {
                    System.out.println(path + " is a directory");
                }
                FileStatus[] f = hdfs.listStatus(path);
                if (f.length > 0) {
                    for (FileStatus file : f) {
                        showDir(file);
                    }
                }
            } else {
                if (path.getName().equals("system")) {
                    System.out.println(path + " is a file");
                }
            }
        }
    }
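
            If you already know the full path you are looking for, there is no need to recurse over the whole tree; exists() plus getFileStatus() answers the question directly (a sketch; /home/system is just an illustrative path):

    // Sketch: check one known path instead of walking the tree
    Path p = new Path("/home/system");
    if (hdfs.exists(p)) {
        System.out.println(p + (hdfs.getFileStatus(p).isDir() ? " is a directory" : " is a file"));
    }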

            6) Reading the last-modified time of an HDFS file:

    package cn.cvu.hdfs;

    import java.net.URI;
    import java.util.Date;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Query2 {
        private static FileSystem hdfs;

        public static void main(String[] args) throws Exception {
            // 1. Create the configuration
            Configuration conf = new Configuration();
            // 2. Create the file system (pass the HDFS file system URI explicitly)
            hdfs = FileSystem.get(URI.create("hdfs://localhost:9000/"), conf);
            // 3. List the files and directories currently on HDFS
            FileStatus[] fs = hdfs.listStatus(new Path("/home"));
            if (fs.length > 0) {
                for (FileStatus f : fs) {
                    showDir(f);
                }
            }
        }

        private static void showDir(FileStatus fs) throws Exception {
            Path path = fs.getPath();
            // Get the last-modified time (milliseconds since the epoch)
            long time = fs.getModificationTime();
            System.out.println("Last modified: " + new Date(time));
            System.out.println(path);
            if (fs.isDir()) {
                FileStatus[] f = hdfs.listStatus(path);
                if (f.length > 0) {
                    for (FileStatus file : f) {
                        showDir(file);
                    }
                }
            }
        }
    }
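
            getModificationTime() returns raw epoch milliseconds; if you want a fixed, readable format rather than Date's default toString(), a SimpleDateFormat works (a small sketch to drop into showDir above):

    // Sketch: format the modification time explicitly
    java.text.SimpleDateFormat fmt = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    System.out.println("Last modified: " + fmt.format(new java.util.Date(time)));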

     

            7) Reading the status of a given HDFS file:

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Query {

        public static void main(String[] args) throws Exception {
            // 1. Configuration
            Configuration conf = new Configuration();
            conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
            // 2. File system
            FileSystem fs = FileSystem.get(conf);
            // 3. An existing file
            Path path = new Path("/home/hm/hadoop-hm/mapred/system/jobtracker.info");
            // 4. File status
            FileStatus status = fs.getFileStatus(path);
            // 5. File blocks
            BlockLocation[] blockLocations = fs.getFileBlockLocations(status, 0, status.getLen());
            int blockLen = blockLocations.length;
            System.err.println("Number of blocks: " + blockLen);
            for (int i = 0; i < blockLen; i++) {
                // Hosts holding a replica of this block
                String[] hosts = blockLocations[i].getHosts();
                for (String host : hosts) {
                    System.err.println("Host: " + host);
                }
            }
        }
    }
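
            Besides block locations, FileStatus carries the rest of the file's metadata; a short sketch of the commonly used getters (same status object as above):

    // Sketch: other fields available on FileStatus
    System.out.println("Length: " + status.getLen());
    System.out.println("Replication: " + status.getReplication());
    System.out.println("Block size: " + status.getBlockSize());
    System.out.println("Owner/group: " + status.getOwner() + "/" + status.getGroup());
    System.out.println("Permission: " + status.getPermission());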

      

            8) Reading the contents of a txt file in HDFS:

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class Query {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
            FileSystem fs = FileSystem.get(conf);
            Path path = new Path("/test.txt");
            FSDataInputStream is = fs.open(path);
            FileStatus stat = fs.getFileStatus(path);
            // Read the whole file into a buffer sized from its length
            byte[] buffer = new byte[(int) stat.getLen()];
            is.readFully(0, buffer);
            is.close();
            fs.close();
            System.out.println(new String(buffer));
        }
    }
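
            Reading the whole file into a single byte[] works for small files but does not scale: getLen() can exceed Integer.MAX_VALUE, and the entire content must fit in memory. A streaming alternative using Hadoop's own IOUtils (a sketch; the QueryStream class name is just illustrative):

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;

    public class QueryStream {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
            FileSystem fs = FileSystem.get(conf);
            FSDataInputStream is = fs.open(new Path("/test.txt"));
            try {
                // Stream the file to stdout in 4 KB chunks; false = do not
                // close the streams inside copyBytes
                IOUtils.copyBytes(is, System.out, 4096, false);
            } finally {
                IOUtils.closeStream(is);
                fs.close();
            }
        }
    }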

       

    2. HDFS operations: adding

            1) Uploading a file to HDFS (hadoop-0.20.2):

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class TestHdfs {

        public static void main(String[] args) throws Exception {
            // 1. Create the configuration
            Configuration conf = new Configuration();
            // 2. Create the file system
            FileSystem hdfs = FileSystem.get(conf);
            // 3. Build paths usable by Hadoop
            Path src = new Path("/home/hm/test.txt"); // local directory/file
            Path dst = new Path("/home");             // target directory/file
            // 4. Copy the local file up (local file, target path)
            hdfs.copyFromLocalFile(src, dst);
            System.err.println("File uploaded to: " + conf.get("fs.default.name"));
            // 5. List the files on HDFS
            FileStatus[] fs = hdfs.listStatus(dst);
            for (FileStatus f : fs) {
                System.err.println(f.getPath());
            }
        }
    }


            Error:

    The target path lacks write permission, because it is actually Ubuntu's /home directory!

    Exception in thread "main" java.io.FileNotFoundException: /home/test.txt (Permission denied)
    at java.io.FileOutputStream.open(Native Method)
    at java.io.FileOutputStream.<init>(FileOutputStream.java:212)
    at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:188)
    at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:184)
    at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:255)
    at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:236)
    at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:335)
    at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:381)
    at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:364)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:555)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:536)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:443)
    at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:229)
    at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:163)
    at org.apache.hadoop.fs.LocalFileSystem.copyFromLocalFile(LocalFileSystem.java:67)
    at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:1143)          

            Solution:

    Upload to a directory you do have write permission for.

            Result:

    The file actually ended up in Ubuntu's /home/hm/hadoop-hm directory!

       

            2) Uploading a file to HDFS (hadoop-1.1.2, approach 1):

    package cn.cvu.hdfs;

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class TestHdfs1 {
        private static FileSystem hdfs;

        public static void main(String[] args) throws Exception {
            // 1. Create the configuration
            Configuration conf = new Configuration();
            // 2. Initialize the file system (HDFS file system URI, configuration)
            hdfs = FileSystem.get(URI.create("hdfs://localhost:9000/"), conf);
            // 3. Path of the local directory/file
            Path src = new Path("/home/hm/test.txt");
            // 4. HDFS target directory (as a full HDFS URI)
            Path dst = new Path("hdfs://localhost:9000/home/hm/hadoop-hm");
            // 5. Perform the upload
            hdfs.copyFromLocalFile(src, dst);
            System.out.println("File uploaded to: " + conf.get("fs.default.name"));
            // 6. Traverse the HDFS directory
            FileStatus[] list = hdfs.listStatus(dst);
            for (FileStatus f : list) {
                showDir(f);
            }
        }

        private static void showDir(FileStatus f) throws Exception {
            if (f.isDir()) {
                System.err.println("Directory: " + f.getPath());
                FileStatus[] listStatus = hdfs.listStatus(f.getPath());
                for (FileStatus fn : listStatus) {
                    showDir(fn);
                }
            } else {
                System.out.println("File: " + f.getPath());
            }
        }
    }
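
            If the local copy is not needed after the upload, moveFromLocalFile() copies the file and then deletes the local source in one call (a sketch, reusing the hdfs handle and paths from above):

    // Sketch: upload and remove the local source in one step
    hdfs.moveFromLocalFile(new Path("/home/hm/test.txt"),
            new Path("hdfs://localhost:9000/home/hm/hadoop-hm"));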

       

            3) Uploading a file to HDFS (hadoop-1.1.2, approach 2):

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class TestHdfs2 {
        private static FileSystem hdfs;

        public static void main(String[] args) throws Exception {
            // 1. Create the configuration
            Configuration conf = new Configuration();
            // 2. Load the configuration file manually
            conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
            // 3. Create the file system
            hdfs = FileSystem.get(conf);
            // 4. Local file
            Path src = new Path("/home/hm/test.txt");
            // 5. Target path
            Path dst = new Path("/home");
            // 6. Upload the file if it is not already there
            if (!hdfs.exists(new Path("/home/test.txt"))) {
                hdfs.copyFromLocalFile(src, dst);
                System.err.println("File uploaded to: " + conf.get("fs.default.name"));
            } else {
                System.err.println("test.txt already exists in " + conf.get("fs.default.name"));
            }
            // 7. Traverse the HDFS files
            System.out.println("\nDirectories and files in the HDFS file system:");
            FileStatus[] fs = hdfs.listStatus(dst);
            for (FileStatus f : fs) {
                showDir(f);
            }
        }

        private static void showDir(FileStatus f) throws Exception {
            if (f.isDir()) {
                System.err.println("Directory: " + f.getPath());
                FileStatus[] listStatus = hdfs.listStatus(f.getPath());
                for (FileStatus fn : listStatus) {
                    showDir(fn);
                }
            } else {
                System.err.println("File: " + f.getPath());
            }
        }
    }
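
            copyFromLocalFile() also has an overload that makes the overwrite behavior explicit, which can replace the manual exists() check above (a sketch):

    // Sketch: delSrc=false keeps the local file, overwrite=true replaces an
    // existing copy on HDFS instead of failing
    hdfs.copyFromLocalFile(false, true, new Path("/home/hm/test.txt"), new Path("/home"));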


            4) Creating directories and files in HDFS:

    package cn.cvu.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class TestHdfs2 {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            // Use an HDFS output stream (write) to create a directory under the
            // root and a file inside it (create() makes the parent directories)
            FSDataOutputStream out = hdfs.create(new Path("/eminem/hip-hop.txt"));
            // Write a line of data, encoded explicitly as UTF-8
            // (writeUTF() prepends a length header, so it is not used here)
            out.write("痞子阿姆,Hello !".getBytes("UTF-8"));
            out.close(); // close the first stream before opening the next
            out = hdfs.create(new Path("/alizee.txt"));
            out.write("艾莉婕,Hello !".getBytes("UTF-8"));
            out.close();
            FileStatus[] fs = hdfs.listStatus(new Path("/"));
            for (FileStatus f : fs) {
                System.out.println(f.getPath());
            }
        }
    }
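
            create() builds the parent directories implicitly; to create an empty directory on its own, mkdirs() is the call (a sketch, reusing the hdfs handle from above; /eminem/albums is just an illustrative path):

    // Sketch: create an empty directory (and any missing parents)
    boolean made = hdfs.mkdirs(new Path("/eminem/albums"));
    System.out.println("mkdirs: " + made);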

     

    3. HDFS operations: modifying

            1) Renaming a file:

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
        FileSystem fs = FileSystem.get(conf);
        // Rename: fs.rename(source, destination); returns false on failure
        boolean rename = fs.rename(new Path("/test.txt"), new Path("/new_test.txt"));
        System.out.println(rename);
    }

            2) Deleting a file:

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/home/hm/hadoop-1.1.2/conf/core-site.xml"));
        FileSystem fs = FileSystem.get(conf);
        // Delete
        // fs.delete(new Path("/new_test.txt")); // deprecated single-argument form
        // Deferred delete: runs when the file system is closed
        boolean exit = fs.deleteOnExit(new Path("/new_test.txt"));
        System.out.println("deleteOnExit registered: " + exit);
        // Recursive delete: (path, true); with false, a non-empty directory
        // is not deleted and an IOException is thrown
        boolean delete = fs.delete(new Path("/test.txt"), true);
        System.out.println("delete executed: " + delete);
    }
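
            delete() simply returns false when the path does not exist, so a guarded form makes failures easier to tell apart (a sketch; /eminem is the directory created in section 2.4):

    // Sketch: check existence first, then delete recursively
    Path dir = new Path("/eminem");
    if (fs.exists(dir)) {
        boolean ok = fs.delete(dir, true); // true = recursive
        System.out.println("Deleted " + dir + ": " + ok);
    } else {
        System.out.println(dir + " does not exist");
    }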

    - end 