Hadoop Learning (5) ---- Java Operations on HDFS

In the previous posts we covered the basics of how HDFS works and how to set up a Hadoop environment. Now we move on to hands-on practice, mostly in Java. This section looks at the Java operations on HDFS.

1 Environment Setup

The previous post covered setting up a Hadoop environment on Windows; start Hadoop before you begin. My local IDE is IntelliJ IDEA. Set up a Maven project:

The pom.xml file:

<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.17</version>
    </dependency>
</dependencies>
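Since log4j is on the classpath, the tests will warn about a missing appender unless a configuration file is supplied. A minimal sketch of a log4j.properties is shown below; placing it under src/test/resources is my assumption, not something from the original post:

# minimal log4j 1.2 configuration; the location src/test/resources is an assumption
log4j.rootLogger=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1} - %m%n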

Create a test class: FileOperator.java

import org.apache.commons.compress.utils.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Created by Administrator on 2017/12/3.
 */
public class FileOperator {

    private static final Logger logger = LoggerFactory.getLogger(FileOperator.class);

    public static DistributedFileSystem dfs = null;
    public static String nameNodeUri = "hdfs://localhost:9000";

    @Before
    public void initFileSystem() throws Exception {
        logger.info("initializing hadoop env----");
        dfs = new DistributedFileSystem();
        dfs.initialize(new URI(nameNodeUri), new Configuration());
        logger.info("connection is successful");
        Path workingDirectory = dfs.getWorkingDirectory();
        System.out.println("current workspace is: " + workingDirectory);
    }

    /**
     * Create a directory.
     */
    @Test
    public void testMkDir() throws Exception {
        boolean res = dfs.mkdirs(new Path("/test/aaa/bbb"));
        System.out.println("mkdir result: " + (res ? "success" : "failure"));
    }

    /**
     * Delete a directory/file (recursive=false, so a non-empty directory is refused).
     */
    @Test
    public void testDeleteDir() throws Exception {
        dfs.delete(new Path("/test/aaa/bbb"), false);
    }

    /**
     * List all files under a directory, recursively (directories themselves are skipped).
     */
    @Test
    public void testFileList() throws Exception {
        RemoteIterator<LocatedFileStatus> listFiles = dfs.listFiles(new Path("/"), true);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            // permission
            FsPermission permission = fileStatus.getPermission();
            // owner
            String owner = fileStatus.getOwner();
            // group
            String group = fileStatus.getGroup();
            // file size in bytes
            long len = fileStatus.getLen();
            long modificationTime = fileStatus.getModificationTime();
            Path path = fileStatus.getPath();
            System.out.println("-------------------------------");
            System.out.println("permission: " + permission);
            System.out.println("owner: " + owner);
            System.out.println("group: " + group);
            System.out.println("len: " + len);
            System.out.println("modificationTime: " + sdf.format(new Date(modificationTime)));
            System.out.println("path: " + path);
        }
    }

    /**
     * Upload a complete file.
     * Note: in a Windows development environment, uploading with
     * <code>org.apache.commons.io.IOUtils.copy</code> from apache-commons may be problematic.
     */
    @Test
    public void testUploadFullFile() throws Exception {
        FSDataOutputStream out = dfs.create(new Path("/test/aaa/testFile.txt"), true);
        InputStream in = new FileInputStream("F:\\test\\cluster\\input\\testFile.txt");
        IOUtils.copy(in, out);
        // close the output stream so HDFS finalizes the file
        in.close();
        out.close();
        System.out.println("upload finished");
    }

    /**
     * Upload part of a file.
     * Note: in a Windows development environment, uploading with
     * <code>org.apache.commons.io.IOUtils.copy</code> from apache-commons may be problematic.
     */
    @Test
    public void testUploadFile2() throws Exception {
        FSDataOutputStream out = dfs.create(new Path("/test/aaa/testFile1.txt"), true);
        InputStream in = new FileInputStream("F:\\test\\cluster\\input\\testFile.txt");
        // copy 12 bytes starting at input offset 6
        org.apache.commons.io.IOUtils.copyLarge(in, out, 6, 12);
        in.close();
        out.close();
        System.out.println("upload finished");
    }

    /**
     * Download a complete file.
     * Note: on a Windows development machine, use the API below.
     */
    @Test
    public void testDownloadFile() throws Exception {
        // do the local I/O through the plain Java API: useRawLocalFileSystem=true
        dfs.copyToLocalFile(false, new Path("/test/aaa/testFile.txt"),
                new Path("E:/"), true);
        System.out.println("download finished");
    }

    /**
     * Download part of a file.
     */
    @Test
    public void testDownloadFile2() throws Exception {
        FSDataInputStream src = dfs.open(new Path("/test/aaa/testFile.txt"));
        FileOutputStream des = new FileOutputStream(new File("E:/", "download_testFile.txt"));
        // skip the first 6 bytes, then copy the rest
        src.seek(6);
        org.apache.commons.io.IOUtils.copy(src, des);
        src.close();
        des.close();
        System.out.println("download finished");
    }
}
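As an aside, the class above instantiates DistributedFileSystem directly and calls initialize(). The more common client idiom is the FileSystem.get factory, which resolves the implementation from the URI scheme. A minimal sketch follows; the URI and the user name "hadoop" are assumptions to adjust for your own cluster:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class FsFactoryDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.get maps the hdfs:// scheme to DistributedFileSystem;
        // the third argument is the user to act as ("hadoop" is an assumed name)
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf, "hadoop");
        System.out.println("/test exists: " + fs.exists(new Path("/test")));
        fs.close();
    }
}

One advantage of the factory is that the same code also works against file:// or other schemes without changing the call sites.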

That is the complete code; adjust the paths to match your own environment before running it.
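To run a single test from the command line, the Surefire test filter can be used, for example (assuming the class sits in the default package as shown; add the package prefix otherwise):

mvn test -Dtest=FileOperator#testMkDir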