Accessing the HDFS file system remotely from Windows


Development tools: Eclipse + Maven + JDK 1.8

Example code

package com.zhiwei.hdfs;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;

/**
 * Problem: Permission Denied
 *
 * Fix: grant access to the Hadoop directory: hdfs dfs -chmod -R 777 <hadoop directory path>
 *
 * Hadoop version: hadoop-2.7.3
 */
public class HdfsClient {

    private static String prefix = "hdfs://";
    private static String targetHost = "localhost";
    private static String targetPort = "9090";
    private static Configuration conf = new Configuration();
    private static FileSystem fileSystem = null;

    private HdfsClient() {}

    /**
     * Initialize the HDFS client (connects as "root" by default)
     * @param host
     * @param port
     */
    public static void initClient(String host, String port) {
        initClient(host, port, "root");
    }

    public static void initClient(String host, String port, String user) {
        try {
            targetHost = host;
            targetPort = port;
            // Connect to HDFS as the given user
            fileSystem = FileSystem.get(URI.create(prefix + targetHost + ":" + targetPort), conf, user);
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }

    /**
     * Get the DataNode information of the HDFS cluster
     * Xceivers: the number of threads a DataNode is currently using to transfer data
     * @return
     */
    public static DatanodeInfo[] getDatanodeInfos() {
        DatanodeInfo[] datanodeInfos = null;
        try {
            DistributedFileSystem dbfs = (DistributedFileSystem) fileSystem;
            datanodeInfos = dbfs.getDataNodeStats();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        return datanodeInfos;
    }

    /**
     * Check whether an HDFS file exists
     * @param hdfsFile
     * @return
     */
    public static boolean isFileExist(String hdfsFile) {
        boolean isSuccess = false;
        try {
            isSuccess = fileSystem.exists(new Path(hdfsFile));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return isSuccess;
    }

    /**
     * List all files under an HDFS directory
     * @param hdfsFileDir
     * @return
     */
    public static FileStatus[] getFilesByDir(String hdfsFileDir) {
        FileStatus[] fileStatus = null;
        try {
            // Reuse the FileSystem created in initClient so the chosen user is kept
            fileStatus = fileSystem.listStatus(new Path(hdfsFileDir));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        return fileStatus;
    }

    /**
     * Create an HDFS directory (recursively)
     * @param hdfsFileDir
     * @return
     */
    public static boolean makeHdfsDir(String hdfsFileDir) {
        boolean isSuccess = false;
        try {
            isSuccess = fileSystem.mkdirs(new Path(hdfsFileDir));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return isSuccess;
    }

    public static boolean deleteHdfsFile(String hdfsFilePath) {
        return deleteHdfsFile(hdfsFilePath, true);
    }

    /**
     * Delete an HDFS file
     * @param hdfsFilePath HDFS file path
     * @param isRecursive whether to delete recursively
     */
    public static boolean deleteHdfsFile(String hdfsFilePath, boolean isRecursive) {
        boolean isSuccess = false;
        try {
            isSuccess = fileSystem.delete(new Path(hdfsFilePath), isRecursive);
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return isSuccess;
    }

    /**
     * Read the content of an HDFS file
     * @param hdfsFilePath
     * @throws IOException
     */
    public static byte[] readHdfsFile(String hdfsFilePath) throws IOException {
        FSDataInputStream fis = null;
        byte[] data = null;
        try {
            Path path = new Path(hdfsFilePath);
            // Size the buffer from the file status; InputStream.available() is not reliable for this
            data = new byte[(int) fileSystem.getFileStatus(path).getLen()];
            fis = fileSystem.open(path);
            fis.readFully(0, data);
        } finally {
            IOUtils.closeStream(fis);
        }
        return data;
    }

    /**
     * Rename an HDFS file
     * @param oldName source file name (full path)
     * @param newName target file name (full path)
     * @return
     */
    public static boolean renameHdfsFile(String oldName, String newName) {
        try {
            return fileSystem.rename(new Path(oldName), new Path(newName));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Write data into a new HDFS file
     * @param dest path of the new HDFS file
     * @param content byte array to write
     * @return
     */
    public static boolean writeInfoToHdfsFile(String dest, byte[] content) {
        FSDataOutputStream fsDataOutputStream = null;
        try {
            fsDataOutputStream = fileSystem.create(new Path(dest));
            fsDataOutputStream.write(content);
            fsDataOutputStream.flush();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            IOUtils.closeStream(fsDataOutputStream);
        }
        return true;
    }

    /**
     * Default upload method
     * @param src source file path
     * @param dest HDFS file path
     * @return status
     */
    public static boolean uploadLocalFileToHDFS(String src, String dest) {
        return uploadLocalFileToHDFS(false, false, src, dest);
    }

    /**
     * Upload a local file to the Hadoop HDFS file system
     * @param delSrc whether to delete the source file (default: no)
     * @param override whether to overwrite a file of the same name (default: no)
     * @param src full local file path
     * @param dest full HDFS path
     * @return
     */
    public static boolean uploadLocalFileToHDFS(boolean delSrc, boolean override, String src, String dest) {
        try {
            // Note: dest may be a full path; otherwise it is resolved against the connecting user's home directory
            fileSystem.copyFromLocalFile(delSrc, override, new Path(src), new Path(dest));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Close the HDFS client
     */
    public static void close() {
        if (fileSystem != null) {
            try {
                fileSystem.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
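
Note that readHdfsFile pulls the whole file into one byte array, which only suits small files. For larger files it is usually better to stream the data instead. The following is a minimal sketch, not part of the original class; the host, port, user and file paths are simply the ones used in the tests below and the local output path is assumed, so adjust them to your environment. It copies an HDFS file to the local disk in 4 KB chunks with IOUtils.copyBytes, keeping memory usage constant.

package com.zhiwei.hdfs;

import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class HdfsStreamDownload {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Same cluster address and user as in the tests below (assumption: adjust to your setup)
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.204.129:9090"), conf, "squirrel");
        FSDataInputStream in = null;
        OutputStream out = null;
        try {
            in = fs.open(new Path("/data/mapreduce/output/part-r-00000"));
            out = new FileOutputStream("D:\\temp\\part-r-00000");
            // copyBytes(in, out, bufferSize, closeStreams): stream the file in 4096-byte chunks
            IOUtils.copyBytes(in, out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
            fs.close();
        }
    }
}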

Test code:

package com.zhiwei.hdfs;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HdfsClientTest {

    @Before
    public void init() {
        // Point hadoop.home.dir at a local Hadoop distribution so the client works on Windows
        System.setProperty("hadoop.home.dir", "D:\\Tools\\hadoop-2.7.3");
    }

    /**
     * Get HDFS DataNode information
     */
    @Test
    public void getDatanodeInfosTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        DatanodeInfo[] datanodeInfos = HdfsClient.getDatanodeInfos();
        for (DatanodeInfo datanodeInfo : datanodeInfos) {
            System.out.println("Node hostname: " + datanodeInfo.getHostName());
            System.out.println("Node HTTP port: " + datanodeInfo.getInfoPort());
            System.out.println("Node IPC port: " + datanodeInfo.getIpcPort());
            System.out.println("Node cache used: " + datanodeInfo.getCacheUsedPercent());
        }
    }

    /**
     * Check whether a file exists
     */
    @Test
    public void isFileExistTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        System.out.println(HdfsClient.isFileExist("/data"));
    }

    /**
     * List the files under a directory
     */
    @Test
    public void getFilesByDirTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        FileStatus[] fStatus = HdfsClient.getFilesByDir("/data");
        for (FileStatus fs : fStatus) {
            System.out.println("Child path: " + fs.getPath()
                              + ", group: " + fs.getGroup()
                              + ", owner: " + fs.getOwner());
        }
    }

    /**
     * Create an HDFS directory
     */
    @Test
    public void makeHdfsDirTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        System.out.println("Directory created: " + HdfsClient.makeHdfsDir("/data/test"));
    }

    /**
     * Delete an HDFS directory
     */
    @Test
    public void deleteHdfsFileTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        System.out.println("File deleted: " + HdfsClient.deleteHdfsFile("/data/test", true));
    }

    /**
     * Read an HDFS file
     */
    @Test
    public void readHdfsFileTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        System.out.println("HDFS file content: " + Bytes.toString(HdfsClient.readHdfsFile("/data/mapreduce/output/part-r-00000")));
    }

    /**
     * Rename an HDFS file
     */
    @Test
    public void renameHdfsFileTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        System.out.println("File renamed: " + HdfsClient.renameHdfsFile("/data/mapreduce/output/test", "/data/mapreduce/output/test1"));
    }

    /**
     * Write data into HDFS
     */
    @Test
    public void writeInfoToHdfsFileTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        System.out.println("Data written to HDFS: " + HdfsClient.writeInfoToHdfsFile("/data/Test", "/data/mapreduce/output/test1".getBytes()));
    }

    /**
     * Upload a local file to HDFS
     */
    @Test
    public void uploadLocalFileToHDFSTest() throws Exception {
        HdfsClient.initClient("192.168.204.129", "9090", "squirrel");
        System.out.println("File uploaded to HDFS: " + HdfsClient.uploadLocalFileToHDFS(true, true, "d://temp/test.txt", "/data/Test"));
    }

    @After
    public void close() {
        HdfsClient.close();
    }
}
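
The init() method points hadoop.home.dir at a local Hadoop distribution because on Windows the Hadoop 2.x client looks for winutils.exe under %HADOOP_HOME%\bin and logs an error about the missing winutils binary when neither the HADOOP_HOME environment variable nor the hadoop.home.dir property is set. Below is a minimal sketch of a small helper for this, not part of the original post; the D:\Tools\hadoop-2.7.3 path is the author's and is assumed to contain bin\winutils.exe, so adjust it to your installation.

package com.zhiwei.hdfs;

/**
 * Sketch: make sure the Hadoop client can locate a local Hadoop home on Windows.
 * Call WindowsHadoopHome.ensureHadoopHome() before creating any FileSystem.
 */
public class WindowsHadoopHome {

    public static void ensureHadoopHome() {
        // Only set the property if nothing points at a Hadoop distribution yet
        if (System.getProperty("hadoop.home.dir") == null
                && System.getenv("HADOOP_HOME") == null) {
            // Assumed local path of an unpacked hadoop-2.7.3 containing bin\winutils.exe
            System.setProperty("hadoop.home.dir", "D:\\Tools\\hadoop-2.7.3");
        }
    }
}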

Maven pom.xml configuration

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.zhiwei</groupId>
  <artifactId>hadoop</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>Hadoop</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <java.version>1.8</java.version>
    <hadoop.version>2.7.3</hadoop.version>
    <hbase.version>1.2.6</hbase.version>
    <hive.version>2.3.1</hive.version>
    <zookeeper.version>3.4.8</zookeeper.version>
    <curator.version>4.0.0</curator.version>
    <fastjson.version>1.2.41</fastjson.version>
    <mahout.version>0.13.0</mahout.version>
    <kafka.version>0.11.0.2</kafka.version>
    <zkclient.version>0.10</zkclient.version>
    <junit.version>4.12</junit.version>
  </properties>

  <dependencies>
    <!-- ZooKeeper -->
    <dependency>
      <groupId>org.apache.zookeeper</groupId>
      <artifactId>zookeeper</artifactId>
      <version>${zookeeper.version}</version>
    </dependency>
    <!-- Apache Curator ZooKeeper client -->
    <dependency>
      <groupId>org.apache.curator</groupId>
      <artifactId>curator-client</artifactId>
      <version>${curator.version}</version>
    </dependency>
    <!-- zkclient ZooKeeper client -->
    <dependency>
      <groupId>com.101tec</groupId>
      <artifactId>zkclient</artifactId>
      <version>${zkclient.version}</version>
    </dependency>
    <!-- Hadoop -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <!-- HBase -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <!-- Hive -->
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-metastore</artifactId>
      <version>${hive.version}</version>
    </dependency>
    <!-- Kafka -->
    <!--
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>${kafka.version}</version>
    </dependency>
    -->
    <!-- Mahout -->
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-math</artifactId>
      <version>${mahout.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.mahout</groupId>
      <artifactId>mahout-hdfs</artifactId>
      <version>${mahout.version}</version>
    </dependency>
    <!-- Alibaba FastJson -->
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>${fastjson.version}</version>
    </dependency>
    <!-- JUnit -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>${junit.version}</version>
    </dependency>
    <!-- Override the default Guava version pulled in by Hive to avoid Guava version conflicts -->
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>11.0.2</version>
    </dependency>
  </dependencies>

  <!-- Set the JDK version for the Maven project -->
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>${java.version}</source>
          <target>${java.version}</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>

Note: running this code does not actually depend on the Hadoop Eclipse plugin. The plugin merely wraps Hadoop's configuration parameters; under the hood it also goes through the Hadoop API, and it simply presents the HDFS file system as a tree.
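
To illustrate this, the following minimal sketch prints an HDFS directory as an indented tree using only the plain FileSystem API, the same calls the plugin ultimately makes. The host, port, user and the /data starting directory are the assumptions already used in the tests above.

package com.zhiwei.hdfs;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsTreePrinter {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.204.129:9090"), conf, "squirrel");
        try {
            printTree(fs, new Path("/data"), 0);
        } finally {
            fs.close();
        }
    }

    // Recursively print each path, indenting two spaces per directory level
    private static void printTree(FileSystem fs, Path path, int depth) throws Exception {
        StringBuilder indent = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            indent.append("  ");
        }
        System.out.println(indent + path.getName());
        FileStatus status = fs.getFileStatus(path);
        if (status.isDirectory()) {
            for (FileStatus child : fs.listStatus(path)) {
                printTree(fs, child.getPath(), depth + 1);
            }
        }
    }
}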

Project structure: (screenshot of the project layout omitted)
