windows平台远程访问HDFS文件系统
来源:互联网 发布:股票行情分析软件 编辑:程序博客网 时间:2024/06/05 16:09
开发工具:eclipse + maven + jdk1.8
案例代码:
package com.zhiwei.hdfs;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;

/**
 * Static facade over the Hadoop {@link FileSystem} API for remote HDFS access
 * (e.g. from a Windows development machine).
 *
 * <p>Call {@link #initClient(String, String, String)} once before using any other
 * method, and {@link #close()} when done. Not thread-safe: all state is held in
 * static fields.
 *
 * <p>If "Permission Denied" occurs, relax the target directory's permissions on
 * the cluster: {@code hdfs dfs -chmod -R 777 <path>}.
 *
 * <p>Hadoop version: hadoop-2.7.3.
 */
public class HdfsClient {

    /** URI scheme prefix for HDFS. */
    private static String prefix = "hdfs://";
    /** NameNode host; overwritten by initClient(). */
    private static String targetHost = "localhost";
    /** NameNode RPC port; overwritten by initClient(). */
    private static String targetPort = "9090";
    private static Configuration conf = new Configuration();
    /** Shared FileSystem handle created by initClient(); null until initialized. */
    private static FileSystem fileSystem = null;

    /** Utility class: no instances. */
    private HdfsClient() {}

    /**
     * Initializes the HDFS client connecting as user "root".
     *
     * @param host NameNode host
     * @param port NameNode RPC port
     */
    public static void initClient(String host, String port) {
        initClient(host, port, "root");
    }

    /**
     * Initializes the HDFS client with an explicit remote user name.
     * Errors are reported to stderr and leave {@link #fileSystem} unchanged.
     *
     * @param host NameNode host
     * @param port NameNode RPC port
     * @param user HDFS user to connect as
     */
    public static void initClient(String host, String port, String user) {
        try {
            targetHost = host;
            targetPort = port;
            // Connect to HDFS as the given user.
            fileSystem = FileSystem.get(
                    URI.create(prefix + targetHost + ":" + targetPort), conf, user);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns information about the cluster's DataNodes.
     * "Xceivers" is the number of threads a DataNode currently uses for data transfer.
     *
     * @return DataNode descriptors, or null on failure
     */
    public static DatanodeInfo[] getDatanodeInfos() {
        try {
            DistributedFileSystem dbfs = (DistributedFileSystem) fileSystem;
            return dbfs.getDataNodeStats();
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Checks whether the given HDFS path exists.
     *
     * @param hdfsFile HDFS path
     * @return true if the path exists, false if it does not or on error
     */
    public static boolean isFileExist(String hdfsFile) {
        try {
            return fileSystem.exists(new Path(hdfsFile));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Lists the immediate children of an HDFS directory.
     *
     * @param hdfsFileDir HDFS directory path
     * @return child file statuses, or null on failure
     */
    public static FileStatus[] getFilesByDir(String hdfsFileDir) {
        try {
            // Use the FileSystem configured via initClient() — previously this
            // reconnected to a hard-coded address, ignoring the initialized target.
            return fileSystem.listStatus(new Path(hdfsFileDir));
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Creates an HDFS directory, including any missing parents.
     *
     * @param hdfsFileDir directory path to create
     * @return true on success, false on failure
     */
    public static boolean makeHdfsDir(String hdfsFileDir) {
        try {
            return fileSystem.mkdirs(new Path(hdfsFileDir));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Deletes an HDFS path, recursing into directories.
     *
     * @param hdfsFilePath HDFS path to delete
     * @return true on success, false on failure
     */
    public static boolean deleteHdfsFile(String hdfsFilePath) {
        return deleteHdfsFile(hdfsFilePath, true);
    }

    /**
     * Deletes an HDFS path.
     *
     * @param hdfsFilePath HDFS path to delete
     * @param isRecursive  whether to delete directory contents recursively
     * @return true on success, false on failure
     */
    public static boolean deleteHdfsFile(String hdfsFilePath, Boolean isRecursive) {
        try {
            return fileSystem.delete(new Path(hdfsFilePath), isRecursive);
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Reads the full contents of an HDFS file.
     *
     * @param hdfsFilePath HDFS file path
     * @return the file's bytes
     * @throws IOException if the file cannot be opened or read
     */
    public static byte[] readHdfsFile(String hdfsFilePath) throws IOException {
        FSDataInputStream fis = null;
        try {
            fis = fileSystem.open(new Path(hdfsFilePath));
            // Read to EOF in chunks. available() only reports the bytes buffered
            // locally, so sizing the result with it truncates large files.
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int read;
            while ((read = fis.read(buffer)) != -1) {
                bos.write(buffer, 0, read);
            }
            return bos.toByteArray();
        } finally {
            IOUtils.closeStream(fis);
        }
    }

    /**
     * Renames an HDFS file.
     *
     * @param oldName source path (absolute)
     * @param newName destination path (absolute)
     * @return true if the rename succeeded, false otherwise
     */
    public static boolean renameHdfsFile(String oldName, String newName) {
        try {
            // Propagate rename()'s result — it returns false on failure
            // (e.g. destination exists) without throwing.
            return fileSystem.rename(new Path(oldName), new Path(newName));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Writes a byte array to a new HDFS file.
     *
     * @param dest    HDFS path of the new file
     * @param content bytes to write
     * @return true on success, false on failure
     */
    public static boolean writeInfoToHdfsFile(String dest, byte[] content) {
        FSDataOutputStream fsDataOutputStream = null;
        try {
            fsDataOutputStream = fileSystem.create(new Path(dest));
            fsDataOutputStream.write(content);
            fsDataOutputStream.flush();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            IOUtils.closeStream(fsDataOutputStream);
        }
        return true;
    }

    /**
     * Uploads a local file to HDFS with defaults: keep the source, do not overwrite.
     *
     * @param src  local file path
     * @param dest HDFS destination path
     * @return true on success, false on failure
     */
    public static boolean uploadLocalFileToHDFS(String src, String dest) {
        return uploadLocalFileToHDFS(false, false, src, dest);
    }

    /**
     * Uploads a local file to HDFS.
     *
     * @param delSrc   whether to delete the local source after upload
     * @param override whether to overwrite an existing destination file
     * @param src      local file path
     * @param dest     HDFS destination path; a relative path resolves against the
     *                 connecting user's home directory
     * @return true on success, false on failure
     */
    public static boolean uploadLocalFileToHDFS(boolean delSrc, boolean override,
                                                String src, String dest) {
        try {
            fileSystem.copyFromLocalFile(delSrc, override, new Path(src), new Path(dest));
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Closes the underlying FileSystem handle, if one was opened.
     */
    public static void close() {
        if (fileSystem != null) {
            try {
                fileSystem.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
测试代码:
package com.zhiwei.hdfs;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.hbase.util.Bytes;import org.apache.hadoop.hdfs.protocol.DatanodeInfo;import org.junit.After;import org.junit.Before;import org.junit.Test;public class HdfsClientTest { @Before public void init() { System.setProperty("hadoop.home.dir", "D:\\Tools\\hadoop-2.7.3"); } /** * 获取HDFS节点信息 * @throws Exception */ @Test public void getDatanodeInfosTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); DatanodeInfo[] datanodeInfos = HdfsClient.getDatanodeInfos(); for(DatanodeInfo datanodeInfo : datanodeInfos) { System.out.println("节点主机名:" + datanodeInfo.getHostName()); System.out.println("节点Http访问端口:" + datanodeInfo.getInfoPort()); System.out.println("节点IPC访问端口:" + datanodeInfo.getIpcPort()); System.out.println("节点已用缓存:" + datanodeInfo.getCacheUsedPercent()); } } /** * 判断文件是否存在 * @throws Exception */ @Test public void isFileExistTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); System.out.println(HdfsClient.isFileExist("/data")); } /** * 获取目录下的文件列表 * @throws Exception */ @Test public void getFilesByDirTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); FileStatus[] fStatus = HdfsClient.getFilesByDir("/data"); for(FileStatus fs : fStatus) { System.out.println("子文件路径:" + fs.getPath() + ", " + "子文件属组:" + fs.getGroup() + ", 文件属主: " + fs.getOwner()); } } /** * HDFS创建目录 * @throws Exception */ @Test public void makeHdfsDirTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); System.out.println("文件创建成功: " + HdfsClient.makeHdfsDir("/data/test")); } /** * HDFS删除目录 * @throws Exception */ @Test public void deleteHdfsFileTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); System.out.println("文件删除成功: " + HdfsClient.deleteHdfsFile("/data/test",true)); } /** * 读取HDFS文件 * @throws Exception */ @Test public void readHdfsFileTest() 
throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); System.out.println("HDFS文件内容: " + Bytes.toString(HdfsClient.readHdfsFile("/data/mapreduce/output/part-r-00000"))); } /** * 读取文件重命名 * @throws Exception */ @Test public void renameHdfsFileTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); System.out.println("文件重命名成功: " + HdfsClient.renameHdfsFile("/data/mapreduce/output/test","/data/mapreduce/output/test1")); } /** * 将数据写入HDFS * @throws Exception */ @Test public void writeInfoToHdfsFileTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); System.out.println("数据写入HDFS: " + HdfsClient.writeInfoToHdfsFile("/data/Test","/data/mapreduce/output/test1".getBytes())); } /** * 文件上传HDFS * @throws Exception */ @Test public void uploadLocalFileToHDFSTest() throws Exception { HdfsClient.initClient("192.168.204.129", "9090", "squirrel"); System.out.println("文件上传HDFS: " + HdfsClient.uploadLocalFileToHDFS(true,true,"d://temp/test.txt","/data/Test")); } @After public void close() { HdfsClient.close(); }}
maven pom.xml配置
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.zhiwei</groupId>
    <artifactId>hadoop</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>Hadoop</name>
    <url>http://maven.apache.org</url>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <java.version>1.8</java.version>
        <hadoop.version>2.7.3</hadoop.version>
        <hbase.version>1.2.6</hbase.version>
        <hive.version>2.3.1</hive.version>
        <zookeeper.version>3.4.8</zookeeper.version>
        <curator.version>4.0.0</curator.version>
        <fastjson.version>1.2.41</fastjson.version>
        <mahout.version>0.13.0</mahout.version>
        <kafka.version>0.11.0.2</kafka.version>
        <zkclient.version>0.10</zkclient.version>
        <junit.version>4.12</junit.version>
    </properties>
    <dependencies>
        <!-- ZooKeeper -->
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>${zookeeper.version}</version>
        </dependency>
        <!-- Apache Curator ZooKeeper client -->
        <dependency>
            <groupId>org.apache.curator</groupId>
            <artifactId>curator-client</artifactId>
            <version>${curator.version}</version>
        </dependency>
        <!-- zkclient ZooKeeper wrapper -->
        <dependency>
            <groupId>com.101tec</groupId>
            <artifactId>zkclient</artifactId>
            <version>${zkclient.version}</version>
        </dependency>
        <!-- Hadoop -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- HBase -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <!-- Hive -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>${hive.version}</version>
        </dependency>
        <!-- Kafka (fixed: property reference was "${kafka.version>" — missing brace) -->
        <!-- <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency> -->
        <!-- Mahout -->
        <dependency>
            <groupId>org.apache.mahout</groupId>
            <artifactId>mahout-math</artifactId>
            <version>${mahout.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.mahout</groupId>
            <artifactId>mahout-hdfs</artifactId>
            <version>${mahout.version}</version>
        </dependency>
        <!-- Alibaba FastJson -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>${fastjson.version}</version>
        </dependency>
        <!-- JUnit (test-only) -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <!-- Pin Guava to override Hive's transitive version and avoid conflicts -->
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>11.0.2</version>
        </dependency>
    </dependencies>
    <!-- Compile for the project's JDK version -->
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
注意: hadoop运行其实并不依赖于Hadoop Eclipse插件,Hadoop Eclipse插件只是简单地封装Hadoop的配置参数,本质也是通过Hadoop的API访问的,将HDFS文件系统以树结构的形式呈现。
项目结构:
阅读全文
0 0
- windows平台远程访问HDFS文件系统
- java访问HDFS文件系统
- 远程访问hdfs
- Hadoop平台Hdfs分布式文件系统
- eclipse访问hdfs文件系统资源
- java api 远程访问hdfs
- Fedora 访问windows文件系统
- 配置GP访问Hadoop的HDFS文件系统
- 利用libhdfs访问分布式文件系统(hdfs)
- window eclipse 访问远程虚拟机 Hdfs
- Windows.NETServer:远程访问
- windows 局域网远程访问
- windows远程访问linux
- windows远程访问linux
- windows远程访问linux
- Windows远程访问Ubuntu
- windows远程访问
- Fedora17下用samba实现windows和Linux跨平台文件系统访问
- 数组与集合的区别
- 如何设置脑图软件Mindmaster背景主题
- 28 迭代器适配器(学自Boolean)
- 最小费用最大流的另类算法
- spring-boot注解
- windows平台远程访问HDFS文件系统
- Win10下eclipse与tomcat和Maven的集成与配置
- 小星星
- 实战 MLP CNN 实践mnist
- 开发者的macji qi磁盘空间清理方法
- Javascript学习笔记(DOM 模型概述)
- [转]Special Judge在HUSTOJ中的使用
- 673. Number of Longest Increasing Subsequence
- 一张图看透目前人工智能知识体系