Java HDFS File Operation Examples


1. Creating an HDFS directory
A directory can be created on HDFS with "FileSystem.mkdirs(Path f)", where f is the full path of the directory. A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreatDir {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        Path path = new Path("/testdir");
        boolean created = hdfs.mkdirs(path); // true if the directory was created (or already exists)
        System.out.println("mkdirs " + (created ? "succeeded" : "failed"));
        hdfs.close();
    }
}
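On a cluster with permissions enabled, mkdirs() can fail with an AccessControlException when the local OS user has no write permission on HDFS. FileSystem.get() has a three-argument overload that takes the remote user name explicitly. A minimal sketch, assuming the cluster user is "hdfs" (the user name and the class name CreateDirAsUser are illustrative; adjust to your cluster):

package iflytek;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateDirAsUser {
    public static void main(String[] args) throws Exception { // this overload also throws InterruptedException
        Configuration conf = new Configuration();
        // Third argument: the user to act as on the remote cluster (an assumption; change as needed).
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf, "hdfs");
        System.out.println("mkdirs " + (hdfs.mkdirs(new Path("/testdir")) ? "succeeded" : "failed"));
        hdfs.close();
    }
}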

2. Renaming an HDFS file
"FileSystem.rename(Path src, Path dst)" renames the specified HDFS file, where src and dst are both full paths. A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class Rename {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        Path oldPath = new Path("/testdir");
        Path newPath = new Path("/testdirnew"); // the target may be a file path as well
        boolean isRename = hdfs.rename(oldPath, newPath);
        System.out.println("Rename " + (isRename ? "succeeded" : "failed"));
        hdfs.close();
    }
}


3. Creating a file

"FileSystem.create(Path f)" creates a file on HDFS, where f is the full path of the file. A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateFile {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        byte[] buff = "hello hadoop world".getBytes();   // the bytes to write
        Path dfs = new Path("/test/test");               // full path of the new file
        FSDataOutputStream outputStream = hdfs.create(dfs); // the write-side counterpart of FSDataInputStream
        System.out.println("Creating file...");
        outputStream.write(buff, 0, buff.length);        // write() has several overloads
        outputStream.close();                            // flush and release the stream
        System.out.println("File created");
        hdfs.close();
    }
}
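To read the file back, "FileSystem.open(Path f)" returns an FSDataInputStream, the read-side class mentioned in the comment above. A minimal sketch that streams the file's contents to standard output (the class name ReadFile is illustrative):

package iflytek;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        FSDataInputStream inputStream = hdfs.open(new Path("/test/test")); // the file written above
        IOUtils.copyBytes(inputStream, System.out, 4096, false);           // copy to stdout; keep streams open
        IOUtils.closeStream(inputStream);
        hdfs.close();
    }
}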


4. Deleting a file on HDFS
"FileSystem.delete(Path f, boolean recursive)" deletes the specified HDFS file, where f is the full path of the file to delete and recursive determines whether the deletion is recursive. A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        Path path = new Path("/test/test.txt");
        boolean isDeleted = hdfs.delete(path, false); // false: no recursion; use true when deleting a directory
        System.out.println("Delete " + (isDeleted ? "succeeded" : "failed"));
        hdfs.close();
    }
}
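As the comment notes, deleting a non-empty directory requires recursive = true; with false, HDFS refuses to remove it. A minimal sketch of the recursive case (DeleteDir is an illustrative name):

package iflytek;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        // true: delete the directory and everything under it
        boolean isDeleted = hdfs.delete(new Path("/testdirnew"), true);
        System.out.println("Recursive delete " + (isDeleted ? "succeeded" : "failed"));
        hdfs.close();
    }
}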

5. Checking whether an HDFS file exists

"FileSystem.exists(Path f)" checks whether the specified HDFS file exists, where f is the full path of the file. A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckFile {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        Path path = new Path("/test");
        boolean isExists = hdfs.exists(path);
        System.out.println("Path " + (isExists ? "exists" : "does not exist"));
        hdfs.close();
    }
}
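exists() does not say whether the path is a file or a directory; that information lives in FileStatus. A small sketch under the same connection setup (CheckType is an illustrative name; isDirectory() is available on FileStatus in recent Hadoop versions):

package iflytek;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckType {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        Path path = new Path("/test");
        if (hdfs.exists(path)) {
            FileStatus status = hdfs.getFileStatus(path); // metadata for the path
            System.out.println(path + " is a " + (status.isDirectory() ? "directory" : "file"));
        } else {
            System.out.println(path + " does not exist");
        }
        hdfs.close();
    }
}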


6. Getting the last modification time of an HDFS file
The modification time of an HDFS file is read with "FileStatus.getModificationTime()", where the FileStatus object is obtained from "FileSystem.getFileStatus(Path f)". A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetLastTime {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        Path path = new Path("/test/test");
        FileStatus status = hdfs.getFileStatus(path); // FileStatus holds the file's metadata
        long lastTime = status.getModificationTime(); // milliseconds since the epoch
        System.out.println("Last modified: " + new Date(lastTime));
        hdfs.close();
    }
}
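Because the timestamp is a plain long of epoch milliseconds, it can be formatted with the standard java.text.SimpleDateFormat; for example, the println above could be replaced by:

SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); // requires import java.text.SimpleDateFormat;
System.out.println("Last modified: " + fmt.format(new Date(lastTime)));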


7. Finding a file's block locations in the HDFS cluster
"FileSystem.getFileBlockLocations(FileStatus file, long start, long len)" finds where the specified file is stored in the HDFS cluster, where file is the file's status object and start and len delimit the byte range of the file to query. A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileLoc {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        Path path = new Path("/test/test");
        FileStatus filestatus = hdfs.getFileStatus(path);
        // Query block locations for the whole file: offset 0 through its length.
        BlockLocation[] blocklocation = hdfs.getFileBlockLocations(filestatus, 0, filestatus.getLen());
        for (int i = 0; i < blocklocation.length; i++) {
            String[] hosts = blocklocation[i].getHosts(); // one host per replica of this block
            System.out.println("block_" + i + "_location:" + Arrays.toString(hosts));
        }
        hdfs.close();
    }
}

8. Getting the names of all nodes in the HDFS cluster

"DatanodeInfo.getHostName()" returns the host name of a cluster node; the list of DataNodes comes from "DistributedFileSystem.getDataNodeStats()". A concrete implementation follows:

package iflytek;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class GetList {
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.61.151:8020"), conf);
        DistributedFileSystem hdfs = (DistributedFileSystem) fs; // only valid when fs is actually HDFS
        DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Name:" + dataNodeStats[i].getHostName());
        }
        hdfs.close();
    }
}
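The cast to DistributedFileSystem throws a ClassCastException when the URI resolves to a local or other non-HDFS filesystem, so a defensive version checks the runtime type first. A minimal sketch of the guard, replacing the cast and loop above:

if (fs instanceof DistributedFileSystem) {
    DatanodeInfo[] dataNodeStats = ((DistributedFileSystem) fs).getDataNodeStats();
    for (DatanodeInfo node : dataNodeStats) {
        System.out.println(node.getHostName());
    }
} else {
    System.out.println("Not an HDFS filesystem: " + fs.getClass().getName());
}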