hadoop java HDFS 读写操作

来源:互联网 发布:客户维护软件 编辑:程序博客网 时间:2024/05/19 20:37

本文用 Java API 操作 HDFS 文件系统。之前的一篇文章单独介绍了每个 API 的使用;为了以后方便使用,这里把这些操作封装成了一个类,分享给大家。

文件操作类:比较简单,大家也可以自己完善。

package storm.hadoop;import java.io.IOException;import java.net.URI;import java.util.ArrayList;import java.util.List;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.BlockLocation;import org.apache.hadoop.fs.FSDataInputStream;import org.apache.hadoop.fs.FSDataOutputStream;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hdfs.DistributedFileSystem;import org.apache.hadoop.hdfs.protocol.DatanodeInfo;import org.apache.hadoop.io.IOUtils;public class HDFS_File {Configuration config = null;FileSystem fs = null;DistributedFileSystem hdfs = null;// constructorHDFS_File(String uri) throws IOException {config = new Configuration();fs = FileSystem.get(URI.create(uri), config);hdfs = (DistributedFileSystem) fs;}// read the file from HDFSpublic void ReadFile(String FileName) {try {FSDataInputStream dis = hdfs.open(new Path(FileName));IOUtils.copyBytes(dis, System.out, 4096, false);dis.close();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}// write the file from HDFSpublic void WriteFile(String FileName, String content) {try {FSDataOutputStream os = hdfs.create(new Path(FileName));os.write(content.getBytes("UTF-8"));os.close();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}// copy the file from HDFS to localpublic void GetFile(String srcFile, String dstFile) {try {Path srcPath = new Path(srcFile);Path dstPath = new Path(dstFile);hdfs.copyToLocalFile(true, srcPath, dstPath);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}// copy the local file to HDFSpublic void PutFile(String srcFile, String dstFile) {try {Path srcPath = new Path(srcFile);Path dstPath = new Path(dstFile);hdfs.copyFromLocalFile(srcPath, dstPath);} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}// create the new filepublic FSDataOutputStream CreateFile(String FileName) {try 
{Path path = new Path(FileName);FSDataOutputStream outputStream = hdfs.create(path);return outputStream;} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return null;}// rename the file namepublic boolean ReNameFile(String srcName, String dstName) {try {Path fromPath = new Path(srcName);Path toPath = new Path(dstName);boolean isRenamed = hdfs.rename(fromPath, toPath);return isRenamed;} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return false;}// delete the file// tyep = true, delete the directory// type = false, delece the filepublic boolean DelFile(String FileName, boolean type) {try {Path path = new Path(FileName);boolean isDeleted = hdfs.delete(path, type);return isDeleted;} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return false;}// Get HDFS file last modification timepublic long GetFileModTime(String FileName) {try {Path path = new Path(FileName);FileStatus fileStatus = hdfs.getFileStatus(path);long modificationTime = fileStatus.getModificationTime();return modificationTime;} catch (IOException e) {e.printStackTrace();}return 0;}// checke if a file exists in HDFSpublic boolean CheckFileExist(String FileName) {try {Path path = new Path(FileName);boolean isExists = hdfs.exists(path);return isExists;} catch (IOException e) {e.printStackTrace();}return false;}// Get the locations of a file in the HDFS clusterpublic List<String[]> GetFileBolckHost(String FileName) {try {List<String[]> list = new ArrayList<String[]>();Path path = new Path(FileName);FileStatus fileStatus = hdfs.getFileStatus(path);BlockLocation[] blkLocations = hdfs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());int blkCount = blkLocations.length;for (int i = 0; i < blkCount; i++) {String[] hosts = blkLocations[i].getHosts();list.add(hosts);}return list;} catch (IOException e) {e.printStackTrace();}return null;}// Get a list of all the nodes host names in the HDFS clusterpublic String[] 
GetAllNodeName() {try {DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();String[] names = new String[dataNodeStats.length];for (int i = 0; i < dataNodeStats.length; i++) {names[i] = dataNodeStats[i].getHostName();System.out.println(names[i]);}return names;} catch (IOException e) {e.printStackTrace();}return null;}}
自己写的测试程序:

package storm.hadoop;import java.io.IOException;import java.io.InputStream;import java.net.URI;import java.util.Date;import java.util.List;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataOutputStream;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hdfs.DistributedFileSystem;import org.apache.hadoop.hdfs.protocol.DatanodeInfo;import org.apache.hadoop.io.IOUtils;public class File_Operation {public static void main(String args[]) throws IOException {Configuration conf = new Configuration();String uri = "hdfs://localhost:9000/user/root/input/file02";conf.addResource(uri);HDFS_File file = new HDFS_File(uri);// print all the node nameString[] host_name = file.GetAllNodeName();for (int i = 0; i < host_name.length; i++) {System.out.println("the host name:" + host_name[i]);}String dstFile = "/user/root/my_test/a.txt";// read the filefile.WriteFile(dstFile, "hello mgq");file.ReadFile(dstFile);// check the file existsif (file.CheckFileExist(dstFile) == true) {System.out.println(dstFile + "the file exists");} else {System.out.println(dstFile + "the file not exists");}// copy the file to HDFSString srcFile = "/home/test.txt";dstFile = "/user/root/my_test/test.txt";String ToFile = "/home/test1.txt";file.PutFile(srcFile, dstFile);System.out.println("copy file ok!");// check the file last modfiy timelong mod_time = file.GetFileModTime(dstFile);Date d = new Date(mod_time);System.out.println("the modefile time" + d);// get the locations of a file in HDFSList<String[]> list = file.GetFileBolckHost(dstFile);for (int i = 0; i < list.size(); i++) {for (int j = 0; j < list.get(i).length; j++) {System.out.println("the bolck host name:" + list.get(i)[j]);}}file.GetFile(dstFile, ToFile);}}



参考

http://blog.csdn.net/jackydai987/article/details/6227299#comments