HDFS Java API


All of the code below has been tested and works.


1. Check whether a file or directory exists

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CheckFileExist {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);

            // Path of the file or directory to check
            Path showpath = new Path("/home");

            boolean isExist = hdfs.exists(showpath);
            System.out.println("Exist? " + isExist);
        }

    }
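If you also need to distinguish a file from a directory, the FileStatus returned by getFileStatus() carries that flag. A minimal sketch along the same lines (the class name CheckFileType is made up here; on newer Hadoop versions isDirectory() replaces the deprecated isDir()):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CheckFileType {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);
            Path path = new Path("/home");

            if (hdfs.exists(path)) {
                // FileStatus holds the metadata of one file or directory
                FileStatus status = hdfs.getFileStatus(path);
                System.out.println(path + " is a directory? " + status.isDir());
            }
        }

    }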

2. Upload a local file to a given location on HDFS

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Upload a local file to the given location on HDFS,
    // where src and dst are full paths
    public class CopyFile {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);
            // Local source path
            Path src = new Path("/home/caiyong/桌面/ubuntu命令");
            // Destination path on HDFS
            Path dst = new Path("hdfs://10.20.68.112/home/caiyong/testCopyFile/");

            hdfs.copyFromLocalFile(src, dst);
            System.out.println("Upload to " + conf.get("fs.default.name"));
            // List the destination to confirm the upload
            FileStatus[] files = hdfs.listStatus(dst);
            for (FileStatus file : files) {
                System.out.println(file.getPath());
            }
        }

    }
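The reverse direction works the same way with copyFromLocalFile's counterpart, copyToLocalFile. A minimal sketch, with both paths chosen purely for illustration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Download an HDFS file to the local filesystem
    public class CopyToLocal {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);
            // HDFS source and local destination (example paths)
            Path src = new Path("/home/caiyong/testCopyFile");
            Path dst = new Path("/tmp/testCopyFile");

            hdfs.copyToLocalFile(src, dst);
            System.out.println("Downloaded to " + dst);
        }

    }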

3. Create a directory on HDFS

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Create a directory on HDFS
    public class CreateDIr {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);

            // Path of the directory to create -- don't forget the leading slash
            Path dfs = new Path("/testCreateDir");
            // Create the directory
            hdfs.mkdirs(dfs);
            System.out.println("SUCCESS!");
        }

    }
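mkdirs() behaves like mkdir -p: it creates any missing parent directories along the way. A small sketch, using a made-up nested path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CreateNestedDir {

        public static void main(String[] args) throws Exception {
            FileSystem hdfs = FileSystem.get(new Configuration());
            // mkdirs() creates the missing parents too, like mkdir -p
            hdfs.mkdirs(new Path("/testCreateDir/a/b"));
            System.out.println(hdfs.exists(new Path("/testCreateDir/a")));  // true
        }

    }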

4. Create a file on HDFS

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Create a file on HDFS; dfs is the file's full path
    public class CreateFIle {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);
            byte[] buff = "Hello1CaiYong!".getBytes();
            // Path of the file to create
            Path dfs = new Path("/testCreateFile");
            // FileSystem.create(Path f) creates the file and returns an output stream
            FSDataOutputStream outputStream = hdfs.create(dfs);
            outputStream.write(buff, 0, buff.length);
            // Close the stream so the data is flushed to HDFS
            outputStream.close();
            System.out.println("SUCCESS!");
        }

    }
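To verify the write, the file can be read back through FileSystem.open(), which returns an FSDataInputStream. A minimal sketch that streams the file created above to stdout:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;

    public class ReadFile {

        public static void main(String[] args) throws Exception {
            FileSystem hdfs = FileSystem.get(new Configuration());
            // Open the file written above and copy it to stdout
            FSDataInputStream in = hdfs.open(new Path("/testCreateFile"));
            try {
                IOUtils.copyBytes(in, System.out, 4096, false);
            } finally {
                IOUtils.closeStream(in);
            }
        }

    }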

5. Delete a file on HDFS

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Delete a file on HDFS.
    // Deleting a directory works the same way; if the directory is
    // non-empty, it must be deleted recursively.
    public class DeleteFile {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);

            // Full path of the file to delete (fill this in)
            Path delpath = new Path("");
            boolean isdelete = hdfs.delete(delpath, false);

            // Recursive delete:
            // boolean isdelete = hdfs.delete(delpath, true);
            System.out.println("Delete? " + isdelete);
        }

    }
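delete() simply returns false when the path does not exist, so a guard with exists() makes failures easier to interpret. A minimal sketch with a placeholder path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class SafeDelete {

        public static void main(String[] args) throws Exception {
            FileSystem hdfs = FileSystem.get(new Configuration());
            Path delpath = new Path("/testCreateDir");  // placeholder path
            if (hdfs.exists(delpath)) {
                // true = recursive, required for non-empty directories
                System.out.println("Deleted? " + hdfs.delete(delpath, true));
            } else {
                System.out.println(delpath + " does not exist");
            }
        }

    }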

6. Find a file's block locations in the cluster

    // Find where a file's blocks are located in the cluster
    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class FileLocation {

        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);

            // Must be the full path of a file, not a directory
            Path filepath = new Path("/home/caiyong/hdfs-site.xml");
            FileStatus status = hdfs.getFileStatus(filepath);

            // Look up where the file's blocks live in the cluster
            BlockLocation[] bloc = hdfs.getFileBlockLocations(status, 0, status.getLen());
            int blocklen = bloc.length;
            // Print the host holding each block
            for (int i = 0; i < blocklen; i++) {
                String[] hosts = bloc[i].getHosts();
                System.out.println("Block " + i + "'s location is: " + hosts[0]);
            }
            System.out.println("SUCCESS!");
        }

    }
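Since each block is replicated, getHosts() usually returns more than one host. A variant that prints every replica location rather than just hosts[0], sketched as a drop-in replacement for the loop inside FileLocation above:

    // Each block is replicated, so getHosts() usually returns several hosts
    for (int i = 0; i < bloc.length; i++) {
        System.out.print("Block " + i + " is on:");
        for (String host : bloc[i].getHosts()) {
            System.out.print(" " + host);
        }
        System.out.println();
    }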

7. Get information about all nodes in the cluster

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

    // Get information about all DataNodes in the cluster
    public class GetNodeInfo {

        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(conf);
            DistributedFileSystem hdfs = (DistributedFileSystem) fs;

            // Get the stats of every DataNode
            DatanodeInfo[] datanodeinfo = hdfs.getDataNodeStats();
            // Print each node's hostname
            for (int i = 0; i < datanodeinfo.length; i++) {
                System.out.println("DataNode " + i + "'s name is: "
                        + datanodeinfo[i].getHostName());
            }
            System.out.println("SUCCESS!");
        }

    }
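DatanodeInfo exposes more than the hostname; capacity figures, reported in bytes, are available as well. A sketch of an alternative loop body for GetNodeInfo, assuming the same datanodeinfo array:

    // Capacity and remaining space per DataNode, in bytes
    for (DatanodeInfo node : datanodeinfo) {
        System.out.println(node.getHostName()
                + " capacity=" + node.getCapacity()
                + " remaining=" + node.getRemaining());
    }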


8. Get a file's last modification time

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Get a file's last modification time
    public class LatestModifyTime {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);

            // Path of the file to inspect
            Path filepath = new Path("/home/caiyong/hdfs-site.xml");

            FileStatus fileStatus = hdfs.getFileStatus(filepath);
            // Modification time of the given file
            long modifytime = fileStatus.getModificationTime();
            System.out.println(filepath + "'s last modification time is: " + modifytime);
        }

    }
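getModificationTime() returns milliseconds since the Unix epoch, so the raw number is hard to read. A small addition to LatestModifyTime that formats it (the pattern string is just an example):

    import java.text.SimpleDateFormat;
    import java.util.Date;

    // ... inside main(), after modifytime has been read:
    SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    System.out.println(filepath + " was last modified at: "
            + fmt.format(new Date(modifytime)));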

9. List all files in a directory

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ListAllFIles {

        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);

            // Directory to list
            Path listpath = new Path("/home/caiyong/biginput");
            // List the directory and print each entry via FileStatus.getPath()
            FileStatus[] status = hdfs.listStatus(listpath);
            for (int i = 0; i < status.length; i++) {
                System.out.println(status[i].getPath().toString());
            }
            hdfs.close();
        }

    }
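listStatus() does not descend into subdirectories. A recursive variant, sketched here as a hypothetical ListAllFilesRecursive class, walks the whole tree:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ListAllFilesRecursive {

        public static void main(String[] args) throws Exception {
            FileSystem hdfs = FileSystem.get(new Configuration());
            walk(hdfs, new Path("/home/caiyong/biginput"));
            hdfs.close();
        }

        // Recurse into subdirectories, printing every file found
        static void walk(FileSystem hdfs, Path dir) throws Exception {
            for (FileStatus entry : hdfs.listStatus(dir)) {
                if (entry.isDir()) {
                    walk(hdfs, entry.getPath());
                } else {
                    System.out.println(entry.getPath());
                }
            }
        }

    }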

10. Rename an HDFS file

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Rename a file on HDFS; oldpath and newpath are full paths
    public class RenameFile {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(conf);

            // Old file name
            Path oldpath = new Path("/testCreateFile");
            // New file name
            Path newpath = new Path("/testCreateFileRenameTest");
            // Rename
            boolean isRename = hdfs.rename(oldpath, newpath);
            String result = isRename ? "SUCCESS" : "FAILURE";
            System.out.println("Rename result: " + result);
        }

    }
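Note that rename() reports failure by returning false rather than throwing, for instance when oldpath is missing or newpath already exists, so checking beforehand makes the outcome clearer. A sketch that could replace the bare rename call above:

    // rename() returns false on failure instead of throwing, so check first
    if (hdfs.exists(oldpath) && !hdfs.exists(newpath)) {
        System.out.println("Renamed? " + hdfs.rename(oldpath, newpath));
    } else {
        System.out.println("Skipping rename: source missing or destination exists");
    }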

11. Read an HDFS file's contents via a URL

    import java.io.InputStream;
    import java.net.URL;

    import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
    import org.apache.hadoop.io.IOUtils;

    // Print the contents of an HDFS file addressed by URL
    public class URLCat {
        static {
            // Teach java.net.URL to understand hdfs:// URLs
            URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
        }

        public static void main(String[] args) throws Exception {
            InputStream in = null;
            try {
                in = new URL(args[0]).openStream();
                IOUtils.copyBytes(in, System.out, 4096, false);
            } finally {
                IOUtils.closeStream(in);
            }
        }
    }
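URLCat takes the file URL as its first argument, so with the class on the Hadoop classpath a run might look like (NameNode address reused from the upload example, purely for illustration):

    hadoop URLCat hdfs://10.20.68.112/testCreateFile

One caveat: URL.setURLStreamHandlerFactory() may be called at most once per JVM, so this approach fails if other code in the same process has already installed a factory.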




