HDFS Java API


Official API documentation:
http://hadoop.apache.org/docs/r2.7.3/api/index.html

1. Reading an HDFS file

package test;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadFile {
    public static void main(String[] args) throws IOException {
        String uri = "hdfs://192.168.1.25:8020/user/root/wordcount/input/words.txt";
        Configuration cfg = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), cfg);
        InputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            IOUtils.closeStream(in);
        }
    }
}

Run it directly in Eclipse:
Run As -> Java Application

SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
Mar 02, 2017 4:42:20 PM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Mar 02, 2017 4:42:20 PM org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory <init>
WARNING: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
hi
hello,world
hello,hadoop
hello,java
hi,baby

Running it on the server
Export the class as ReadFile.jar:

  • In Eclipse, select the class or package you want to export
  • Right-click it and choose the Export option
  • In the dialog that pops up, open the Java category and select JAR file
  • In the JAR Export dialog, use the JAR file text box to choose the location and name of the jar to generate, e.g. /root/ReadFile.jar here
  • Make sure both "Export generated class files and resources" and "Export java source files and resources" are checked

Upload the jar to one of the cluster nodes:

[root@hadron ~]# scp ReadFile.jar 192.168.1.25:/root
root@192.168.1.25's password: 
ReadFile.jar                                  100% 1248     1.2KB/s   00:00

On the anode1 (192.168.1.25) node, run:

[root@anode1 ~]# hadoop jar ReadFile.jar test.ReadFile
hi
hello,world
hello,hadoop
hello,java
hi,baby
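A note on the API: fs.open() actually returns an FSDataInputStream, which supports random access through seek(). The sketch below is not from the original run; the class name ReadFileTwice is purely illustrative, and it assumes the same cluster address and words.txt path as above. It prints the file twice by seeking back to the start:

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadFileTwice {
    public static void main(String[] args) throws Exception {
        // Same file as in ReadFile above; adjust the address to your cluster.
        String uri = "hdfs://192.168.1.25:8020/user/root/wordcount/input/words.txt";
        Configuration cfg = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), cfg);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
            in.seek(0); // go back to the beginning of the file
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}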

2. Uploading a file to HDFS

[root@hadron ~]# cat test.txt
test
Hi,HDFS!

package test;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class PutFile {
    public static void main(String[] args) throws IOException {
        // Local file path
        String local = "/root/test.txt";
        String dest = "hdfs://192.168.1.25:8020/user/root/wordcount/input/test2.txt";
        InputStream in = new BufferedInputStream(new FileInputStream(local));
        Configuration cfg = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dest), cfg);
        OutputStream out = fs.create(new Path(dest));
        IOUtils.copyBytes(in, out, 4096, true);
        fs.close();
        IOUtils.closeStream(in);
    }
}
[root@anode1 ~]# hadoop fs -ls /user/root/wordcount/input
Found 3 items
-rw-r--r--   3 root hdfs         14 2017-03-02 17:09 /user/root/wordcount/input/test2.txt
-rw-r--r--   3 root hdfs         47 2017-03-01 09:53 /user/root/wordcount/input/words.txt
-rw-r--r--   3 root hdfs         47 2017-03-01 10:16 /user/root/wordcount/input/words2.txt
[root@anode1 ~]# hadoop fs -cat /user/root/wordcount/input/test2.txt
test
Hi,HDFS!
[root@anode1 ~]# 
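If you want feedback while a larger file is being written, FileSystem.create also has an overload that takes a Progressable callback, which Hadoop invokes periodically during the write. Below is a minimal sketch of the same upload using that overload; the class name PutFileWithProgress is illustrative only and not part of the original post:

package test;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

public class PutFileWithProgress {
    public static void main(String[] args) throws Exception {
        String local = "/root/test.txt";
        String dest = "hdfs://192.168.1.25:8020/user/root/wordcount/input/test2.txt";
        InputStream in = new BufferedInputStream(new FileInputStream(local));
        Configuration cfg = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dest), cfg);
        // The Progressable callback is called periodically while data is written.
        OutputStream out = fs.create(new Path(dest), new Progressable() {
            public void progress() {
                System.out.print(".");
            }
        });
        IOUtils.copyBytes(in, out, 4096, true); // true: close both streams when done
        fs.close();
    }
}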

3. Downloading a file from HDFS

package test;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFile {
    public static void main(String[] args) throws IOException {
        String hdfsPath = "hdfs://192.168.1.25:8020/user/root/wordcount/input/words.txt";
        String localPath = "/root/words";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        Path hdfs_path = new Path(hdfsPath);
        Path local_path = new Path(localPath);
        fs.copyToLocalFile(hdfs_path, local_path);
        fs.close();
    }
}
[root@hadron ~]# cat words
hi
hello,world
hello,hadoop
hello,java
hi,baby
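One detail about copyToLocalFile: by default it goes through the checksummed local file system, so a hidden .crc checksum file is written next to the downloaded copy. If that is not wanted, there is an overload with delSrc and useRawLocalFileSystem flags. The sketch below uses the same paths as GetFile; the class name GetFileRaw is illustrative only:

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileRaw {
    public static void main(String[] args) throws Exception {
        String hdfsPath = "hdfs://192.168.1.25:8020/user/root/wordcount/input/words.txt";
        String localPath = "/root/words";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        // delSrc = false keeps the file on HDFS; useRawLocalFileSystem = true
        // skips the checksummed local file system, so no hidden .crc file is written.
        fs.copyToLocalFile(false, new Path(hdfsPath), new Path(localPath), true);
        fs.close();
    }
}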

4. Creating an HDFS directory

package test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateDir {
    public static void main(String[] args) throws IOException {
        String rootPath = "hdfs://192.168.1.25:8020"; // dfs.namenode.rpc-address.mycluster.m1  hdfs://172.16.5.174:8020
        Path p = new Path(rootPath + "/tmp/");
        Configuration conf = new Configuration();
        FileSystem fs = p.getFileSystem(conf);
        boolean b = fs.mkdirs(p);
        System.out.println(b);
        fs.close();
    }
}

Running the program:

SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
Mar 03, 2017 8:39:52 AM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Mar 03, 2017 8:39:53 AM org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory <init>
WARNING: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
true
[root@anode1 ~]# hadoop fs -ls /
Found 9 items
drwxrwxrwx   - yarn   hadoop          0 2017-02-22 12:48 /app-logs
drwxr-xr-x   - hdfs   hdfs            0 2017-02-22 12:29 /apps
drwxr-xr-x   - yarn   hadoop          0 2017-02-22 12:16 /ats
drwxr-xr-x   - hdfs   hdfs            0 2017-02-22 12:17 /hdp
drwxr-xr-x   - mapred hdfs            0 2017-02-22 12:17 /mapred
drwxrwxrwx   - mapred hadoop          0 2017-02-22 12:17 /mr-history
drwxrwxrwx   - spark  hadoop          0 2017-02-24 09:39 /spark2-history
drwxrwxrwx   - hdfs   hdfs            0 2017-02-27 08:32 /tmp
drwxr-xr-x   - hdfs   hdfs            0 2017-02-22 18:22 /user
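Note that mkdirs behaves like mkdir -p and returns true even when the directory already exists, so the true printed above does not necessarily mean anything was created. The small sketch below checks first and also passes an explicit permission; the class name CreateDirIfMissing and the /tmp/demo path are examples only, and the effective permission may still be reduced by the configured umask:

package test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class CreateDirIfMissing {
    public static void main(String[] args) throws Exception {
        Path p = new Path("hdfs://192.168.1.25:8020/tmp/demo");
        Configuration conf = new Configuration();
        FileSystem fs = p.getFileSystem(conf);
        if (!fs.exists(p)) {
            // mkdirs can take an explicit permission (rwxr-xr-x here);
            // the configured umask may still be applied to it.
            fs.mkdirs(p, new FsPermission((short) 0755));
        } else {
            System.out.println(p + " already exists");
        }
        fs.close();
    }
}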

5. Deleting an HDFS file or directory

[root@anode1 ~]# hadoop fs -ls /user/root/wordcount
Found 2 items
drwxr-xr-x   - root hdfs          0 2017-03-02 17:12 /user/root/wordcount/input
drwxr-xr-x   - root hdfs          0 2017-03-01 10:22 /user/root/wordcount/output
package test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) {
        String uri = "hdfs://192.168.1.25:8020/user/root/wordcount/output";
        Path path = new Path(uri);
        Configuration conf = new Configuration();
        try {
            FileSystem fs = path.getFileSystem(conf);
            // Recursively delete the directory and everything under it
            boolean b = fs.delete(path, true);
            System.out.println(b);
            fs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
[root@anode1 ~]# hadoop fs -ls /user/root/wordcount
Found 1 items
drwxr-xr-x   - root hdfs          0 2017-03-02 17:12 /user/root/wordcount/input
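Unlike hadoop fs -rm, FileSystem.delete bypasses the HDFS trash, so the data is gone immediately. If trash semantics are wanted from Java, the Trash helper can be used instead. The sketch below assumes fs.trash.interval is enabled on the cluster; the class name DeleteToTrash is illustrative only:

package test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;

public class DeleteToTrash {
    public static void main(String[] args) throws Exception {
        Path path = new Path("hdfs://192.168.1.25:8020/user/root/wordcount/output");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);
        // Moves the path into the current user's .Trash directory instead of
        // deleting it outright (only effective if fs.trash.interval > 0).
        boolean moved = Trash.moveToAppropriateTrash(fs, path, conf);
        System.out.println(moved);
        fs.close();
    }
}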

6. Listing the files and subdirectories under an HDFS directory

package test;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListFiles {
    public static void main(String[] args) throws IOException {
        String uri = "hdfs://192.168.1.25:8020/user/root/";
        Configuration cfg = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), cfg);
        Path path = new Path(uri);
        FileStatus[] fss = fs.listStatus(path);
        for (FileStatus f : fss) {
            if (f.isFile())
                System.out.println("File:" + f.getPath().toString());
            else
                System.out.println("Dir:" + f.getPath().toString());
        }
    }
}

Output:

SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
Mar 03, 2017 9:21:02 AM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Mar 03, 2017 9:21:02 AM org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory <init>
WARNING: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
Dir:hdfs://192.168.1.25:8020/user/root/.Trash
Dir:hdfs://192.168.1.25:8020/user/root/.hiveJars
Dir:hdfs://192.168.1.25:8020/user/root/.sparkStaging
Dir:hdfs://192.168.1.25:8020/user/root/.staging
Dir:hdfs://192.168.1.25:8020/user/root/wordcount
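listStatus only looks at one level. To walk the tree recursively, FileSystem.listFiles takes a recursive flag and returns an iterator over the files (the directories themselves are not returned). The sketch below uses the same base path; the class name ListFilesRecursive is illustrative only:

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesRecursive {
    public static void main(String[] args) throws Exception {
        String uri = "hdfs://192.168.1.25:8020/user/root/";
        Configuration cfg = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), cfg);
        // Second argument = true: descend into subdirectories; only files are returned.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(uri), true);
        while (it.hasNext()) {
            LocatedFileStatus f = it.next();
            System.out.println("File:" + f.getPath() + " (" + f.getLen() + " bytes)");
        }
        fs.close();
    }
}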

7. Downloading an HDFS directory

The directory on HDFS:

[root@anode1 ~]# hadoop fs -ls /user/root/wordcount
Found 1 items
drwxr-xr-x   - root hdfs          0 2017-03-02 17:12 /user/root/wordcount/input

The local directory:

[root@hadron ~]# ls hdfs/input

The program:

package test;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyToLocalFile {
    public static void main(String[] args) throws IOException {
        String hdfsPath = "hdfs://192.168.1.25:8020/user/root/wordcount";
        String localPath = "/root/hdfs";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        Path hdfs_path = new Path(hdfsPath);
        Path local_path = new Path(localPath);
        fs.copyToLocalFile(hdfs_path, local_path);
        fs.close();
    }
}

Result:

[root@hadron ~]# ls hdfs/
input  wordcount
[root@hadron ~]# ls hdfs/wordcount/
input
[root@hadron ~]# ls hdfs/wordcount/input/
test2.txt  test3.txt  words2.txt  words.txt

8. Uploading a local directory

Rename the files under the local /root/hdfs/input directory:

[root@hadron ~]# ls hdfs/input
a.txt  b.txt  c.txt  d.txt

The program:

package test;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {
    public static void main(String[] args) {
        String hdfsPath = "hdfs://192.168.1.25:8020/user/root";
        String localPath = "/root/hdfs/input";
        Configuration conf = new Configuration();
        try {
            FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
            Path hdfs_path = new Path(hdfsPath);
            Path local_path = new Path(localPath);
            fs.copyFromLocalFile(local_path, hdfs_path);
            fs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Result:

[root@anode1 ~]# hadoop fs -ls /user/root
Found 6 items
drwx------   - root hdfs          0 2017-03-01 20:00 /user/root/.Trash
drwxr-xr-x   - root hdfs          0 2017-02-22 12:48 /user/root/.hiveJars
drwxr-xr-x   - root hdfs          0 2017-02-24 09:39 /user/root/.sparkStaging
drwx------   - root hdfs          0 2017-03-01 10:22 /user/root/.staging
drwxr-xr-x   - root hdfs          0 2017-03-03 11:15 /user/root/input
drwxr-xr-x   - root hdfs          0 2017-03-03 08:49 /user/root/wordcount
[root@anode1 ~]# hadoop fs -ls /user/root/input
Found 4 items
-rw-r--r--   3 root hdfs         14 2017-03-03 11:15 /user/root/input/a.txt
-rw-r--r--   3 root hdfs         14 2017-03-03 11:15 /user/root/input/b.txt
-rw-r--r--   3 root hdfs         47 2017-03-03 11:15 /user/root/input/c.txt
-rw-r--r--   3 root hdfs         47 2017-03-03 11:15 /user/root/input/d.txt
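copyFromLocalFile also has an overload with delSrc and overwrite flags, useful when the upload should keep (or delete) the local source and when existing HDFS files must not be silently replaced. The sketch below uses the same paths; the class name CopyFromLocalNoOverwrite is illustrative only:

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalNoOverwrite {
    public static void main(String[] args) throws Exception {
        String hdfsPath = "hdfs://192.168.1.25:8020/user/root";
        String localPath = "/root/hdfs/input";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
        // delSrc = false: keep the local copy; overwrite = false: fail instead of
        // replacing files that already exist on HDFS.
        fs.copyFromLocalFile(false, false, new Path(localPath), new Path(hdfsPath));
        fs.close();
    }
}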

9. Downloading an HDFS file or directory

On the local machine:

[root@hadron ~]# vi /etc/hosts
192.168.1.25    anode1
[root@hadron ~]# ping anode1
PING anode1 (192.168.1.25) 56(84) bytes of data.
64 bytes from anode1 (192.168.1.25): icmp_seq=1 ttl=64 time=0.266 ms
64 bytes from anode1 (192.168.1.25): icmp_seq=2 ttl=64 time=0.265 ms
^C
--- anode1 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 999ms
rtt min/avg/max/mdev = 0.265/0.265/0.266/0.016 ms

Create a new conf directory in the project.
Right-click the conf directory -> Build Path -> Use as Source Folder.
Copy the cluster configuration files into the conf directory.
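The Downloader class below reads fs.defaultFS from the Configuration, which is why the cluster configuration files need to be on the classpath (the conf source folder). If you prefer not to depend on the classpath, the property can also be set in code. The sketch below shows just that initialization; the class name ExplicitConfDemo is illustrative and reuses this article's NameNode address, which you would replace with your own:

package test;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class ExplicitConfDemo {
    public static void main(String[] args) throws Exception {
        // Alternative to putting core-site.xml / hdfs-site.xml on the classpath:
        // set fs.defaultFS in code (adjust the address to your cluster).
        Configuration conf = new HdfsConfiguration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.25:8020");
        DistributedFileSystem dfs = new DistributedFileSystem();
        dfs.initialize(URI.create(conf.get("fs.defaultFS")), conf);
        System.out.println(dfs.getUri());
        dfs.close();
    }
}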

package test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.io.IOUtils;

public class Downloader {
    private static Log log = LogFactory.getLog(Downloader.class);
    private String src;
    private static Configuration conf;
    public static DistributedFileSystem dfs;

    static {
        conf = new HdfsConfiguration();
        dfs = new DistributedFileSystem();
        try {
            dfs.initialize(new URI(conf.get("fs.defaultFS")), conf);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    public Downloader(String src) {
        this.src = src;
    }

    public void download(String dest) {
        Path path = new Path(src);
        File file = new File(dest);
        file.mkdirs();
        try {
            if (dfs.isFile(path)) {
                // Download a single file
                innerDownloadFile(src, dest);
            } else {
                // Download a directory
                innerDownloadDir(src, dest);
            }
        } catch (IOException e) {
            log.error(e.getMessage(), e);
        }
    }

    private void innerDownloadFile(String src, String dest) {
        Path path = new Path(src);
        try {
            if (dfs.exists(path)) { // The file exists on HDFS
                // Create the local file
                File file = new File(dest + File.separator + path.getName());
                file.createNewFile();
                // Read from HDFS and write to the local file
                InputStream in = dfs.open(path);
                OutputStream out = new FileOutputStream(file);
                IOUtils.copyBytes(in, out, conf);
                in.close();
                out.close();
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    private void innerDownloadDir(String src, String dest) {
        Path path = new Path(src);
        // Create the local directory
        File file = new File(dest + File.separator + path.getName());
        file.mkdirs();
        try {
            // The files and subdirectories under the HDFS path
            FileStatus[] fss = dfs.listStatus(path);
            for (int i = 0; i < fss.length; i++) {
                if (fss[i].isFile()) {
                    // The current entry is a file
                    innerDownloadFile(fss[i].getPath().toString(),
                            dest + File.separator + path.getName());
                } else {
                    // The current entry is a subdirectory
                    innerDownloadDir(fss[i].getPath().toString(),
                            file.getAbsolutePath());
                }
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    public static void main(String[] args) {
        System.out.println(args[0]);
        System.out.println(args[1]);
        // args[0] is the HDFS path to download
        Downloader dl = new Downloader(args[0]);
        // args[1] is the local destination directory
        dl.download(args[1]);
    }
}
[root@hadron ~]# ls hdfs/input/
test2.txt  test3.txt  words2.txt  words.txt