Hadoop 文件的基本操作

来源：互联网发布：双色球数据图表新浪网编辑：程序博客网时间：2024/04/27 23:53

刚学习完 Hadoop（HDFS）文件基本操作的相关内容，特此记录下来，方便以后复习查看。

Hadoop集群NameNode地址：192.168.2.100，端口：9000

引入的jar包：

可能会出现重复的jar文件，覆盖即可

附：代码（记得加入 JUnit 相关 jar 包）

package cn.guyouda.hadoop;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * JUnit-driven examples of basic HDFS file operations (download, upload,
 * mkdir, delete, list) performed through the Hadoop {@link FileSystem} API.
 *
 * <p>Every test talks to the NameNode at {@code hdfs://192.168.2.100:9000/}
 * as user {@code hadoop}; the cluster must be running and reachable.
 */
public class HDFSUtil {

    /** URI of the NameNode that all operations in this class target. */
    private static final String HDFS_URI = "hdfs://192.168.2.100:9000/";

    /** HDFS client handle; created in {@link #init()} and released in {@link #tearDown()}. */
    public FileSystem fileSystem = null;

    /**
     * Opens the HDFS connection before each test.
     *
     * @throws IOException if the file system cannot be obtained
     * @throws URISyntaxException if the NameNode URI is malformed
     * @throws InterruptedException if interrupted while obtaining the file system
     */
    @Before
    public void init() throws IOException, URISyntaxException, InterruptedException {
        // Connection settings.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", HDFS_URI);

        // Arguments: NameNode URI, configuration, and the user to act as
        // (must be a user with permission on the files/directories touched).
        fileSystem = FileSystem.get(new URI(HDFS_URI), conf, "hadoop");
    }

    /**
     * Closes the HDFS connection after each test so the client handle
     * opened in {@link #init()} is not leaked.
     *
     * @throws IOException if closing the file system fails
     */
    @After
    public void tearDown() throws IOException {
        if (fileSystem != null) {
            fileSystem.close();
            fileSystem = null;
        }
    }

    /**
     * Downloads a file from HDFS by copying the raw streams by hand.
     *
     * @throws IOException if the HDFS file cannot be read or the local file cannot be written
     */
    @Test
    public void downloadFile() throws IOException {
        Path path = new Path("hdfs://192.168.2.100:9000/jdk-7u65-linux-i586.tar.gz");
        // try-with-resources closes both streams even if the copy fails.
        try (FSDataInputStream in = fileSystem.open(path);
                FileOutputStream out = new FileOutputStream("D:\\jdk.tar.gz")) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Downloads a file from HDFS using the built-in {@code copyToLocalFile} helper.
     *
     * <p>NOTE(review): unlike its siblings this method carries no {@code @Test}
     * annotation, so JUnit does not run it — confirm whether that is intentional.
     *
     * @throws IOException if the copy fails
     */
    public void downloadFile2() throws IOException {
        Path dst = new Path("D:/readme.txt");
        Path src = new Path("hdfs://192.168.2.100:9000/readme.txt");
        fileSystem.copyToLocalFile(src, dst);
    }

    /**
     * Uploads a local file to HDFS by copying the raw streams by hand.
     *
     * @throws IOException if the local file cannot be read or the HDFS file cannot be written
     */
    @Test
    public void uploadFile() throws IOException {
        Path dest = new Path("hdfs://192.168.2.100:9000/readme.txt");
        // Open the local source first so a missing local file fails before an
        // empty file is created on HDFS. The output stream must be closed for
        // the written data to be completed on HDFS; try-with-resources does
        // that and surfaces any error raised by close().
        try (InputStream in = new FileInputStream("D:/readme.txt");
                FSDataOutputStream out = fileSystem.create(dest)) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Uploads a local file to HDFS using the built-in {@code copyFromLocalFile} helper.
     *
     * @throws IOException if the copy fails
     */
    @Test
    public void uploadFile2() throws IOException {
        Path src = new Path("D:/readme.txt");
        Path dst = new Path("hdfs://192.168.2.100:9000/aa/bb/readme2.txt");
        fileSystem.copyFromLocalFile(src, dst);
    }

    /**
     * Creates the directory {@code /aa/bb/cc} on HDFS.
     *
     * @throws IllegalArgumentException if the path is invalid
     * @throws IOException if the directory cannot be created
     */
    @Test
    public void mkDir() throws IllegalArgumentException, IOException {
        fileSystem.mkdirs(new Path("/aa/bb/cc"));
    }

    /**
     * Deletes the directory {@code /aa} on HDFS.
     *
     * @throws IllegalArgumentException if the path is invalid
     * @throws IOException if the delete fails
     */
    @Test
    public void delDir() throws IllegalArgumentException, IOException {
        // Arguments: the directory to delete, and whether to delete
        // its sub-directories recursively.
        fileSystem.delete(new Path("/aa"), true);
    }

    /**
     * Prints metadata for the contents of the HDFS root: first every file found
     * recursively, then the direct children (files and directories) of {@code /}.
     *
     * @throws IllegalArgumentException if the path is invalid
     * @throws IOException if the listing fails
     */
    @Test
    public void listDir() throws IllegalArgumentException, IOException {
        // Recursively list every file under the directory (directories are not returned).
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            System.out.println(file.getPath());
            System.out.println(file.getAccessTime());
            System.out.println(file.getBlockSize());
            System.out.println(file.getGroup());
            System.out.println(file.getLen());
            System.out.println(file.getModificationTime());
            System.out.println(file.getOwner());
            // Printing the array directly would only show its identity hash;
            // Arrays.toString prints each block location.
            System.out.println(Arrays.toString(file.getBlockLocations()));
        }

        System.out.println("===================================");

        // List the files and directories directly inside the given directory.
        FileStatus[] fileStatus = fileSystem.listStatus(new Path("/"));
        for (FileStatus fStatus : fileStatus) {
            System.out.println(fStatus.getPath());
            System.out.println(fStatus.getAccessTime());
            System.out.println(fStatus.getBlockSize());
            System.out.println(fStatus.getGroup());
            System.out.println(fStatus.getLen());
            System.out.println(fStatus.getModificationTime());
            System.out.println(fStatus.getOwner());
        }
    }
}

注：运行测试前记得先启动 HDFS（执行 .../hadoop-2.4.1/sbin/start-dfs.sh），并确认防火墙规则已放行相关端口（如 NameNode 的 9000 端口）；由于该集群通常在内网运行，也可以选择直接关闭防火墙。

阅读全文

1 0