Hadoop 文件的基本操作

来源:互联网 发布:双色球数据图表新浪网 编辑:程序博客网 时间:2024/04/27 23:53


刚学习完对hadoop的文件基本操作相关内容,特记录下来,方便以后复习查看。

Hadoop集群NameNode地址:192.168.2.100,端口:9000

引入的jar包:

可能会出现重复的jar文件,覆盖即可




附:代码(记得加入Junit相关Jar包)


package cn.guyouda.hadoop;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Before;
import org.junit.Test;

/**
 * Basic HDFS file-operation examples (download, upload, mkdir, delete, list)
 * run as JUnit tests against a live cluster.
 *
 * NOTE(review): every method here performs network I/O against the NameNode at
 * {@code hdfs://192.168.2.100:9000/} — the cluster must be up (start-dfs.sh)
 * before running.
 */
public class HDFSUtil {

    /** NameNode URI; was repeated inline four times — hoisted so the address is changed in one place. */
    private static final String HDFS_URI = "hdfs://192.168.2.100:9000/";

    public FileSystem fileSystem = null;

    /**
     * Connects to the cluster before each test.
     *
     * FileSystem.get args: (1) NameNode URI, (2) configuration,
     * (3) user name — must be a user with permission on the target paths.
     */
    @Before
    public void init() throws IOException, URISyntaxException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", HDFS_URI);
        fileSystem = FileSystem.get(new URI(HDFS_URI), conf, "hadoop");
    }

    /**
     * Downloads a file by manually copying the HDFS input stream to a local
     * output stream.
     *
     * Fixed: both streams were previously left open (resource leak) — now
     * closed via try-with-resources even if the copy throws.
     */
    @Test
    public void downloadFile() throws IOException {
        Path path = new Path(HDFS_URI + "jdk-7u65-linux-i586.tar.gz");
        try (FSDataInputStream in = fileSystem.open(path);
             FileOutputStream out = new FileOutputStream("D:\\jdk.tar.gz")) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Downloads a file using the built-in {@link FileSystem#copyToLocalFile},
     * which manages the streams itself.
     *
     * Fixed: was missing {@code @Test}, so unlike its siblings it never ran.
     */
    @Test
    public void downloadFile2() throws IOException {
        Path dst = new Path("D:/readme.txt");
        Path src = new Path(HDFS_URI + "readme.txt");
        fileSystem.copyToLocalFile(src, dst);
    }

    /**
     * Uploads a local file by manually copying it into an HDFS output stream.
     *
     * Fixed: both streams were previously left open (resource leak) — now
     * closed via try-with-resources even if the copy throws.
     */
    @Test
    public void uploadFile() throws IOException {
        Path dest = new Path(HDFS_URI + "readme.txt");
        try (FSDataOutputStream out = fileSystem.create(dest);
             InputStream in = new FileInputStream("D:/readme.txt")) {
            IOUtils.copy(in, out);
        }
    }

    /**
     * Uploads a local file using the built-in
     * {@link FileSystem#copyFromLocalFile}, which manages the streams itself.
     */
    @Test
    public void uploadFile2() throws IOException {
        Path src = new Path("D:/readme.txt");
        Path dst = new Path(HDFS_URI + "aa/bb/readme2.txt");
        fileSystem.copyFromLocalFile(src, dst);
    }

    /**
     * Creates a directory tree on HDFS (intermediate directories included,
     * like {@code mkdir -p}).
     */
    @Test
    public void mkDir() throws IllegalArgumentException, IOException {
        fileSystem.mkdirs(new Path("/aa/bb/cc"));
    }

    /**
     * Deletes a directory on HDFS.
     *
     * delete() args: (1) path to remove, (2) {@code true} to recurse into
     * subdirectories — required when the path is a non-empty directory.
     */
    @Test
    public void delDir() throws IllegalArgumentException, IOException {
        fileSystem.delete(new Path("/aa"), true);
    }

    /**
     * Lists HDFS contents two ways: listFiles() recursively yields files only
     * (no directories), while listStatus() yields the immediate children —
     * both files and directories — of one directory.
     */
    @Test
    public void listDir() throws IllegalArgumentException, IOException {
        // Recursively list every file under / (directories are not returned).
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            System.out.println(file.getPath());
            System.out.println(file.getAccessTime());
            System.out.println(file.getBlockSize());
            System.out.println(file.getGroup());
            System.out.println(file.getLen());
            System.out.println(file.getModificationTime());
            System.out.println(file.getOwner());
            // Fixed: getBlockLocations() returns an array; printing it directly
            // showed only an identity hash — Arrays.toString shows the content.
            System.out.println(Arrays.toString(file.getBlockLocations()));
        }
        System.out.println("===================================");

        // Non-recursive: immediate children (files AND directories) of /.
        FileStatus[] fileStatus = fileSystem.listStatus(new Path("/"));
        for (FileStatus fStatus : fileStatus) {
            System.out.println(fStatus.getPath());
            System.out.println(fStatus.getAccessTime());
            System.out.println(fStatus.getBlockSize());
            System.out.println(fStatus.getGroup());
            System.out.println(fStatus.getLen());
            System.out.println(fStatus.getModificationTime());
            System.out.println(fStatus.getOwner());
        }
    }
}

注:记得开启HDFS,.../hadoop-2.4.1/sbin/start-dfs.sh, 还有防火墙规则(由于该集群通常是内网运行,可选择关闭防火墙)