Hadoop Filesystem Operations
Reading a File from the Hadoop Filesystem
One way is to open a data stream with a java.net.URL object and read the data from it:
InputStream in = null;
try {
    in = new URL("hdfs://host/path").openStream();
    // process the stream here
} finally {
    IOUtils.closeStream(in);
}
Displaying a file from a Hadoop filesystem on standard output using a URLStreamHandler instance:
import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class URLCat {
    static {
        // Make java.net.URL aware of the hdfs:// scheme.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws Exception {
        InputStream in = null;
        try {
            in = new URL(args[0]).openStream();
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
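If the compiled URLCat class is on the Hadoop classpath (for example via the HADOOP_CLASSPATH environment variable), it can be run with a command along the lines of "hadoop URLCat hdfs://host/path", where the URL is just a placeholder for a real file. Note that URL.setURLStreamHandlerFactory() can be called at most once per JVM, so this approach cannot be used in an application where another component has already registered a URLStreamHandlerFactory.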
Displaying a file from a Hadoop filesystem on standard output by using the FileSystem API directly:
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileSystemCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        InputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
The open() method of FileSystem returns an FSDataInputStream object:
public class FSDataInputStream extends DataInputStream
        implements Seekable, PositionedReadable {
    // implementation elided
}
public interface Seekable {
    void seek(long pos) throws IOException;
    long getPos() throws IOException;
    boolean seekToNewSource(long targetPos) throws IOException;
}
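A minimal sketch of seek() and getPos() in use (assuming in is an FSDataInputStream obtained from fs.open(), as in the examples in this section):
in.seek(16);            // jump to absolute byte offset 16 in the file
long pos = in.getPos(); // pos is now 16
Seeking to a position beyond the end of the file results in an IOException.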
Displaying a file from a Hadoop filesystem on standard output twice, using seek():
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileSystemDoubleCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
            in.seek(0); // go back to the start of the file
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
FSDataInputStream also implements the PositionedReadable interface, for reading part of a file at a given offset:
public interface PositionedReadable {
    public int read(long position, byte[] buffer, int offset, int length) throws IOException;
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException;
    public void readFully(long position, byte[] buffer) throws IOException;
}
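A minimal sketch of a positioned read (again assuming in is an open FSDataInputStream; the offset 1024 is arbitrary). Unlike seek(), these calls read at an absolute offset without changing the stream's current position:
byte[] buffer = new byte[16];
int bytesRead = in.read(1024L, buffer, 0, buffer.length); // up to 16 bytes starting at offset 1024
in.readFully(1024L, buffer); // keeps reading until the buffer is full, or throws EOFException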
Writing Data
Copying a local file to the Hadoop filesystem:
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

public class FileCopyWithProgress {
    public static void main(String[] args) throws IOException {
        String localSrc = args[0];
        String dst = args[1];
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        // Print a dot each time Hadoop calls back to report write progress.
        OutputStream out = fs.create(new Path(dst), new Progressable() {
            public void progress() {
                System.out.print(".");
            }
        });
        IOUtils.copyBytes(in, out, 4096, true);
    }
}
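When no progress reporting is needed, a copy like the one above can also be done with FileSystem's copyFromLocalFile() helper. A minimal sketch, reusing fs, localSrc, and dst from the example (both paths are placeholders):
fs.copyFromLocalFile(new Path(localSrc), new Path(dst)); // copy a local file to the destination filesystem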
Showing the file statuses for a collection of paths in a Hadoop filesystem:
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class ListStatus {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);

        Path[] paths = new Path[args.length];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = new Path(args[i]);
        }

        FileStatus[] status = fs.listStatus(paths);
        Path[] listedPaths = FileUtil.stat2Paths(status);
        for (Path p : listedPaths) {
            System.out.println(p);
        }
    }
}
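The FileStatus objects returned by listStatus() carry more than just the path. A minimal sketch of printing a few of the other fields, reusing the status array from ListStatus above (the output format is illustrative only):
for (FileStatus stat : status) {
    // path, length in bytes, and modification time in milliseconds since the epoch
    System.out.printf("%s\t%d\t%d%n", stat.getPath(), stat.getLen(), stat.getModificationTime());
}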