Hadoop Filesystem Operations
Reading a File from the Hadoop Filesystem
One way is to open a data stream with a java.net.URL object and read the data from it:
InputStream in = null;
try {
    in = new URL("hdfs://host/path").openStream();
    // process the stream here
} finally {
    IOUtils.closeStream(in);
}
Displaying a file from a Hadoop filesystem on standard output using a URLStreamHandler instance:
import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class URLCat {
    static {
        // Make java.net.URL aware of the hdfs:// scheme.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws Exception {
        InputStream in = null;
        try {
            in = new URL(args[0]).openStream();
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
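If the compiled URLCat class is on the Hadoop classpath (for example via the HADOOP_CLASSPATH environment variable), it can be run with a command along the lines of "hadoop URLCat hdfs://host/path", where the URL is just a placeholder for a real file. Note that URL.setURLStreamHandlerFactory() can be called at most once per JVM, so this approach cannot be used in an application where another component has already registered a URLStreamHandlerFactory.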
Displaying a file from a Hadoop filesystem on standard output by using the FileSystem API directly:
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileSystemCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        InputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
The open() method of FileSystem returns an FSDataInputStream object:
public class FSDataInputStream extends DataInputStream
        implements Seekable, PositionedReadable {
    // implementation elided
}
public interface Seekable {
    void seek(long pos) throws IOException;
    long getPos() throws IOException;
    boolean seekToNewSource(long targetPos) throws IOException;
}
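A minimal sketch of seek() and getPos() in use (assuming in is an FSDataInputStream obtained from fs.open(), as in the examples in this section):
in.seek(16);            // jump to absolute byte offset 16 in the file
long pos = in.getPos(); // pos is now 16
Seeking to a position beyond the end of the file results in an IOException.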
Displaying a file from a Hadoop filesystem on standard output twice, using seek():
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FileSystemDoubleCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
            in.seek(0); // go back to the start of the file
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
FSDataInputStream also implements the PositionedReadable interface, for reading part of a file at a given offset:
public interface PositionedReadable {
    public int read(long position, byte[] buffer, int offset, int length) throws IOException;
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException;
    public void readFully(long position, byte[] buffer) throws IOException;
}
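A minimal sketch of a positioned read (again assuming in is an open FSDataInputStream; the offset 1024 is arbitrary). Unlike seek(), these calls read at an absolute offset without changing the stream's current position:
byte[] buffer = new byte[16];
int bytesRead = in.read(1024L, buffer, 0, buffer.length); // up to 16 bytes starting at offset 1024
in.readFully(1024L, buffer); // keeps reading until the buffer is full, or throws EOFException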
Writing Data
Copying a local file to the Hadoop filesystem:
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

public class FileCopyWithProgress {
    public static void main(String[] args) throws IOException {
        String localSrc = args[0];
        String dst = args[1];
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(dst), conf);
        // Print a dot each time Hadoop calls back to report write progress.
        OutputStream out = fs.create(new Path(dst), new Progressable() {
            public void progress() {
                System.out.print(".");
            }
        });
        IOUtils.copyBytes(in, out, 4096, true);
    }
}
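When no progress reporting is needed, a copy like the one above can also be done with FileSystem's copyFromLocalFile() helper. A minimal sketch, reusing fs, localSrc, and dst from the example (both paths are placeholders):
fs.copyFromLocalFile(new Path(localSrc), new Path(dst)); // copy a local file to the destination filesystem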
Showing the file statuses for a collection of paths in a Hadoop filesystem:
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class ListStatus {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);

        Path[] paths = new Path[args.length];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = new Path(args[i]);
        }

        FileStatus[] status = fs.listStatus(paths);
        Path[] listedPaths = FileUtil.stat2Paths(status);
        for (Path p : listedPaths) {
            System.out.println(p);
        }
    }
}
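The FileStatus objects returned by listStatus() carry more than just the path. A minimal sketch of printing a few of the other fields, reusing the status array from ListStatus above (the output format is illustrative only):
for (FileStatus stat : status) {
    // path, length in bytes, and modification time in milliseconds since the epoch
    System.out.printf("%s\t%d\t%d%n", stat.getPath(), stat.getLen(), stat.getModificationTime());
}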