hadoop 读写文件

来源：互联网发布：淘宝怎么让别人代付编辑：程序博客网时间：2024/05/01 19:22

import java.io.IOException;
import java.io.InputStream;
import java.util.StringTokenizer;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Reporter;

/**
 * Copies the first {@code len} bytes of a file on a Hadoop file system to a
 * new file and echoes the copied bytes to standard output.
 *
 * <p>Usage: {@code FileSystemCat <input-uri> <length> <output-uri>}
 */
public class FileSystemCat{
/**
 * Program entry point.
 *
 * @param arg {@code arg[0]} is the URI of the file to read, {@code arg[1]}
 *            is the maximum number of bytes to read (non-negative integer),
 *            {@code arg[2]} is the URI/path of the file to create
 * @throws IllegalArgumentException if fewer than three arguments are given
 *             or the length is negative
 * @throws Exception on any I/O failure while reading or writing
 */
public static void main(String arg[])throws Exception
{
    if (arg == null || arg.length < 3)
    {
        throw new IllegalArgumentException(
                "Usage: FileSystemCat <input-uri> <length> <output-uri>");
    }
    String inuri = arg[0];
    int len = Integer.parseInt(arg[1]);
    String outuri = arg[2];
    if (len < 0)
    {
        throw new IllegalArgumentException("length must be >= 0, got " + len);
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(inuri), conf);

    // A scheme-less output path keeps resolving against the input's file
    // system (the original behaviour); a fully-qualified one is resolved
    // against its own file system so input and output may live on
    // different stores.
    Path outPath = new Path(outuri);
    FileSystem outFs = outPath.toUri().getScheme() == null
            ? fs
            : outPath.getFileSystem(conf);

    FSDataInputStream in = null;
    FSDataOutputStream out = null;
    try
    {
        in = fs.open(new Path(inuri));
        out = outFs.create(outPath);

        byte[] buffer = new byte[len];

        // Positional read: arguments are (file position, buffer, buffer
        // offset, length). A single call may return fewer bytes than
        // requested, so keep reading until the buffer is full or EOF.
        int total = 0;
        while (total < len)
        {
            int n = in.read(total, buffer, total, len - total);
            if (n <= 0)
            {
                break; // end of file (or no progress) - stop with what we have
            }
            total += n;
        }

        // Write exactly the bytes that were read, without a String
        // round-trip, so the copy is byte-exact regardless of encoding.
        out.write(buffer, 0, total);

        // Close explicitly so that a failure while flushing the output is
        // reported instead of being swallowed by closeStream() below.
        out.close();
        out = null;

        // Decode only the bytes actually read, with an explicit charset.
        String tmp = new String(buffer, 0, total, "UTF-8");
        System.out.println("File read: " + tmp);
        System.out.println("Hello from hadoop");
    }
    finally
    {
        // Best-effort cleanup; closeStream() ignores null and close errors.
        IOUtils.closeStream(in);
        IOUtils.closeStream(out);
    }
}

}

主要实现：

从 HDFS 上读取文件，并指定读取长度，将读取的数据输出到屏幕，同时写入一个新的文件内。