Using Java to manipulate files in Hadoop / HDFS


The code below was written and tested under Eclipse; whether it works in other environments has not been verified.
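In principle the Eclipse dependency is only a classpath question: a Configuration picks up core-site.xml automatically when that file is on the classpath. A minimal smoke test along these lines (the class name FsSmokeTest is made up here) should therefore behave the same in any environment that has the Hadoop client jars and core-site.xml available:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class FsSmokeTest {
    public static void main(String[] args) throws Exception {
        // core-site.xml is read automatically when it is on the classpath.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        System.out.println("Connected to: " + fs.getUri()); // which cluster did we reach?
        fs.close();
    }
}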

/**
 * Name: KcnaufHadoopManagement
 * Author: Zhang Bing
 * Created: 2013-7-31
 * Function: Manipulate the Hadoop filesystem
 */
package com.exercise.hadoop.command;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import com.exercise.shell.show.SSHShell; // author's helper that runs a shell command over SSH

public class HadoopManipulate {

    /** The Configuration decides which cluster to connect to. */
    public static Configuration config;

    public HadoopManipulate() {
        if (null == config) {
            config = new Configuration();
            String fp = this.getClass().getResource("/core-site.xml").getPath();
            config.addResource(fp);
            System.out.println("============================");
            System.out.println(fp);
            System.out.println("============================");
            // Open the file once, purely to verify that it is readable.
            FileReader fReader = null;
            try {
                fReader = new FileReader(fp);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } finally {
                if (fReader != null) { // guard against an NPE when the open failed
                    try {
                        fReader.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    }

    /**
     * Read a file in HDFS and print it to stdout.
     * @param filename
     * @return 1 on success, -1 on failure
     */
    public int readFile(String filename) {
        try {
            FileSystem hdfSystem = FileSystem.get(config);
            FSDataInputStream fsDataInputStream = hdfSystem.open(new Path(filename));
            IOUtils.copyBytes(fsDataInputStream, System.out, 4096, false);
            fsDataInputStream.close();
            hdfSystem.close();
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Read a file and return its content line by line.
     * @param filename
     * @return the lines of the file, or null on failure
     */
    public List<String> readFile_return(String filename) {
        try {
            FileSystem hdfSystem = FileSystem.get(config);
            FSDataInputStream fsDataInputStream = hdfSystem.open(new Path(filename));
            InputStreamReader isr = new InputStreamReader(fsDataInputStream, "UTF-8");
            BufferedReader bufferedReader = new BufferedReader(isr);
            List<String> lines = new LinkedList<String>();
            String text;
            while ((text = bufferedReader.readLine()) != null) {
                lines.add(text);
            }
            bufferedReader.close();
            hdfSystem.close();
            return lines;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    public static void main(String[] args) {
        HadoopManipulate hm = new HadoopManipulate();
        hm.get("/ccccc", "/");
    }

    /*
     * This variant uses the FileSystem API directly but prints the warning
     * 'Unable to load native-hadoop library for your platform', so the
     * shell-based get() below is used instead.
     */
    // public int getFileFromHDFS(String HDFSFile, String LocalFile) {
    //     try {
    //         Configuration config1 = new Configuration();
    //         config1.set("hadoop.job.ugi", "hadoop,supergroup");
    //         FileSystem hdfSystem = FileSystem.get(config1);
    //         Path hDFSPath = new Path(HDFSFile);
    //         Path localPath = new Path(LocalFile);
    //         hdfSystem.copyToLocalFile(hDFSPath, localPath);
    //         hdfSystem.close();
    //         return 1;
    //     } catch (Exception e) {
    //         e.printStackTrace();
    //         return -1;
    //     }
    // }

    /**
     * Get a file from HDFS by shelling out to the hadoop CLI.
     * @param HDFSFile
     * @param LocalFile
     * @return 1 (the shell command's own result is not checked)
     */
    public int get(String HDFSFile, String LocalFile) {
        SSHShell ssh = new SSHShell();
        String text = "hadoop fs -copyToLocal " + HDFSFile + " " + LocalFile;
        ssh.sshShell_return(text);
        return 1;
    }

    /*
     * Same native-library warning as getFileFromHDFS, hence commented out.
     */
    // public int putFileToHDFS(String HDFSFile, String LocalFile) {
    //     try {
    //         FileSystem hdfSystem = FileSystem.get(config);
    //         Path hdfsPath = new Path(HDFSFile);
    //         Path localPath = new Path(LocalFile);
    //         hdfSystem.copyFromLocalFile(localPath, hdfsPath);
    //         hdfSystem.close();
    //         return 1;
    //     } catch (Exception e) {
    //         e.printStackTrace();
    //         return -1;
    //     }
    // }

    /**
     * Put a file into HDFS by shelling out to the hadoop CLI.
     * @param HDFSFile
     * @param LocalFile
     * @return 1 (the shell command's own result is not checked)
     */
    public int put(String HDFSFile, String LocalFile) {
        SSHShell ssh = new SSHShell();
        // -copyFromLocal expects the local source first, then the HDFS destination.
        String text = "hadoop fs -copyFromLocal " + LocalFile + " " + HDFSFile;
        ssh.sshShell_return(text);
        return 1;
    }

    /**
     * List all the files under HDFSFilePath.
     * @param HDFSFilePath
     * @return 1 on success, -1 on failure
     */
    public int lsInHadoop(String HDFSFilePath) {
        try {
            FileSystem hdfSystem = FileSystem.get(config);
            FileStatus[] fileStatus = hdfSystem.listStatus(new Path(HDFSFilePath));
            for (int i = 0; i < fileStatus.length; i++) {
                System.out.println(fileStatus[i].getPath().getName());
            }
            hdfSystem.close();
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * List all the files under HDFSFilePath and return them as List<String>.
     * @param HDFSFilePath
     * @return the file names, or null on failure
     */
    public List<String> lsInHadoop_return(String HDFSFilePath) {
        try {
            FileSystem hdfSystem = FileSystem.get(config);
            FileStatus[] fileStatus = hdfSystem.listStatus(new Path(HDFSFilePath));
            List<String> lines = new LinkedList<String>();
            for (int i = 0; i < fileStatus.length; i++) {
                lines.add(fileStatus[i].getPath().getName());
            }
            hdfSystem.close();
            return lines;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Create a new file and hand back the open output stream.
     * The caller is responsible for closing the returned stream.
     * @param HDFSFile
     * @return the open stream, or null on failure
     */
    public FSDataOutputStream createANewFile(String HDFSFile) {
        try {
            FileSystem fileSystem = FileSystem.get(config);
            Path hdfsPath = new Path(HDFSFile);
            FSDataOutputStream fsDataOutputStream = fileSystem.create(hdfsPath);
            if (fileSystem.exists(hdfsPath)) {
                return fsDataOutputStream;
            } else {
                return null;
            }
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Append lines to an existing file in HDFS.
     * @param HDFSFile
     * @param content
     * @return 1 on success, -1 on failure
     */
    public int writeNewData(String HDFSFile, List<String> content) {
        try {
            FileSystem hdfSystem = FileSystem.get(config);
            FSDataOutputStream fsDataOutputStream = hdfSystem.append(new Path(HDFSFile));
            for (int i = 0; i < content.size(); i++) {
                fsDataOutputStream.writeBytes(content.get(i) + "\n");
            }
            fsDataOutputStream.close();
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Append a single line to an existing file in HDFS.
     * @param HDFSFile
     * @param content
     * @return 1 on success, -1 on failure
     */
    public int writeNewData(String HDFSFile, String content) {
        try {
            FileSystem hdfSystem = FileSystem.get(config);
            FSDataOutputStream fsDataOutputStream = hdfSystem.append(new Path(HDFSFile));
            // writeUTF would prepend a 2-byte length and use modified UTF-8,
            // producing unexpected bytes in the file, so writeBytes is used.
            fsDataOutputStream.writeBytes(content + "\n");
            fsDataOutputStream.close();
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Copy a file inside HDFS.
     * @param oldFile
     * @param newFile
     * @return 1 on success, -1 on failure
     */
    public int copyFileInHDFS(String oldFile, String newFile) {
        try {
            // Read the source fully and rewrite it line by line, instead of
            // piping the streams, to avoid the IOException seen when using
            // fsDataInputStream directly.
            FSDataOutputStream out = createANewFile(newFile);
            if (out != null) {
                out.close(); // release the lease so that append() below succeeds
            }
            writeNewData(newFile, readFile_return(oldFile));
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Move a file inside HDFS: copy it, then delete the original.
     * @param oldFile
     * @param newFile
     * @return 1 on success, -1 on failure
     */
    public int moveFileInHDFS(String oldFile, String newFile) {
        try {
            FSDataOutputStream out = createANewFile(newFile);
            if (out != null) {
                out.close(); // release the lease so that append() below succeeds
            }
            writeNewData(newFile, readFile_return(oldFile));
            deleteFileInHDFS(oldFile); // delete the old file
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Delete a file (or a directory, recursively) in HDFS.
     * @param deleteFile
     * @return 1 on success, -1 on failure
     */
    public int deleteFileInHDFS(String deleteFile) {
        try {
            FileSystem hdfSystem = FileSystem.get(config);
            Path deletePath = new Path(deleteFile);
            hdfSystem.delete(deletePath, true);
            return 1;
        } catch (Exception e) {
            e.printStackTrace();
            return -1;
        }
    }
}
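For completeness, a hedged usage sketch of the class above. The paths are made-up examples, and the append-based methods (writeNewData, copyFileInHDFS, moveFileInHDFS) assume the cluster has append support enabled (dfs.support.append on 1.x-era HDFS):

package com.exercise.hadoop.command;

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;

public class HadoopManipulateDemo {
    public static void main(String[] args) throws IOException {
        HadoopManipulate hm = new HadoopManipulate();

        // Create an empty file and close the returned stream before appending
        // (all paths here are hypothetical examples).
        FSDataOutputStream out = hm.createANewFile("/tmp/demo.txt");
        if (out != null) {
            out.close();
        }
        hm.writeNewData("/tmp/demo.txt", "hello hdfs");

        // Print the file and list its parent directory.
        hm.readFile("/tmp/demo.txt");
        hm.lsInHadoop("/tmp");

        // Copy, move, and finally clean up inside HDFS.
        hm.copyFileInHDFS("/tmp/demo.txt", "/tmp/demo-copy.txt");
        hm.moveFileInHDFS("/tmp/demo-copy.txt", "/tmp/demo-moved.txt");
        hm.deleteFileInHDFS("/tmp/demo.txt");
        hm.deleteFileInHDFS("/tmp/demo-moved.txt");
    }
}

get() and put() are left out of the sketch because they go through the author's SSHShell helper and therefore also need the hadoop CLI reachable over SSH.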