HDFS file operations in Scala
For org.apache.hadoop.fs.Path:
path.getName is only the file name, with no path information
path.getParent is the parent directory (a Path; its getName is likewise just the directory name, not the full path)
path.toString is the file's full path
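For example (a minimal sketch; the URI below is only an illustrative value):

import org.apache.hadoop.fs.Path

val p = new Path("hdfs://namenode:8020/user/zls/data/part-00000")  // illustrative path
println(p.getName)            // part-00000
println(p.getParent.getName)  // data
println(p.toString)           // hdfs://namenode:8020/user/zls/data/part-00000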
Creating a file
hdfs.createNewFile(new Path(fileName))
// When HDFS creates the file for fileName, any missing parent folders are created automatically
new File(fileName).createNewFile
// When the local file system creates the file for fileName and a parent folder is missing, the program terminates with: "java.io.IOException: No such file or directory"
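If the local call should tolerate missing folders the way the HDFS one does, a small sketch (assuming fileName holds the local path) is to create the parent directories first:

import java.io.File

val target = new File(fileName)
val parent = target.getParentFile
if (parent != null && !parent.exists()) parent.mkdirs()  // create missing parent folders first
target.createNewFile()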
Below is a utility class for HDFS file operations.
package util

import java.io.{FileSystem => _, _}
import org.apache.hadoop.fs._
import scala.collection.mutable.ListBuffer

/**
 * Created by zls on 16-11-24.
 */
object HDFSHelper {

  def isDir(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.isDirectory(new Path(name))
  }

  def isDir(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.isDirectory(name)
  }

  def isFile(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.isFile(new Path(name))
  }

  def isFile(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.isFile(name)
  }

  def createFile(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.createNewFile(new Path(name))
  }

  def createFile(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.createNewFile(name)
  }

  def createFolder(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.mkdirs(new Path(name))
  }

  def createFolder(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.mkdirs(name)
  }

  def exists(hdfs : FileSystem, name : String) : Boolean = {
    hdfs.exists(new Path(name))
  }

  def exists(hdfs : FileSystem, name : Path) : Boolean = {
    hdfs.exists(name)
  }

  def transport(inputStream : InputStream, outputStream : OutputStream): Unit = {
    val buffer = new Array[Byte](64 * 1000)
    var len = inputStream.read(buffer)
    while (len != -1) {
      outputStream.write(buffer, 0, len)  // write exactly the number of bytes read
      len = inputStream.read(buffer)
    }
    outputStream.flush()
    inputStream.close()
    outputStream.close()
  }

  class MyPathFilter extends PathFilter {
    override def accept(path: Path): Boolean = true
  }

  /**
   * create a target file and provide parent folder if necessary
   */
  def createLocalFile(fullName : String) : File = {
    val target : File = new File(fullName)
    if (!target.exists) {
      val index = fullName.lastIndexOf(File.separator)
      val parentFullName = fullName.substring(0, index)
      val parent : File = new File(parentFullName)
      if (!parent.exists)
        parent.mkdirs
      else if (!parent.isDirectory)
        parent.mkdir
      target.createNewFile
    }
    target
  }

  /**
   * delete file in hdfs
   * @return true: success, false: failed
   */
  def deleteFile(hdfs : FileSystem, path: String) : Boolean = {
    if (isDir(hdfs, path))
      hdfs.delete(new Path(path), true)  // true: delete files recursively
    else
      hdfs.delete(new Path(path), false)
  }

  /**
   * get all file children's full name of a hdfs dir, not include dir children
   * @param fullName the hdfs dir's full name
   */
  def listChildren(hdfs : FileSystem, fullName : String, holder : ListBuffer[String]) : ListBuffer[String] = {
    val filesStatus = hdfs.listStatus(new Path(fullName), new MyPathFilter)
    for (status <- filesStatus) {
      val filePath : Path = status.getPath
      if (isFile(hdfs, filePath))
        holder += filePath.toString
      else
        listChildren(hdfs, filePath.toString, holder)
    }
    holder
  }

  def copyFile(hdfs : FileSystem, source: String, target: String): Unit = {
    val sourcePath = new Path(source)
    val targetPath = new Path(target)
    if (!exists(hdfs, targetPath))
      createFile(hdfs, targetPath)
    val inputStream : FSDataInputStream = hdfs.open(sourcePath)
    val outputStream : FSDataOutputStream = hdfs.create(targetPath)
    transport(inputStream, outputStream)
  }

  def copyFolder(hdfs : FileSystem, sourceFolder: String, targetFolder: String): Unit = {
    val holder : ListBuffer[String] = new ListBuffer[String]
    val children : List[String] = listChildren(hdfs, sourceFolder, holder).toList
    for (child <- children)
      copyFile(hdfs, child, child.replaceFirst(sourceFolder, targetFolder))
  }

  def copyFileFromLocal(hdfs : FileSystem, localSource: String, hdfsTarget: String): Unit = {
    val targetPath = new Path(hdfsTarget)
    if (!exists(hdfs, targetPath))
      createFile(hdfs, targetPath)
    val inputStream : FileInputStream = new FileInputStream(localSource)
    val outputStream : FSDataOutputStream = hdfs.create(targetPath)
    transport(inputStream, outputStream)
  }

  def copyFileToLocal(hdfs : FileSystem, hdfsSource: String, localTarget: String): Unit = {
    val localFile : File = createLocalFile(localTarget)
    val inputStream : FSDataInputStream = hdfs.open(new Path(hdfsSource))
    val outputStream : FileOutputStream = new FileOutputStream(localFile)
    transport(inputStream, outputStream)
  }

  def copyFolderFromLocal(hdfs : FileSystem, localSource: String, hdfsTarget: String): Unit = {
    val localFolder : File = new File(localSource)
    val allChildren : Array[File] = localFolder.listFiles
    for (child <- allChildren) {
      val fullName = child.getAbsolutePath
      val nameExcludeSource : String = fullName.substring(localSource.length)
      val targetFileFullName : String = hdfsTarget + Path.SEPARATOR + nameExcludeSource
      if (child.isFile)
        copyFileFromLocal(hdfs, fullName, targetFileFullName)
      else
        copyFolderFromLocal(hdfs, fullName, targetFileFullName)
    }
  }

  def copyFolderToLocal(hdfs : FileSystem, hdfsSource: String, localTarget: String): Unit = {
    val holder : ListBuffer[String] = new ListBuffer[String]
    val children : List[String] = listChildren(hdfs, hdfsSource, holder).toList
    val hdfsSourceFullName = hdfs.getFileStatus(new Path(hdfsSource)).getPath.toString
    val index = hdfsSourceFullName.length
    for (child <- children) {
      val nameExcludeSource : String = child.substring(index + 1)
      val targetFileFullName : String = localTarget + File.separator + nameExcludeSource
      copyFileToLocal(hdfs, child, targetFileFullName)
    }
  }
}
Below is a test class for the utility class.
package util

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import scala.collection.mutable.ListBuffer

/**
 * Created by zls on 16-11-24.
 */
object HDFSOperator {

  def start(args: Array[String]): Unit = {
    val hdfs : FileSystem = FileSystem.get(new Configuration)
    args(0) match {
      case "list"           => traverse(hdfs, args(1))
      case "createFile"     => HDFSHelper.createFile(hdfs, args(1))
      case "createFolder"   => HDFSHelper.createFolder(hdfs, args(1))
      case "copyfile"       => HDFSHelper.copyFile(hdfs, args(1), args(2))
      case "copyfolder"     => HDFSHelper.copyFolder(hdfs, args(1), args(2))
      case "delete"         => HDFSHelper.deleteFile(hdfs, args(1))
      case "copyfilefrom"   => HDFSHelper.copyFileFromLocal(hdfs, args(1), args(2))
      case "copyfileto"     => HDFSHelper.copyFileToLocal(hdfs, args(1), args(2))
      case "copyfolderfrom" => HDFSHelper.copyFolderFromLocal(hdfs, args(1), args(2))
      case "copyfolderto"   => HDFSHelper.copyFolderToLocal(hdfs, args(1), args(2))
    }
  }

  def traverse(hdfs : FileSystem, hdfsPath : String) = {
    val holder : ListBuffer[String] = new ListBuffer[String]
    val paths : List[String] = HDFSHelper.listChildren(hdfs, hdfsPath, holder).toList
    for (path <- paths) {
      System.out.println("--------- path = " + path)
      System.out.println("--------- Path.getname = " + new Path(path).getName)
    }
  }
}
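The driver can then be called from a main method, passing the command word and paths as arguments. A minimal sketch (HDFSOperatorApp and the paths below are only placeholders):

object HDFSOperatorApp {
  def main(args: Array[String]): Unit = {
    // list an HDFS directory, then copy a local file into HDFS (placeholder paths)
    HDFSOperator.start(Array("list", "/user/zls"))
    HDFSOperator.start(Array("copyfilefrom", "/tmp/local.txt", "/user/zls/local.txt"))
  }
}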