Wrapping the HDFS API in a Scala class
/**
*
*/
package cn.edu.zju.cs.lc
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.PathFilter
import scala.collection.mutable.HashSet
/**
* @author Administrator
*
*/
class Hdfsapi {
val conf = new Configuration()
// conf.addResource(new Path("D:\\myeclipse\\Hadoop\\hadoopEx\\src\\conf\\hadoop.xml"))
val hdfs = FileSystem.get(conf)
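// A side note, not in the original: with no Hadoop configuration files on the
// classpath, FileSystem.get(conf) falls back to the local filesystem (which is
// why the test below can use G:/ paths). To target a real cluster explicitly,
// set the namenode address before the call, for example:
//   conf.set("fs.defaultFS", "hdfs://namenode:8020")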
/**
* Delete a file or directory at the given path.
*/
def deleteFile(path: String): Boolean = {
val syspath = new Path(path)
// recursive = true is required to delete a non-empty directory
// (otherwise delete throws an exception); for a plain file the
// recursive flag is ignored, so false is safe there
if (isDirectory(syspath)) {
hdfs.delete(syspath, true)
} else {
hdfs.delete(syspath, false)
}
}
/**
* Collect the paths of all "part" files under a folder.
* Requires a recursive traversal of subdirectories.
*/
def traverseFiles(folder: String, hashset: HashSet[Path]): HashSet[Path] = {
// listStatus only returns the direct children of a directory,
// so subdirectories are handled by recursing via traverseFilepath
val listfilesStatus = hdfs.listStatus(new Path(folder), new DefaultFilter())
for (filestatus <- listfilesStatus) {
val filepath = filestatus.getPath()
if (hdfs.isFile(filepath)) {
// only collect job output files named "part-*"
if (filepath.getName().startsWith("part")) {
hashset.add(filepath)
}
} else {
traverseFilepath(filepath, hashset)
}
}
hashset
}
/**
* Recursive helper: accumulates matching paths into the given HashSet[Path].
*/
def traverseFilepath(path: Path, hashset: HashSet[Path]): HashSet[Path] = {
val listfilesStatus = hdfs.listStatus(path, new DefaultFilter())
for (filestatus <- listfilesStatus) {
val filepath = filestatus.getPath()
if (hdfs.isFile(filepath)) {
if (filepath.getName().startsWith("part")) {
hashset.add(filepath)
}
} else {
traverseFilepath(filepath, hashset)
}
}
hashset
}
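/**
* A hedged alternative sketch (not in the original): Hadoop's built-in
* recursive listing. FileSystem.listFiles(path, true) walks subdirectories
* itself and returns a RemoteIterator[LocatedFileStatus], so the
* hand-rolled recursion above is not strictly necessary.
*/
def traverseFilesBuiltin(folder: String): HashSet[Path] = {
val hashset = new HashSet[Path]()
val iter = hdfs.listFiles(new Path(folder), true) // true = recurse into subdirectories
while (iter.hasNext()) {
val filepath = iter.next().getPath()
// keep the same "part" filter as the methods above
if (filepath.getName().startsWith("part")) {
hashset.add(filepath)
}
}
hashset
}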
/**
* Copy a single file from one path to another.
*/
def copyFiles(frompath: String, topath: String, overrideExist: Boolean): Boolean = {
// if frompath or topath is invalid, hdfs.open / createFile will throw an exception
// note: overrideExist is currently ignored; hdfs.create overwrites by default
val sourcepath = new Path(frompath)
val destpath = new Path(topath)
val sourceInputStream = hdfs.open(sourcepath)
val destOutputStream = this.createFile(destpath)
// in Scala an assignment evaluates to Unit, so the Java idiom
// while ((len = in.read(buffer)) != -1) does not compile; read before the loop instead
val buffer = new Array[Byte](64000000) // 64 MB copy buffer
var len = sourceInputStream.read(buffer)
while (len != -1) {
println("buffer length: " + buffer.length + " len: " + len)
// write exactly len bytes; the original wrote len - 1, dropping the last byte of every read
destOutputStream.write(buffer, 0, len)
len = sourceInputStream.read(buffer)
}
destOutputStream.hflush()
sourceInputStream.close()
destOutputStream.close()
true
}
def copyFiles(frompath: Path, topath: Path): Boolean = {
val sourceInputStream = hdfs.open(frompath)
val destOutputStream = this.createFile(topath)
val buffer = new Array[Byte](64000000) // 64 MB copy buffer
var len = sourceInputStream.read(buffer)
while (len != -1) {
println("buffer length: " + buffer.length + " len: " + len)
destOutputStream.write(buffer, 0, len) // was len - 1, which dropped a byte per read
len = sourceInputStream.read(buffer)
}
destOutputStream.hflush()
sourceInputStream.close()
destOutputStream.close()
true
}
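/**
* A hedged alternative sketch (not in the original): letting Hadoop's
* IOUtils drive the copy loop avoids the manual buffer management above.
* copyBytes closes both streams when the last argument is true.
*/
def copyFilesWithIOUtils(frompath: Path, topath: Path): Boolean = {
val in = hdfs.open(frompath)
val out = createFile(topath)
org.apache.hadoop.io.IOUtils.copyBytes(in, out, 4096, true) // 4096-byte buffer, close streams when done
true
}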
/**
* Copy every file in the source set into the destination folder.
*/
def copyFiles(source: HashSet[Path], destfolder: Path): Boolean = {
for (path <- source) {
// note: every file lands directly in destfolder, so the original
// directory structure is flattened
val dest = destfolder.toString() + "/" + path.getName()
println(dest)
copyFiles(path, new Path(dest))
}
true
}
/**
* copy files from one folder to another
*/
def copyFiles(sourceFolder: String, destfolder: String): Boolean = {
val hashset = traverseFiles(sourceFolder, new HashSet[Path]())
copyFiles(hashset, new Path(destfolder))
true
}
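/**
* A hedged alternative sketch (not in the original): FileUtil.copy performs
* a recursive folder copy and preserves the directory layout, unlike the
* flattening copy above.
*/
def copyFolder(sourceFolder: String, destfolder: String): Boolean = {
org.apache.hadoop.fs.FileUtil.copy(hdfs, new Path(sourceFolder), hdfs, new Path(destfolder), false, conf)
}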
/**
* Check whether the given path refers to a regular file.
*/
def isFile(path: String) = {
val syspath = new Path(path)
hdfs.isFile(syspath)
}
def isFile(path: Path) = {
hdfs.isFile(path)
}
/**
* Check whether the given path refers to a directory.
*/
def isDirectory(path: String) = {
val syspath = new Path(path)
hdfs.isDirectory(syspath)
}
def isDirectory(path: Path) = {
hdfs.isDirectory(path)
}
/**
* Validity check for a path string (stub, not yet implemented).
*/
def isValidatePath(path: String) {
val syspath = new Path(path)
// TODO: not implemented in the original
}
/**
* Check whether the given path exists.
*/
def isexist(path: String) = {
val syspath = new Path(path)
hdfs.exists(syspath)
}
def isexist(path: Path) = {
hdfs.exists(path)
}
/**
* Upload from the local filesystem to HDFS (stubs, not yet implemented).
* hdfs.copyFromLocalFile(src, dst) would be the natural implementation.
*/
def uploadFiles(frompath: String, toPath: String) = {}
def uploadFiles(frompath: Path, toPath: Path) = {}
/**
* Download from HDFS to the local filesystem (stub, not yet implemented).
* hdfs.copyToLocalFile(src, dst) would be the natural implementation.
*/
def downloadFiles() = {}
/**
* Create a file and return its FSDataOutputStream.
* Note that create() overwrites an existing file by default.
*/
def createFile(path: String) = {
val syspath = new Path(path)
hdfs.create(syspath)
}
def createFile(path: Path) = {
hdfs.create(path)
}
/**
* Rename a file (stub). hdfs.rename(new Path(path), new Path(newpath))
* would be the natural implementation.
*/
def renameFile(path: String, newpath: String) = {}
/**
* Get the modification time of a path (stub).
* hdfs.getFileStatus(new Path(path)).getModificationTime() would implement this.
*/
def getModifyTime(path: String) = {}
/**
* Look up the hosts storing a path's blocks (stub, not yet implemented).
*/
def getHostName(path: String) = {
}
}
/**
* The default path filter. It currently accepts every path; the original
* comment suggests it was meant to filter out files that do not end with
* "parquet", but no filtering is implemented.
*/
class DefaultFilter extends PathFilter {
override def accept(path: Path) = {
true
}
}
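Since the comment above hints at suffix-based filtering, here is a minimal sketch of such a filter (the class name and constructor parameter are assumptions, not part of the original):
/**
* Sketch of a suffix-based filter: accepts only paths whose name ends with
* the given suffix, e.g. new SuffixFilter("parquet").
*/
class SuffixFilter(suffix: String) extends PathFilter {
override def accept(path: Path): Boolean = path.getName().endsWith(suffix)
}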
Test:
/**
*
*/
package cn.edu.zju.cs.lc.test
import cn.edu.zju.cs.lc.Hdfsapi
import org.apache.hadoop.fs.Path
import scala.collection.mutable.HashSet
/**
* @author Administrator
*
*/
object test {
def main(args: Array[String]): Unit = {
val hdfsfile = new Hdfsapi()
// hdfsfile.deleteFile("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/hive/warehouse/register/2014")
hdfsfile.copyFiles("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods/hive/warehouse/device-first/2014/201410/20141001/devFirstApps/part-00000", "G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/hive/warehouse/register/2014/ip.txt", true)
val path = new Path("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods")
println("path:" + path)
val hashset = hdfsfile.traverseFiles("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods/hive/warehouse/device-first/2014/201410/20141001/devFirstApps", new HashSet[Path]())
println("OK")
hdfsfile.copyFiles(hashset, new Path("E:"))
}
}
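To compile and run these sources, the Hadoop client library must be on the classpath. A minimal build.sbt sketch (the version numbers are assumptions; choose ones that match your cluster):
name := "hdfs-api-wrapper"
scalaVersion := "2.11.12"
libraryDependencies += "org.apache.hadoop" % "hadoop-client" % "2.7.7"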