Wrapping the HDFS API in a Scala class


/**
 *
 */
package cn.edu.zju.cs.lc


import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import scala.collection.mutable.ArrayBuffer
import java.nio.ByteBuffer
import scala.collection.mutable.HashSet
import org.apache.hadoop.fs.PathFilter


/**
 * @author Administrator
 *
 */
/**
 * A thin Scala wrapper around the Hadoop `FileSystem` API.
 *
 * Provides delete / traverse / copy / existence-check helpers. The
 * `FileSystem` instance is obtained from the default classpath
 * configuration (core-site.xml etc.); no explicit resource is added.
 *
 * NOTE(review): several methods (`uploadFiles`, `dowmloadFiles`,
 * `renameFile`, `getModifyyTIme`, `getHostName`, `isValidatePath`) are
 * unimplemented stubs; their (misspelled) names are kept unchanged for
 * source compatibility with existing callers.
 */
class Hdfsapi {

  // Configuration comes from default classpath resources; add explicit
  // resources here if running outside a configured Hadoop client.
  val conf = new Configuration()
  val hdfs = FileSystem.get(conf)

  /**
   * Deletes `path`. A directory is removed recursively; a plain file is
   * removed directly.
   *
   * @param path path to delete, as a string
   * @return true if the deletion succeeded
   */
  def deleteFile(path: String) = {
    val syspath = new Path(path)
    // delete(f, recursive): recursive must be true to remove a non-empty
    // directory; for a plain file the flag is irrelevant, so pass false
    // explicitly (the single-argument delete(Path) overload is deprecated).
    if (this.isDirectory(syspath)) {
      hdfs.delete(syspath, true)
    } else {
      hdfs.delete(syspath, false)
    }
  }

  /**
   * Recursively collects every file whose name starts with "part"
   * (MapReduce/Spark output parts such as "part-00000") under `folder`
   * into `hashset`.
   *
   * @param folder  root folder to scan
   * @param hashset accumulator set, mutated in place
   * @return the same `hashset`, for chaining
   */
  def traverseFiles(folder: String, hashset: HashSet[Path]): HashSet[Path] =
    // Identical logic to traverseFilepath — delegate instead of duplicating.
    traverseFilepath(new Path(folder), hashset)

  /**
   * Recursive worker for [[traverseFiles]]: walks `path` depth-first,
   * adding every "part*" file to `hashset`.
   *
   * @return the same `hashset`, for chaining
   */
  def traverseFilepath(path: Path, hashset: HashSet[Path]): HashSet[Path] = {
    // listStatus is non-recursive: it returns only the direct children,
    // hence the explicit recursion on sub-directories below.
    val listfilesStatus = hdfs.listStatus(path, new DefaultFilter())

    for (filestatus <- listfilesStatus) {
      val filepath = filestatus.getPath()
      if (hdfs.isFile(filepath)) {
        // Only job-output part files are of interest.
        if (filepath.getName().startsWith("part")) {
          hashset.add(filepath)
        }
      } else {
        traverseFilepath(filepath, hashset)
      }
    }

    hashset
  }

  /**
   * Copies a single file from `frompath` to `topath`.
   *
   * @param overrideExist currently unused — the destination is always
   *                      (re)created by `createFile`; TODO honour this flag
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(frompath: String, topath: String, overrideExist: Boolean): Boolean =
    // An invalid source or destination path surfaces as an IOException
    // from open/create.
    copyFiles(new Path(frompath), new Path(topath))

  /**
   * Copies a single file, streaming it in fixed-size chunks.
   *
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(frompath: Path, topath: Path): Boolean = {
    val sourceInputStream = hdfs.open(frompath)
    val destOutputStream = this.createFile(topath)

    try {
      // 64 KB chunks: read() returns the byte count, or -1 at end of stream.
      val buffer = new Array[Byte](64 * 1024)
      var len = sourceInputStream.read(buffer)
      while (len != -1) {
        // BUG FIX: was write(buffer, 0, len - 1), which dropped the last
        // byte of every chunk and corrupted the copied file.
        destOutputStream.write(buffer, 0, len)
        len = sourceInputStream.read(buffer)
      }
      destOutputStream.hflush()
    } finally {
      // Close both streams even if the copy loop throws.
      sourceInputStream.close()
      destOutputStream.close()
    }

    true
  }

  /**
   * Copies every file in `source` into `destfolder`, keeping each file's
   * base name.
   *
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(source: HashSet[Path], destfolder: Path): Boolean = {
    for (path <- source) {
      val dest = destfolder.toString() + "/" + path.getName()
      println(dest.toString())
      copyFiles(path, new Path(dest))
    }
    true
  }

  /**
   * Copies every "part*" file found (recursively) under `sourceFolder`
   * into `destfolder`.
   *
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(sourceFolder: String, destfolder: String): Boolean = {
    val files = traverseFiles(sourceFolder, new HashSet[Path]())
    copyFiles(files, new Path(destfolder))
    true
  }

  /** @return true if `path` names an existing regular file. */
  def isFile(path: String) = hdfs.isFile(new Path(path))

  /** @return true if `path` names an existing regular file. */
  def isFile(path: Path) = hdfs.isFile(path)

  /** @return true if `path` names an existing directory. */
  def isDirectory(path: String) = hdfs.isDirectory(new Path(path))

  /** @return true if `path` names an existing directory. */
  def isDirectory(path: Path) = hdfs.isDirectory(path)

  /** TODO: path-validity check — not yet implemented. */
  def isValidatePath(path: String): Unit = {
    // intentionally empty
  }

  /** @return true if `path` exists in this filesystem. */
  def isexist(path: String) = hdfs.exists(new Path(path))

  /** @return true if `path` exists in this filesystem. */
  def isexist(path: Path) = hdfs.exists(path)

  /** TODO: upload from local filesystem — not yet implemented. */
  def uploadFiles(frompath: String, toPath: String) = {}

  /** TODO: upload from local filesystem — not yet implemented. */
  def uploadFiles(frompath: Path, toPath: Path) = {}

  /** TODO: download to local filesystem — not yet implemented. */
  def dowmloadFiles() = {}

  /**
   * Creates (or overwrites) a file at `path`.
   *
   * @return the open output stream for the new file; the caller must
   *         close it
   */
  def createFile(path: String) = hdfs.create(new Path(path))

  /**
   * Creates (or overwrites) a file at `path`.
   *
   * @return the open output stream for the new file; the caller must
   *         close it
   */
  def createFile(path: Path) = hdfs.create(path)

  /** TODO: rename — not yet implemented. */
  def renameFile(path: String, newptah: String) = {}

  /** TODO: fetch modification time — not yet implemented. */
  def getModifyyTIme(path: String) = {}

  /** TODO: fetch block host names — not yet implemented. */
  def getHostName(path: String) = {}

}


/**
 * the default path filter
 */
/**
 * Default [[PathFilter]] used when listing HDFS directories.
 *
 * Currently a pass-through: every path is accepted. (The original intent
 * — filtering out files that are not parquet output — is not implemented.)
 */
class DefaultFilter extends PathFilter {
  override def accept(path: Path) = true
}



Test:


/**
 *
 */
package cn.edu.zju.cs.lc.test


import cn.edu.zju.cs.lc.Hdfsapi
import org.apache.hadoop.fs.Path
import scala.collection.mutable.HashSet


/**
 * @author Administrator
 *
 */
/**
 * Manual smoke test for [[Hdfsapi]]: copies a single part file, then
 * recursively gathers part files from a job-output folder and copies
 * them to a destination drive. Paths are hard-coded to a local dev
 * layout; adjust before running.
 *
 * @author Administrator
 */
object test {

  def main(args: Array[String]): Unit = {

    // val instead of var: none of these locals are ever reassigned.
    val hdfsfile = new Hdfsapi()

//    hdfsfile.deleteFile("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/hive/warehouse/register/2014")

    // Copy one part file to a fixed destination name.
    hdfsfile.copyFiles("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods/hive/warehouse/device-first/2014/201410/20141001/devFirstApps/part-00000", "G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/hive/warehouse/register/2014/ip.txt", true)

    val path = new Path("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods")
    println("path:" + path)

    // traverseFiles mutates and returns the set it is given, so the
    // result can be bound directly — no var reassignment needed.
    val hashset = hdfsfile.traverseFiles("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods/hive/warehouse/device-first/2014/201410/20141001/devFirstApps", new HashSet[Path]())

    println("OK")

//        hashset.foreach(path =>println(path))

    hdfsfile.copyFiles(hashset, new Path("E:"))

  }

}





0 0
原创粉丝点击