Wrapping the HDFS API in a Scala class


/**
 *
 */
package cn.edu.zju.cs.lc


import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import scala.collection.mutable.ArrayBuffer
import java.nio.ByteBuffer
import scala.collection.mutable.HashSet
import org.apache.hadoop.fs.PathFilter


/**
 * @author Administrator
 *
 */
/**
 * A thin Scala wrapper around the Hadoop `FileSystem` API.
 *
 * Provides delete / traverse / copy / existence-check helpers. The
 * `FileSystem` instance is obtained from the default classpath
 * configuration (core-site.xml etc.); no explicit resource is added.
 *
 * NOTE(review): several methods (`uploadFiles`, `dowmloadFiles`,
 * `renameFile`, `getModifyyTIme`, `getHostName`, `isValidatePath`) are
 * unimplemented stubs; their (misspelled) names are kept unchanged for
 * source compatibility with existing callers.
 */
class Hdfsapi {

  // Configuration comes from default classpath resources; add explicit
  // resources here if running outside a configured Hadoop client.
  val conf = new Configuration()
  val hdfs = FileSystem.get(conf)

  /**
   * Deletes `path`. A directory is removed recursively; a plain file is
   * removed directly.
   *
   * @param path path to delete, as a string
   * @return true if the deletion succeeded
   */
  def deleteFile(path: String) = {
    val syspath = new Path(path)
    // delete(f, recursive): recursive must be true to remove a non-empty
    // directory; for a plain file the flag is irrelevant, so pass false
    // explicitly (the single-argument delete(Path) overload is deprecated).
    if (this.isDirectory(syspath)) {
      hdfs.delete(syspath, true)
    } else {
      hdfs.delete(syspath, false)
    }
  }

  /**
   * Recursively collects every file whose name starts with "part"
   * (MapReduce/Spark output parts such as "part-00000") under `folder`
   * into `hashset`.
   *
   * @param folder  root folder to scan
   * @param hashset accumulator set, mutated in place
   * @return the same `hashset`, for chaining
   */
  def traverseFiles(folder: String, hashset: HashSet[Path]): HashSet[Path] =
    // Identical logic to traverseFilepath — delegate instead of duplicating.
    traverseFilepath(new Path(folder), hashset)

  /**
   * Recursive worker for [[traverseFiles]]: walks `path` depth-first,
   * adding every "part*" file to `hashset`.
   *
   * @return the same `hashset`, for chaining
   */
  def traverseFilepath(path: Path, hashset: HashSet[Path]): HashSet[Path] = {
    // listStatus is non-recursive: it returns only the direct children,
    // hence the explicit recursion on sub-directories below.
    val listfilesStatus = hdfs.listStatus(path, new DefaultFilter())

    for (filestatus <- listfilesStatus) {
      val filepath = filestatus.getPath()
      if (hdfs.isFile(filepath)) {
        // Only job-output part files are of interest.
        if (filepath.getName().startsWith("part")) {
          hashset.add(filepath)
        }
      } else {
        traverseFilepath(filepath, hashset)
      }
    }

    hashset
  }

  /**
   * Copies a single file from `frompath` to `topath`.
   *
   * @param overrideExist currently unused — the destination is always
   *                      (re)created by `createFile`; TODO honour this flag
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(frompath: String, topath: String, overrideExist: Boolean): Boolean =
    // An invalid source or destination path surfaces as an IOException
    // from open/create.
    copyFiles(new Path(frompath), new Path(topath))

  /**
   * Copies a single file, streaming it in fixed-size chunks.
   *
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(frompath: Path, topath: Path): Boolean = {
    val sourceInputStream = hdfs.open(frompath)
    val destOutputStream = this.createFile(topath)

    try {
      // 64 KB chunks: read() returns the byte count, or -1 at end of stream.
      val buffer = new Array[Byte](64 * 1024)
      var len = sourceInputStream.read(buffer)
      while (len != -1) {
        // BUG FIX: was write(buffer, 0, len - 1), which dropped the last
        // byte of every chunk and corrupted the copied file.
        destOutputStream.write(buffer, 0, len)
        len = sourceInputStream.read(buffer)
      }
      destOutputStream.hflush()
    } finally {
      // Close both streams even if the copy loop throws.
      sourceInputStream.close()
      destOutputStream.close()
    }

    true
  }

  /**
   * Copies every file in `source` into `destfolder`, keeping each file's
   * base name.
   *
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(source: HashSet[Path], destfolder: Path): Boolean = {
    for (path <- source) {
      val dest = destfolder.toString() + "/" + path.getName()
      println(dest.toString())
      copyFiles(path, new Path(dest))
    }
    true
  }

  /**
   * Copies every "part*" file found (recursively) under `sourceFolder`
   * into `destfolder`.
   *
   * @return true on success (errors propagate as exceptions)
   */
  def copyFiles(sourceFolder: String, destfolder: String): Boolean = {
    val files = traverseFiles(sourceFolder, new HashSet[Path]())
    copyFiles(files, new Path(destfolder))
    true
  }

  /** @return true if `path` names an existing regular file. */
  def isFile(path: String) = hdfs.isFile(new Path(path))

  /** @return true if `path` names an existing regular file. */
  def isFile(path: Path) = hdfs.isFile(path)

  /** @return true if `path` names an existing directory. */
  def isDirectory(path: String) = hdfs.isDirectory(new Path(path))

  /** @return true if `path` names an existing directory. */
  def isDirectory(path: Path) = hdfs.isDirectory(path)

  /** TODO: path-validity check — not yet implemented. */
  def isValidatePath(path: String): Unit = {
    // intentionally empty
  }

  /** @return true if `path` exists in this filesystem. */
  def isexist(path: String) = hdfs.exists(new Path(path))

  /** @return true if `path` exists in this filesystem. */
  def isexist(path: Path) = hdfs.exists(path)

  /** TODO: upload from local filesystem — not yet implemented. */
  def uploadFiles(frompath: String, toPath: String) = {}

  /** TODO: upload from local filesystem — not yet implemented. */
  def uploadFiles(frompath: Path, toPath: Path) = {}

  /** TODO: download to local filesystem — not yet implemented. */
  def dowmloadFiles() = {}

  /**
   * Creates (or overwrites) a file at `path`.
   *
   * @return the open output stream for the new file; the caller must
   *         close it
   */
  def createFile(path: String) = hdfs.create(new Path(path))

  /**
   * Creates (or overwrites) a file at `path`.
   *
   * @return the open output stream for the new file; the caller must
   *         close it
   */
  def createFile(path: Path) = hdfs.create(path)

  /** TODO: rename — not yet implemented. */
  def renameFile(path: String, newptah: String) = {}

  /** TODO: fetch modification time — not yet implemented. */
  def getModifyyTIme(path: String) = {}

  /** TODO: fetch block host names — not yet implemented. */
  def getHostName(path: String) = {}

}


/**
 * the default path filter
 */
/**
 * Default [[PathFilter]] used when listing HDFS directories.
 *
 * Currently a pass-through: every path is accepted. (The original intent
 * — filtering out files that are not parquet output — is not implemented.)
 */
class DefaultFilter extends PathFilter {
  override def accept(path: Path) = true
}



Test:


/**
 *
 */
package cn.edu.zju.cs.lc.test


import cn.edu.zju.cs.lc.Hdfsapi
import org.apache.hadoop.fs.Path
import scala.collection.mutable.HashSet


/**
 * @author Administrator
 *
 */
/**
 * Manual smoke test for [[Hdfsapi]]: copies a single part file, then
 * recursively gathers part files from a job-output folder and copies
 * them to a destination drive. Paths are hard-coded to a local dev
 * layout; adjust before running.
 *
 * @author Administrator
 */
object test {

  def main(args: Array[String]): Unit = {

    // val instead of var: none of these locals are ever reassigned.
    val hdfsfile = new Hdfsapi()

//    hdfsfile.deleteFile("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/hive/warehouse/register/2014")

    // Copy one part file to a fixed destination name.
    hdfsfile.copyFiles("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods/hive/warehouse/device-first/2014/201410/20141001/devFirstApps/part-00000", "G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/hive/warehouse/register/2014/ip.txt", true)

    val path = new Path("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods")
    println("path:" + path)

    // traverseFiles mutates and returns the set it is given, so the
    // result can be bound directly — no var reassignment needed.
    val hashset = hdfsfile.traverseFiles("G:/Java/Javaworkplace/javaWeb/scala-workplace/logs/lods/hive/warehouse/device-first/2014/201410/20141001/devFirstApps", new HashSet[Path]())

    println("OK")

//        hashset.foreach(path =>println(path))

    hdfsfile.copyFiles(hashset, new Path("E:"))

  }

}





0 0
原创粉丝点击