Spark Transformation 算子:mapPartitionsWithIndex

来源:互联网 发布:matlab定义数组 编辑:程序博客网 时间:2024/06/05 02:04
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ArrayBuffer

/**
  * Created by liupeng on 2017/6/15.
  *
  * Demo of the `mapPartitionsWithIndex` transformation: every element of an RDD
  * is prefixed with the index of the partition it was placed in, which makes it
  * easy to see which elements ended up in the same partition.
  */
object T_mapPartitionsWithIndex {
  // Hard-coded local Hadoop directory, set once when this object is initialised.
  // NOTE(review): presumably only needed on the author's Windows machine —
  // adjust or remove for other environments.
  System.setProperty("hadoop.home.dir","F:\\hadoop-2.6.5")

  /**
    * Prefixes each element of a partition with the partition index.
    *
    * The returned iterator is lazy: elements are transformed (and printed) as
    * the caller consumes it, so the partition is never buffered in memory.
    *
    * @param index index of the partition being processed
    * @param iter  elements of that partition
    * @return an iterator yielding `"<index>:<element>"` for every input element
    */
  def fun_index(index : Int, iter : Iterator[String]) : Iterator[String] =
    iter.map { name =>
      val tagged = s"$index:$name"
      // Printed so the partition assignment is visible in the console output.
      println(tagged)
      tagged
    }

  /**
    * Runs the demo on a local Spark master: distributes three names over two
    * partitions, tags each name with its partition index and prints the number
    * of resulting elements.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("mapPartitionsWithIndex_test").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Prepare the sample data.
      val names: List[String] = List("liupeng", "xuliuxi", "xiaoma")
      val nameRDD = sc.parallelize(names, 2)

      // Traverse by partition and index: to find out which elements were placed
      // together, mapPartitionsWithIndex hands the function each partition's index.
      val nameWithPartitionIndex = nameRDD.mapPartitionsWithIndex(fun_index)

      // count() is the action that actually triggers the transformation above.
      println(nameWithPartitionIndex.count())
    } finally {
      // Always release the context, even if the job fails.
      sc.stop()
    }
  }
}
运行结果:
0:liupeng

1:xuliuxi
1:xiaoma

3
原创粉丝点击