复习总结08：Scala基础

来源：互联网　发布：windows xp logo　编辑：程序博客网　时间：2024/06/04 18:06

一、Scala编程语法

// 1. Defining a method. The last expression of the body is the result,
// so no explicit `return` is needed.
/** Returns the product of `x` and `y`. */
def method1(x: Int, y: Int): Int = x * y
// A method without a parameter list; the original left its result type to inference.
// `name`, `age` and `prop` are defined outside this snippet.
def description: String = s"$name is $age years old with ${prop.toBuffer}"
// Takes no arguments and returns a Session: whatever `sessions.remove(0)` yields.
// `sessions` is defined outside this snippet (presumably a mutable buffer of Session — confirm).
def getSession(): Session = sessions.remove(0)
// 2. Defining a function value: a function literal bound to a val.
val method2: (Float, Float) => Float = (x, y) => x * y
println(method2(3.2f, 2.0f))
    
// 3. Passing a function to a method.
val func = (x: Int, y: Int) => x * y

/** Applies `fun` to the fixed arguments 2 and 6 and doubles the result. */
def method1(fun: (Int, Int) => Int): Int = fun(2, 6) * 2

// Hand the function value to the method, which invokes it: prints 24.
println(method1(func))
    
// 4. Array is fixed-length (print it through toBuffer); ArrayBuffer is variable-length
//    and supports remove / insert.
val a = Array.ofDim[Int](10) // ten slots, each holding the default value 0
println(a.toBuffer)
val b = new ArrayBuffer[Int](10) // 10 is only the initial capacity; the buffer starts empty
println(b)
// 5. Inserting an element and printing in reverse order.
val a = ArrayBuffer[Float](2.0f, 3.2f, 4.5f)
println(a)
a.insert(0, 2.0f) // insert at index 0: ArrayBuffer(2.0, 2.0, 3.2, 4.5)
// Print from the last element back to the first, one element per line.
a.reverseIterator.foreach(println)
// 6. match on the value itself.
val a = Array[String]("nima", "shabi", "hehe")
val name = a(Random.nextInt(a.length)) // random index in [0, a.length), i.e. 0, 1 or 2
println(name match {
  case "nima"  => "1，nima"
  case "shabi" => "2.shabi"
  case _       => "3,hehe" // default branch when none of the above matched
})
// match on the runtime type of the value (`elem` is defined outside this snippet).
// A negative Double fails the guard and falls through to the default case, which throws.
elem match {
  case x: Int              => println(s"Int $x")
  case y: Double if y >= 0 => println(s"Double $y")
  case z: String           => println(s"String $z")
  case _                   => throw new Exception("not match exception")
}
…………………………列表 List………………………………
//7，将某元素插入列表前面
    a  :: list  ;  a  +: list  ; lst3 = lst1.::(0)  ; lst5 = lst1.+:(0)
//将某元素添加到list后面
    lst6 = lst1 :+ 3
//将列表合并：
    lst8 = lst1 ++: lst0   // lst1在前
//将2个list合并成一个新的List
    lst7 = lst1 ++ lst0
//ListBuffer，可变序列
listBuffer += elem  ;  listBuffer.append(elem)  //追加元素
    
…………………………集合 Set…………………………………   
// 8. Set: immutable vs. mutable.
val set1 = HashSet.empty[Int]
// `+` returns a new set that also contains the element; set1 itself is unchanged.
val set2 = set1 + 4
// Elements of a set are unique.
val set3 = set1 ++ Set(5, 6, 7) // union of two sets
val set0 = Set(1, 3, 4) ++ set1
println(set0.getClass)
// HashSet is immutable; mutable.HashSet can be changed in place, where `+=` appends and is equivalent to add.
……………………………Map 集合………………………………………
// 9. Map (mutable HashMap).
val map1 = mutable.HashMap.empty[String, Int]
// Three equivalent ways of adding an entry.
map1.update("spark", 1)
map1 += ("hadoop" -> 2)
map1.put("storm", 3)
println(map1)
// Two ways of removing an entry by key.
map1 -= "spark"
map1.remove("hadoop")
println(map1)
//10 ，类，对象，继承等等
构造器：通过参数来确定执行主构造器还是副构造器
/** A person with a primary constructor (name, age) and an auxiliary one that also sets gender. */
class Person(val name: String, val age: Int) {
  // The class body is the primary constructor, so this runs for every new instance.
  println("执行主构造器")
  var gender = "male"

  /** Auxiliary constructor: delegates to the primary constructor, then overrides `gender`. */
  def this(name: String, age: Int, gender: String) = {
    this(name, age) // the call to another constructor has to come first
    println("执行辅构造器1")
    this.gender = gender
  }
}
object test {
  /** Creates a Person with three constructor arguments and prints name, age and gender concatenated. */
  def main(args: Array[String]): Unit = {
    // The argument list decides which constructor is invoked.
    val person = new Person("shabi", 23, "female")
    println(s"${person.name}${person.age}${person.gender}")
  }
}
// Companion object: an object that has the same name as a class is that class's companion.
// A class and its companion can access each other's private methods and fields:
// methods of the class can read the object's private members, and vice versa.
class Person(name: String, age: Int) {
  println("执行主构造器")
  var id = 1
  private var gender = "male"

  /** Prints id, gender, name, age and the companion's private field `a`, separated by single spaces. */
  def printPerson: Unit =
    println(s"${this.id} ${this.gender} ${this.name} ${this.age} ${Person.a}")
}

object Person {
  private var a = "唧唧歪歪"

  /** Factory method, so callers can write `Person(name, age)` instead of `new Person(name, age)`. */
  def apply(name: String, age: Int): Person = new Person(name, age)

  def main(args: Array[String]): Unit = {
    // `Person(...)` is sugar for `Person.apply(...)`, the recommended way to create instances.
    val p1 = Person("曾祥雨", 23)
    p1.id = 2
    p1.gender = "female" // allowed here: the companion may access the class's private members
    p1.printPerson
  }
}
//通常我们会在类的伴生对象中定义apply方法，当遇到类名(参数1,...参数n)时apply方法会被调用
//新建对象时，用apply来实现
// 11. Matching on case classes.
case class SubmitTask(id: String, name: String)
case class HeartBeat(time: Long)
case object CheckTimeOutTask

/** Picks one of three sample messages at random and prints a line that depends on its shape. */
object CaseDemo04 extends App {
  val arr = Array(CheckTimeOutTask, HeartBeat(12333), SubmitTask("0001", "task-0001"))
  arr(Random.nextInt(arr.length)) match {
    // The `s` prefix enables interpolation: `$id` expands to the value of id.
    case SubmitTask(id, name) => println(s"$id, $name")
    case HeartBeat(time)      => println(time)
    case CheckTimeOutTask     => println("check")
  }
}
// 12. Higher-order functions: functions as values, anonymous functions,
//     turning a method into a function, currying.
// A function stored in a val and passed around as a value (`arr` is defined outside this snippet).
val fun1 = (x: Int) => x * x
arr.map(fun1)
// Same effect with an anonymous function, skipping the separate definition step.
arr.map((x: Int) => x * x)
// The four forms below are progressively shorter spellings of the same lambda passed to map.
// Form 1: the most explicit one, with the parameter type written out: (Int) => Int
  new_list = list.map((x: Int) => x * 3)
// Form 2: map already knows it will receive a function of type (Int) => Int, so the parameter type can be omitted
  new_list = list.map((x) => x * 3)
// Form 3: for a function with a single parameter, the parentheses around the parameter can be dropped
  new_list = list.map(x => x * 3)
// Form 4 (shortest): if the parameter appears only once on the right of =>, `_` can be used instead
  new_list = list.map(_ * 3)
  // Print every element of the result.
  new_list.foreach(println(_))
  // The same placeholder syntax works on an Array.
  var a = Array(1,2,3)
  a.map(_* 3)
  
// Turning a method into a function value.
def fun(x: Int): Int = x * x
val fun2 = fun _ // the trailing `_` converts the method into a function value
arr2 = arr.map(fun2)
// Currying: turning a method that takes two parameters into one that takes a single
// parameter and returns a function waiting for the second.
def m(x: Int): Int => Int = y => x * y
val func = m(3)     // fixes x = 3
val func2 = func(5) // 3 * 5
println(func2)
// Implicit values.
import context._
// Holder object for the implicit values; importing its members brings them into implicit scope.
object context {
  // Within one holder, keep a single implicit value per type, otherwise resolution is ambiguous.
  // The explicit type annotation keeps the implicit's type independent of inference.
  implicit val b: String = "yin"
  // implicit val a = "yu"
}

object ImplicitValue {
  /**
   * Prints a greeting for `name`.
   *
   * `name` is resolved implicitly: an implicit String in scope at the call site (here
   * `context.b`) takes precedence, and the default "aaa" is used only when none is found.
   */
  def sayHi()(implicit name: String = "aaa"): Unit =
    println(s"hi~$name")

  def main(args: Array[String]): Unit = {
    sayHi()
  }
}

二、RDD算子

    val conf = new SparkConf().setAppName("WC").setMaster("local")
    val sc = new SparkContext(conf)
    //textFile会产生两个RDD：HadoopRDD  -> MapPartitinsRDD
    val rdd = sc.textFile("d://a.txt")           //逐行读取   a \r a b \r a b c \r a b c d \r a b c d e \r
    // 产生一个RDD ：MapPartitinsRDD
    val rdd2 = rdd.flatMap(_.split(" "))  //按照空格分开 a a b a b c a b c d a b c d e
    //产生一个RDD MapPartitionsRDD
    val rdd3 = rdd2.map((_, 1))//构造 (a,1), (a,1), (b,1), (a,1), (b,1), (c,1), (a,1), (b,1), (c,1), (d,1), (a,1), (b,1), (c,1), (d,1), (e,1)
    //产生一个RDD ShuffledRDD
    val rdd4 = rdd3.reduceByKey(_ + _) //按照key执行函数(_+_)相同key的value求和。 (a,5), (b,4), (c,3), (d,2), (e,1)
    //产生一个RDD: mapPartitions
    //val rdd5 = rdd4.saveAsTextFile("d://b.txt")
    println(rdd4.collect().sortBy(_._1).toBuffer   //ArrayBuffer( (a,5), (b,4), (c,3), (d,2), (e,1) ) 将map以buffer形式 
    sc.stop()
            
// UserLocation: turn each comma-separated log line into ((field 0, field 2), signed time).
val rdd1 = sc.textFile("c://bs_log").map { line =>
  val fields = line.split(",")
  val key = (fields(0), fields(2))
  val flag = fields(3)
  val timestamp = fields(1).toLong
  // A flag of "1" negates the timestamp; any other flag leaves it unchanged.
  val signedTime = if (flag == "1") -timestamp else timestamp
  (key, signedTime)
}
            
 //找区别：val rdd = rdd1.map((_, 1)) 与上例相比，只是直接传入了一个匿名函数而已！
//join
    rdd1.join(rdd2)    // rdd1=(k, v)  rdd2=(k, w)   join之后返回一个(k,(v,w))
            
            
            

三、Akka.Actor

Worker和Master编程的区别：都是继承了Actor实现方法，也都实现了自己的ip地址和端口号的设置。区别：Worker的preStart方法连接到了Master

Akka的Actor核心：其实是通过继承Actor实现了若干个可以互相连接的节点。只是要有一个节点来管理其他节点，老大叫Master，其他叫Worker。这些节点实现的消息处理是谁发给我消息，监听到后我返回给谁

A：对于Master来说，实现一个对外可以连接的IP地址和端口号即可，再就是丰富其匹配的消息的业务逻辑。

B：对于这若干个Worker来说，

①要在preStart()方法中根据Master的IP地址和端口号建立与Master的连接，并且发送"connect"消息来表明连接。

②要在接收消息的方法中匹配Master连接成功的消息，来表明自己连接到了Master。

③要在实例化自己时，把要连接的Master的IP地址和端口号传入。

④命名为worker

四、Spark编程

//设置日志输出内容，
LoggerLevels.setStreamingLogLevels()
/** Helper for adjusting the log4j root logger level. */
object LoggerLevels extends Logging {
  /**
   * Sets the root logger level to WARN, but only when the root logger has no appenders yet;
   * if any appender is already registered, the existing configuration is left untouched.
   */
  def setStreamingLogLevels(): Unit = {
    val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
    if (!log4jInitialized) {
      // Log the notice before lowering the level.
      logInfo("Setting log level to [WARN] for streaming example." +
        " To override add a custom log4j.properties to the classpath.")
      Logger.getRootLogger.setLevel(Level.WARN)
    }
  }
}
  //取出数据，返回map
 val rdd1 = rdd0.reduceByKey(_+_).map(t => {
      val mobile = t._1._1
      val lac = t._1._2
      val time = t._2
      (lac, (mobile, time))
    })
     
// An RDD can be built in three ways: (1) from a collection; (2) from external storage; (3) from another RDD.
val rdd1 = sc.parallelize(
  List(
    ("yuihatano", 90, 28, 1),
    ("angelababy", 90, 27, 2),
    ("JuJingYi", 95, 22, 3)
  )
)

// Sorting example: the sort key is a Girl built from the 2nd and 3rd tuple fields;
// `false` is passed as sortBy's second argument.
val rdd2 = rdd1.sortBy({ case (_, faceValue, age, _) => Girl(faceValue, age) }, false)
 /**
  * Sort key made of a face value and an age.
  *
  * Ordering: a higher `faceValue` ranks higher; when face values are equal, the lower `age`
  * ranks higher.
  */
 case class Girl(faceValue: Int, age: Int) extends Ordered[Girl] with Serializable {
   /**
    * Compares this key with `that`.
    *
    * Uses `Integer.compare` rather than subtraction, because a difference of two Ints can
    * overflow and flip the sign of the result.
    *
    * @return a negative, zero or positive value when this key ranks below, equal to or above `that`
    */
   override def compare(that: Girl): Int =
     if (this.faceValue == that.faceValue) Integer.compare(that.age, this.age)
     else Integer.compare(this.faceValue, that.faceValue)
 }
// partitionBy
// Collect the distinct first elements of rdd3's records.
val ints = rdd3.map(_._1).distinct().collect()
// The partitioner is built from those values (HostParitioner is defined outside this snippet;
// per the original note, ints determines the degree of parallelism).
// Alternative: val rdd4 = rdd3.partitionBy(new HashPartitioner(ints.length))
val hostPartitioner = new HostParitioner(ints)
val rdd4 = rdd3.partitionBy(hostPartitioner).mapPartitions(it => {
  // Within each partition keep the two records with the largest `_2._2`.
  it.toList.sortBy(_._2._2).reverse.take(2).iterator
})

阅读全文

0 0