根据日志统计出每个用户停留时间最长的前2个站点的信息

来源:互联网 发布:数据库本地连接不上 编辑:程序博客网 时间:2024/05/21 07:07

package com.ljt.spark01

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import shapeless._0

/**
 * From connection logs, reports for every mobile number the two base stations
 * at which it spent the most time.
 *
 * Pipeline:
 *  1. Key every log event by (mobile number, station id) and turn its timestamp
 *     into a signed number of epoch seconds (negative on connect, positive otherwise).
 *  2. Sum the signed values per key, which yields the total stay per (mobile, station).
 *  3. Join with the station coordinates file.
 *  4. Group by mobile number, sort each group by stay time descending, keep the first 2.
 *
 * Created by root on 2016/5/16.
 */
object UserLocation {

  import java.time.{LocalDateTime, ZoneOffset}
  import java.time.format.DateTimeFormatter

  /** Layout of the timestamp column, e.g. 20160327082400. */
  private val TimestampFormat: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMddHHmmss")

  /**
   * Converts a `yyyyMMddHHmmss` timestamp into epoch seconds.
   *
   * The raw digits must not be subtracted directly: they are not a linear time
   * scale (e.g. 080100 - 075900 = 4200 although only two minutes elapsed).
   * Only differences between results are used, so the fixed UTC offset cancels out.
   * NOTE(review): daylight-saving shifts are ignored - confirm this is acceptable.
   */
  private def toEpochSeconds(timestamp: String): Long =
    LocalDateTime.parse(timestamp, TimestampFormat).toEpochSecond(ZoneOffset.UTC)

  /**
   * Parses one event line of the form `mobile,timestamp,stationId,flag`.
   *
   * @param line raw CSV line from the event log
   * @return `None` for a blank line, otherwise `((mobile, stationId), signedSeconds)`
   *         where the value is negated when `flag` is "1" (connection established,
   *         per the original author's notes) so that summing a key gives its stay time
   * @throws IllegalArgumentException if a non-blank line has fewer than four fields
   */
  private def parseEvent(line: String): Option[((String, String), Long)] =
    line.split(",").map(_.trim) match {
      case Array(mobile, timestamp, stationId, flag, _*) =>
        val seconds = toEpochSeconds(timestamp)
        Some(((mobile, stationId), if (flag == "1") -seconds else seconds))
      case _ if line.trim.isEmpty =>
        None
      case _ =>
        throw new IllegalArgumentException(s"Malformed event record: '$line'")
    }

  /**
   * Parses one station line of the form `stationId,longitude,latitude`
   * (column order taken from the original author's notes - TODO confirm against the data file).
   *
   * @param line raw CSV line from the station file
   * @return `None` for a blank line, otherwise `(stationId, (longitude, latitude))`
   * @throws IllegalArgumentException if a non-blank line has fewer than three fields
   */
  private def parseStation(line: String): Option[(String, (String, String))] =
    line.split(",").map(_.trim) match {
      case Array(stationId, longitude, latitude, _*) =>
        Some((stationId, (longitude, latitude)))
      case _ if line.trim.isEmpty =>
        None
      case _ =>
        throw new IllegalArgumentException(s"Malformed station record: '$line'")
    }

  /**
   * Runs the job locally: reads `data/userlocal/` and `data/usercount/loc_info.txt`,
   * writes the result to `data/out/lacation_out` and prints it.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("MoblieLocation")
      .setMaster("local[2]")
    val sc = new SparkContext(conf)
    try {
      // ((mobile, stationId), signed epoch seconds) for every log event.
      val signedEvents = sc.textFile("data/userlocal/").flatMap(line => parseEvent(line))

      // Summing the signed values gives the total stay in seconds, provided every
      // connect event has a matching disconnect event for the same key.
      // Re-keyed by station id so it can be joined with the coordinates.
      val stayByStation = signedEvents.reduceByKey(_ + _).map {
        case ((mobile, stationId), staySeconds) => (stationId, (mobile, staySeconds))
      }

      // (stationId, (longitude, latitude))
      val stationCoords =
        sc.textFile("data/usercount/loc_info.txt").flatMap(line => parseStation(line))

      // Flatten the join result; the emitted order is (mobile, station, stay, latitude, longitude).
      val visits = stayByStation.join(stationCoords).map {
        case (stationId, ((mobile, staySeconds), (longitude, latitude))) =>
          (mobile, stationId, staySeconds, latitude, longitude)
      }

      // A global sort would mix users, so group per mobile number first and
      // rank inside each group. Cached because two actions consume the result.
      val topTwoPerMobile = visits
        .groupBy(_._1)
        .flatMapValues(_.toList.sortBy(_._3)(Ordering[Long].reverse).take(2))
        .cache()

      topTwoPerMobile.saveAsTextFile("data/out/lacation_out")
      println(topTwoPerMobile.collect().toBuffer)
    } finally {
      // Release the context even when a stage fails.
      sc.stop()
    }
  }
}

阅读全文
0 0
原创粉丝点击