scala 代码实例——customer_extract
来源:互联网 发布:淘宝app产品定位 编辑:程序博客网 时间:2024/05/22 12:54
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SQLContext, SparkSession}
import org.apache.spark.rdd.RDD
import util.control.Breaks._

/**
 * Derives per-device visit records (start time / leave time) from raw detection events.
 *
 * Input:  `/spark_data/spark_sql_data.json`, read with `spark.read.json`; the columns
 *         `mac` and `time` are used.
 *         NOTE(review): `time` is assumed to be a numeric timestamp in seconds (the
 *         120 / 300 / 180 constants below only make sense in that unit) -- confirm
 *         against the producer of the file.
 * Output: `visit_records.txt` in the working directory, one JSON object per line:
 *         `{ "mac": "<mac>", "start_time": <n>, "leave_time": <n>}`.
 */
object customer_extract {

  /** Distance between two consecutive sampling instants. */
  private val StepSeconds = 120L

  /** How far past the last detection the sampling keeps going, so the final visit is closed. */
  private val TailSeconds = 300L

  /** Subtracted from the sampling instant at which a visit is found to have ended. */
  private val LeaveBackoffSeconds = 180L

  /**
   * Entry point: loads the detections, extracts the visits of every MAC and writes them out.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("Spark SQL ")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()
    import spark.implicits._

    try {
      val df = spark.read.json("/spark_data/spark_sql_data.json")
      df.createOrReplaceTempView("data")

      // All distinct device MAC addresses. Decoding to String (instead of concatenating
      // the Row itself) avoids the "[value]" brackets that Row.toString adds.
      val macs: Array[String] =
        spark.sql("SELECT DISTINCT mac FROM data WHERE mac IS NOT NULL").as[String].collect()

      val records = new StringBuilder
      macs.foreach { mac =>
        // One query per MAC; the value is bound as a literal through the Column API,
        // never spliced into SQL text. Rows whose time is missing or non-numeric are dropped.
        val times: Array[Long] = df
          .filter($"mac" === mac)
          .select($"time".cast("long").as("time"))
          .filter($"time".isNotNull)
          .as[Long]
          .collect()
          .sorted
        visitRecords(mac, times).foreach(line => records.append(line))
      }

      // Persist the result set; the writer is closed even when the write fails.
      import java.io.{File, PrintWriter}
      val writer = new PrintWriter(new File("visit_records.txt"))
      try writer.write(records.toString)
      finally writer.close()
    } finally {
      spark.stop()
    }
  }

  /**
   * Splits the detections of one device into visits.
   *
   * Starting at the first detection, the timeline is sampled every [[StepSeconds]]. At each
   * instant the number of detections strictly later than that instant is counted:
   *  - while the count keeps dropping, the device is still being seen and the visit goes on;
   *  - the first instant at which the count did not drop closes the visit, with
   *    `leave_time = instant - LeaveBackoffSeconds`;
   *  - after a visit is closed, the next drop of the count opens a new visit whose
   *    `start_time` is that sampling instant.
   *
   * NOTE(review): when the first detection is followed by a gap longer than one step, the
   * first record gets `leave_time = start_time - 60`; this matches the original arithmetic
   * and is left unchanged -- confirm whether clamping to `start_time` is wanted.
   *
   * @param mac   device identifier, copied into every record
   * @param times detection times of that device, sorted ascending
   * @return one newline-terminated JSON line per visit; empty when the device has no
   *         detections or was detected at a single instant only
   */
  private def visitRecords(mac: String, times: Array[Long]): Vector[String] = {
    // First-level filter: a device seen at one instant only is skipped, not fatal.
    if (times.isEmpty || times.head == times.last) {
      Vector.empty
    } else {
      val minTime = times.head
      val maxTime = times.last
      val out = Vector.newBuilder[String]

      var now = minTime
      var step = 0L
      var previousCount = 0
      var inVisit = true
      var startTime = minTime

      // The bound is tested against the previous instant, so the body also runs once for
      // the first instant that reaches maxTime + TailSeconds.
      while (now < maxTime + TailSeconds) {
        now = minTime + step * StepSeconds
        step += 1
        val remaining = times.count(_ > now)

        if (step == 1) {
          previousCount = remaining
          startTime = minTime
        } else if (inVisit && previousCount > remaining) {
          // More detections were passed since the last sample: still present.
          previousCount = remaining
        } else if (inVisit && remaining == previousCount) {
          // Nothing detected during the last step: close the current visit.
          out += formatRecord(mac, startTime, now - LeaveBackoffSeconds)
          startTime = 0L
          inVisit = false
        } else if (!inVisit && previousCount > remaining) {
          // First detection after a gap: a new visit begins at this sample.
          startTime = now
          inVisit = true
        }
      }
      out.result()
    }
  }

  /** Renders one visit as a newline-terminated JSON object; the MAC is quoted and escaped. */
  private def formatRecord(mac: String, startTime: Long, leaveTime: Long): String = {
    val escapedMac = mac.replace("\\", "\\\\").replace("\"", "\\\"")
    s"""{ "mac": "$escapedMac", "start_time": $startTime, "leave_time": $leaveTime}""" + "\n"
  }
}
0 0
- scala 代码实例——customer_extract
- scala 实例——一
- scala实例——二
- scala实例——三
- scala实例——四
- scala实例——五
- scala实例——六
- scala——实例 new_customer_extract
- scala.xml.Utility 代码实例
- Scala学习笔记16【Scala闭包代码实例】
- Spark GraphX 入门实例完整scala代码
- Spark GraphX 入门实例完整Scala代码
- 从零开始学Scala(一)——Scala环境搭建与第一行代码
- 从零开始学Scala(一)——Scala环境搭建与第一行代码
- 从零开始学Scala(一)——Scala环境搭建与第一行代码
- 从零开始学Scala(一)——Scala环境搭建与第一行代码
- scala —— maven scala项目开发
- scala-school_concurrency in scala—Thread based
- 数据结构与算法(14)——队列习题一
- js特效
- MFC学习(02) 矩形移动 (VS2015版本)
- Android框架之路——整体介绍(持续更新...)
- android graphis subsystem
- scala 代码实例——customer_extract
- 51nod 1562 玻璃切割 【线段树】
- shell 变量 数学 运算
- 1647
- Linux虚拟内存实现原理
- 【MySQL-数据备份】
- scala 代码示例
- Java集合之Properties
- Java实现ArrayList去重复值