scala实例——六
来源:互联网 发布:mac电磁阀型号 编辑:程序博客网 时间:2024/06/05 04:01
import scala.util.control.Breaks._
import org.apache.spark.sql._
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, SQLContext, SparkSession}
import org.apache.spark.rdd.RDD
// NOTE: the former import of the internal class
// org.apache.spark.sql.execution.vectorized.ColumnarBatch.Row was dropped on
// purpose: as an explicit import it shadowed org.apache.spark.sql.Row, so
// every cast of a collect() result to `Row` failed with ClassCastException.
import scala.util.Random
import scala.util.control.Breaks
import java.io.{File, PrintWriter}
import java.util.Locale

/** Daily visit statistics over the `visit` records.
  *
  * Reads `/spark_data/visit_records.json`, walks the covered time range in
  * 86400-second steps starting at the smallest `in_time`, and for every step
  * emits one JSON line holding the jump-out rate, deep-visit rate, average
  * stay time, new/old customer counts and number of visits.
  *
  * NOTE(review): `in_time` and `stay_time` are assumed to be numeric columns
  * expressed in seconds — confirm against the input data.
  */
object base_day_analyse {

  /** Length of one reporting window, in the unit of `in_time`. */
  private val SecondsPerDay = 86400L

  /** Runs an aggregate query and returns its single result row. */
  private def firstRow(spark: SparkSession, query: String): Row =
    spark.sql(query).collect().head

  /** Reads a numeric column as Long; SQL NULL (aggregate over no rows) becomes 0.
    *
    * Going through `Number` accepts Int, Long and Double alike, whereas
    * `getInt` throws for COUNT (bigint) and AVG (double) results.
    */
  private def longAt(row: Row, index: Int): Long =
    if (row.isNullAt(index)) 0L else row.getAs[Number](index).longValue

  /** Formats `part / total` with two decimals; 0.00 when `total` is zero.
    *
    * A fixed locale keeps the decimal separator a '.', so the output stays
    * valid JSON regardless of the JVM default locale.
    */
  private def rate(part: Long, total: Long): String = {
    val value = if (total == 0L) 0.0 else part.toDouble / total.toDouble
    "%1.2f".formatLocal(Locale.ROOT, value)
  }

  /** Entry point: computes the per-day report and writes it to disk.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("base_day_analyse")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

    try {
      spark.read.json("/spark_data/visit_records.json").createOrReplaceTempView("visit")

      val report = new StringBuilder
      // One aggregate instead of two sorted scans; both values are NULL when
      // the view holds no usable in_time, in which case no day is reported.
      val bounds = firstRow(spark, "SELECT MIN(in_time), MAX(in_time) FROM visit")

      if (!bounds.isNullAt(0)) {
        val minTime = longAt(bounds, 0)
        val maxTime = longAt(bounds, 1)

        var dayStart = minTime
        var lastCustomerNum = 0L // distinct customers seen before the current day

        while (dayStart <= maxTime) {
          // Half-open window [dayStart, dayEnd): a record lying exactly on a
          // boundary belongs to one day only.
          val dayEnd = dayStart + SecondsPerDay

          // Distinct customers with a positive stay inside the current day.
          val intervalCustomerNum = longAt(firstRow(spark,
            "SELECT COUNT(DISTINCT mac) num from visit where `in_time` >= " + dayStart +
              " and `in_time` < " + dayEnd + " and stay_time > 0"), 0)

          // Distinct customers with a positive stay from the start up to the end of this day.
          val nowCustomerNum = longAt(firstRow(spark,
            "SELECT COUNT(DISTINCT mac) num from visit where `in_time` >= " + minTime +
              " and `in_time` < " + dayEnd + " and stay_time > 0"), 0)

          val newCustomerNum = nowCustomerNum - lastCustomerNum
          val oldCustomerNum = intervalCustomerNum - newCustomerNum

          // Visits, average stay, short visits (<= 180) and long visits (>= 1200)
          // of the day, gathered in a single scan.
          val stats = firstRow(spark,
            "SELECT count(*) visit_num, AVG(stay_time) avg_stay_time, " +
              "SUM(CASE WHEN stay_time <= 180 THEN 1 ELSE 0 END) jump_num, " +
              "SUM(CASE WHEN stay_time >= 1200 THEN 1 ELSE 0 END) deep_in_num " +
              "from visit where `in_time` >= " + dayStart + " and `in_time` < " + dayEnd)

          val visitNum = longAt(stats, 0)
          // AVG yields a double (NULL for an empty day); reported as a whole number.
          val avgStayTime =
            if (stats.isNullAt(1)) 0L else math.round(stats.getAs[Number](1).doubleValue)
          val jumpRate = rate(longAt(stats, 2), visitNum)
          val deepInRate = rate(longAt(stats, 3), visitNum)

          // Append this day's record to the result set.
          // NOTE(review): "customer_num" carries the visit count (count(*)), as
          // in the original program — confirm that this is the intended value.
          report
            .append(s"""{"time":$dayStart,"jump_out_rate":$jumpRate,"deep_in_rate":$deepInRate,""")
            .append(s""""avg_stay_time":$avgStayTime,"new_num":$newCustomerNum,""")
            .append(s""""old_num":$oldCustomerNum,"customer_num":$visitNum}""")
            .append('\n')

          dayStart = dayEnd
          lastCustomerNum = nowCustomerNum
        }
      }

      // Store the result set in the output file; close it even if the write fails.
      val writer = new PrintWriter(new File("\\sparkdata\\base_day_analyse.json"))
      try writer.write(report.toString)
      finally writer.close()
    } finally {
      spark.stop()
    }
  }
}
0 0
- scala实例——六
- scala 实例——一
- scala实例——二
- scala实例——三
- scala实例——四
- scala实例——五
- scala——实例 new_customer_extract
- scala 代码实例——customer_extract
- Scala进阶源码实战之六——类型变量
- scala 学习(六)——使用filter创建数组
- Scala函数式编程(六)——闭包
- Scala学习(六)---Scala对象
- Scala练习(六)
- Scala学习笔记(六)
- 实例六
- Scala学习六:Scala中的特殊字符
- Scala 学习笔记(六)------Scala 函数嵌套
- [Scala]Scala学习笔记六 文件
- Spring MVC restful 路径小数点问题
- System Verilog视频学习笔记(8)- Randomization
- MonkeyRunner
- Linux网络编程[DNS解析原理,了解相关DNS解析的函数]
- c#生成安装包程序和卸载程序
- scala实例——六
- 组件化架构漫谈
- 【JSP学习笔记(2)】——JavaScript应用
- 元素互不相邻的最大和子数组
- C6-1 最大子数组和
- hdu 1114 Piggy-Bank(完全背包)
- Mysql SQL查询今天、昨天、n天内、第n天
- 文件超过某个大小就删除(C语言)
- 关于UIScrollView的一点小发现