Scala 解析 JSON 日志

来源:互联网 发布:软件试运行报告 编辑:程序博客网 时间:2024/06/05 18:31
package com.ys.scala


import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext


/**
 * Counts the product ids found in a log file whose lines embed a JSON payload.
 *
 * Each log line is expected to consist of a free-form prefix followed by one
 * JSON object, for example:
 * {{{
 * 2017-04-25 17:18:59,173 - {"Querystorebyproductid":{"productId":"8844882", ...}}
 * }}}
 * The JSON part of every line is loaded into a DataFrame, registered as the
 * temporary table `productInfo`, and queried with Spark SQL.
 */
object ProductInfoDemo {

  /**
   * Runs the job locally and prints the resulting count row(s) on the driver.
   *
   * @param args optional; `args(0)` is the path of the log file to read.
   *             Defaults to `productInfo.json` when no argument is given.
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse("productInfo.json")

    val conf = new SparkConf().setAppName("ProductInfoDemo").setMaster("local[2]")
    val sc = new SparkContext(conf)

    try {
      val sqlContext = new SQLContext(sc)
      val lines = sc.textFile(inputPath, 2)

      // Keep everything from the first '{' onward. Splitting on whitespace and
      // guessing by token length would break any payload that contains a space
      // and would let long non-JSON tokens through to the JSON reader.
      val jsonRecords = lines
        .filter(_.contains("{"))
        .map(line => line.substring(line.indexOf('{')).trim)

      val productInfoDF = sqlContext.read.json(jsonRecords)
      // NOTE(review): registerTempTable is the Spark 1.x API; on Spark 2.x+ the
      // replacement is createOrReplaceTempView -- confirm the target version.
      productInfoDF.registerTempTable("productInfo")

      val productInfo =
        sqlContext.sql("select count(Querystorebyproductid.productId) from productInfo")

      // Bring the (tiny) result to the driver before printing; a distributed
      // foreach(println) would print on the executors instead.
      productInfo.collect().foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }

}


2017-04-25 17:18:59,173 - {"Querystorebyproductid":{"productId":"8844882","storeId":"11240051","userId":"1320046117","latitude":"10108.11","longitude":"10086.37"}}
2017-04-25 17:18:59,173 - {"Querystorebyproductid":{"productId":"8844883","storeId":"11240052","userId":"1320046118","latitude":"10109.11","longitude":"10087.37"}}
2017-04-25 17:18:59,173 - {"Querystorebyproductid":{"productId":"8844884","storeId":"11240053","userId":"1320046119","latitude":"10110.11","longitude":"10088.37"}}
2017-04-25 17:18:59,173 - {"Querystorebyproductid":{"productId":"8844885","storeId":"11240054","userId":"1320046120","latitude":"10111.11","longitude":"10089.37"}}
2017-04-25 17:18:59,173 - {"Querystorebyproductid":{"productId":"8844886","storeId":"11240055","userId":"1320046121","latitude":"10112.11","longitude":"10090.37"}}
2017-04-25 17:18:59,173 - {"Querystorebyproductid":{"productId":"8844887","storeId":"11240056","userId":"1320046122","latitude":"10113.11","longitude":"10091.37"}}

0 0
原创粉丝点击