ScalaConn: code changes to enable Hive in Spark

package DAO

import java.io.FileInputStream
import java.sql._
import java.text.SimpleDateFormat
import java.util.{Calendar, Date, GregorianCalendar, HashMap, List, Properties}

import com.google.protobuf.TextFormat.ParseException
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.filter.PageFilter
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Encoder, Row, SQLContext, SparkSession}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.slf4j.LoggerFactory

object ScalaConn {

  def LOG = LoggerFactory.getLogger(getClass)

  /**
   * Load the initialization parameters from params.properties (on the classpath)
   * into the Constant holder object defined elsewhere in the project.
   */
  def initParams(): Unit = {
    try {
      val prop = new Properties()
//      val path = Thread.currentThread().getContextClassLoader.getResource("params.properties").getPath
//      prop.load(new FileInputStream(path))
      prop.load(ScalaConn.getClass.getResourceAsStream("/params.properties"))

      Constant.P2P_DB_URL = prop.getProperty("p2p.url")
      Constant.P2P_DB_USER = prop.getProperty("p2p.username")
      Constant.P2P_DB_PWD = prop.getProperty("p2p.password")

      Constant.HAD_DB_URL = prop.getProperty("hadoop.url")
      Constant.HAD_DB_USER = prop.getProperty("hadoop.username")
      Constant.HAD_DB_PWD = prop.getProperty("hadoop.password")

      Constant.FTOU_DB_URL = prop.getProperty("ftoulanaly.url")
      Constant.FTOU_DB_USER = prop.getProperty("ftoulanaly.username")
      Constant.FTOU_DB_PWD = prop.getProperty("ftoulanaly.password")

      Constant.CRAW_DB_URL = prop.getProperty("ftcrawler.url")
      Constant.CRAW_DB_USER = prop.getProperty("ftcrawler.username")
      Constant.CRAW_DB_PWD = prop.getProperty("ftcrawler.password")

      Constant.DEDE_DB_URL = prop.getProperty("dedetest.url")
      Constant.DEDE_DB_USER = prop.getProperty("dedetest.username")
      Constant.DEDE_DB_PWD = prop.getProperty("dedetest.password")

      Constant.SHOP_DB_URL = prop.getProperty("ftoul_shop.url")
      Constant.SHOP_DB_USER = prop.getProperty("ftoul_shop.username")
      Constant.SHOP_DB_PWD = prop.getProperty("ftoul_shop.password")

      Constant.HBASE_ZOOKEEPER = prop.getProperty("hbase.zookeeper.quorum")
      Constant.HBASE_MASTER = prop.getProperty("hbase.master")
      Constant.HBASE_ROOTDIR = prop.getProperty("hbase.rootdir")

      Constant.SPARK_WAREHOUSE = prop.getProperty("spark.sql.warehouse.dir")
      Constant.HIVE_METASTORE_URIS = prop.getProperty("hive.metastore.uris")
      Constant.FS_DEFAULTFS = prop.getProperty("fs.defaultFS")
      Constant.DFS_NAMESERVICES = prop.getProperty("dfs.nameservices")
      Constant.DFS_HA_NAMENODES_CLUSTER = prop.getProperty("dfs.ha.namenodes.cluster")
      Constant.DFS_NAMENODE_RPC_1 = prop.getProperty("dfs.namenode.rpc-address.cluster.nn1")
      Constant.DFS_NAMENODE_RPC_2 = prop.getProperty("dfs.namenode.rpc-address.cluster.nn2")
    } catch {
      case e: Exception =>
        e.printStackTrace()
    }
  }

  /** Build an HBase Configuration from the values loaded into Constant. */
  def getHbaseConf: Configuration = {
    val conf: Configuration = HBaseConfiguration.create
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("spark.executor.memory", "3000m")
    conf.set("hbase.zookeeper.quorum", Constant.HBASE_ZOOKEEPER)
    conf.set("hbase.master", Constant.HBASE_MASTER)
    conf.set("hbase.rootdir", Constant.HBASE_ROOTDIR)
    conf
  }

//  sparkConf.set("es.nodes", Constant.ES_ADDRESS)
  val driver = "com.mysql.jdbc.Driver"
//  val jdbcUrl = "jdbc:mysql://192.168.100.233:3306/sp2p628"
//  val MYSQL_USERNAME: String = "root"
//  val MYSQL_PWD: String = "dzwang**"
//  val MYSQL_CONNECTION_URL: String = "jdbc:mysql://192.168.100.233:3306/sp2p628"

  val warehouseLocation = "file:${system:user.dir}/spark-warehouse"

//  val spark = SparkSession
//    .builder()
//    .master("local[*]") // spark://192.168.100.110:7077
//    .appName("SparkSQL")
//    .config("spark.sql.warehouse.dir", "file:///root/project/dcproject/spark-warehouse")
//    .getOrCreate()

  // Hive-enabled SparkSession. The metastore URI and HDFS HA settings are hardcoded here;
  // the same values are also available through Constant after initParams().
  val spark = SparkSession
    .builder()
    .master("local[*]")
    .appName("SparkHive")
    .config("spark.sql.warehouse.dir", "file:///root/project/dcproject/spark-warehouse")
    .config("hive.metastore.uris", "thrift://sy-003.hadoop:9083")
    .config("fs.defaultFS", "hdfs://cluster")
    .config("dfs.nameservices", "cluster")
    .config("dfs.ha.namenodes.cluster", "nn1,nn2")
    .config("dfs.namenode.rpc-address.cluster.nn1", "sy-002.hadoop:8020")
    .config("dfs.namenode.rpc-address.cluster.nn2", "sy-003.hadoop:8020")
    .config("dfs.client.failover.proxy.provider.cluster",
      "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
    .enableHiveSupport()
    .getOrCreate()

  def getJdbcConn(): Unit = {
//    var connection: Connection = null
    try {
      Class.forName(driver)
//      connection = DriverManager.getConnection(jdbcUrl, MYSQL_USERNAME, MYSQL_PWD)
//      val statement = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)
//      val resultSet = statement.executeQuery("select name, password from scala_t")
//      while (resultSet.next()) {
//        val name = resultSet.getString("name")
//        val password = resultSet.getString("password")
//      }
//      return statement
    } catch {
      case e: Exception =>
        LOG.error(e.toString)
    }
  }

  def getP2PDataFrame(tableName: String): DataFrame = {
    val property = new Properties()
    val url = Constant.P2P_DB_URL
    property.put("user", Constant.P2P_DB_USER)
    property.put("password", Constant.P2P_DB_PWD)
    val jdbcDF = spark.read.jdbc(url, tableName, property)
    jdbcDF
  }

  def getHadoopDataFrame(tableName: String): DataFrame = {
    val property = new Properties()
    val url = Constant.HAD_DB_URL
    property.put("user", Constant.HAD_DB_USER)
    property.put("password", Constant.HAD_DB_PWD)
    property.put("useSSL", "false")
    val jdbcDF = spark.read.jdbc(url, tableName, property)
    jdbcDF
  }

  def getHadoopFtoulanalyDataFrame(tableName: String): DataFrame = {
    val property = new Properties()
    val url = Constant.FTOU_DB_URL
    property.put("user", Constant.FTOU_DB_USER)
    property.put("password", Constant.FTOU_DB_PWD)
    val jdbcDF = spark.read.jdbc(url, tableName, property)
    jdbcDF
  }

  def getHadoopFtcrawlerDataFrame(tableName: String): DataFrame = {
    val property = new Properties()
    val url = Constant.CRAW_DB_URL
    property.put("user", Constant.CRAW_DB_USER)
    property.put("password", Constant.CRAW_DB_PWD)
    val jdbcDF = spark.read.jdbc(url, tableName, property)
    jdbcDF
  }

  def getHadoopDedetestDataFrame(tableName: String): DataFrame = {
    val property = new Properties()
    val url = Constant.DEDE_DB_URL
    property.put("user", Constant.DEDE_DB_USER)
    property.put("password", Constant.DEDE_DB_PWD)
    val jdbcDF = spark.read.jdbc(url, tableName, property)
    jdbcDF
  }

  def getShopDataFrame(tableName: String): DataFrame = {
    val property = new Properties()
    val url = Constant.SHOP_DB_URL
    property.put("user", Constant.SHOP_DB_USER)
    property.put("password", Constant.SHOP_DB_PWD)
    val jdbcDF = spark.read.jdbc(url, tableName, property)
    jdbcDF
  }

//  def getHaseDataFrame(tableName: String): DataFrame = {
//    getHbaseConf.set(TableInputFormat.INPUT_TABLE, tableName)
//    import spark.implicits._
//    val hbaseRDD = spark.sparkContext.newAPIHadoopRDD(getHbaseConf, classOf[TableInputFormat],
//      classOf[ImmutableBytesWritable], classOf[Result])
//    val HbaseDF = hbaseRDD.map(r => (
//      Bytes.toString(r._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("customer_id"))),
//      Bytes.toString(r._2.getValue(Bytes.toBytes("info"), Bytes.toBytes("create_id")))
//      )).toDF("customer_id", "create_id")
//    shop.createOrReplaceTempView("shop")
//    HbaseDF
//  }

  /**
   * Get the day before the specified date (both in yyyyMMdd format).
   */
  def getSpecifiedDayBefore(specifiedDay: String): String = {
    val c: Calendar = Calendar.getInstance()
    var date: Date = null
    try {
      date = new SimpleDateFormat("yyyyMMdd").parse(specifiedDay)
    } catch {
      case e: Exception =>
        e.printStackTrace()
    }
    c.setTime(date)
    val day = c.get(Calendar.DATE)
    c.set(Calendar.DATE, day - 1)
    val dayBefore: String = new SimpleDateFormat("yyyyMMdd").format(c.getTime())
    dayBefore
  }

  /**
   * Get the day after the specified date (both in yyyyMMdd format).
   */
  def getSpecifiedDayAfter(specifiedDay: String): String = {
    val c: Calendar = Calendar.getInstance()
    var date: Date = null
    try {
      date = new SimpleDateFormat("yyyyMMdd").parse(specifiedDay)
    } catch {
      case e: Exception =>
        e.printStackTrace()
    }
    c.setTime(date)
    val day = c.get(Calendar.DATE)
    c.set(Calendar.DATE, day + 1)
    val dayAfter: String = new SimpleDateFormat("yyyyMMdd").format(c.getTime())
    dayAfter
  }

  /** Parse "yyyy-MM-dd HH:mm:ss" into a Timestamp; falls back to the current time on error. */
  def GetStampByTime(time: String): Timestamp = {
    var Stamp: Timestamp = new Timestamp(System.currentTimeMillis)
    val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    var date: Date = null
    try {
      date = sdf.parse(time)
      Stamp = new Timestamp(date.getTime)
//      println("DT:" + Stamp)
    } catch {
      case e: Exception =>
        LOG.error(e.toString)
    }
    Stamp
  }

  /** Parse "yyyyMMddHHmmss" into a Timestamp; falls back to the current time on error. */
  def GetStampByTime1(time: String): Timestamp = {
    var Stamp: Timestamp = new Timestamp(System.currentTimeMillis)
    val sdf: SimpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmss")
    var date: Date = null
    try {
      date = sdf.parse(time)
      Stamp = new Timestamp(date.getTime)
//      println("DT:" + Stamp)
    } catch {
      case e: Exception =>
        LOG.error(e.toString)
    }
    Stamp
  }

  /** Return the date i days from today, formatted as yyyyMMdd. */
  def evaluate(i: Int): String = {
    val date: Date = new Date() // current time
    val sf: SimpleDateFormat = new SimpleDateFormat("yyyyMMdd")
//    val nowDate: String = sf.format(date)
//    println("nowDate=" + nowDate)
    val time: Long = (date.getTime() / 1000) + 60 * 60 * 24 * i // seconds
    date.setTime(time * 1000) // milliseconds
    val nextDate: String = sf.format(date)
//    println("nextDate=" + nextDate)
    nextDate
  }

//  fmt: yyyy-MM-dd HH:mm:ss
  def getYesterday(i: Int, fmt: String): String = {
    var ft = ""
    if (fmt.isEmpty) {
      ft = "yyyy-MM-dd HH:mm:ss"
    } else {
      ft = fmt
    }
    val dateFormat: SimpleDateFormat = new SimpleDateFormat(ft)
    val cal: Calendar = Calendar.getInstance()
    cal.add(Calendar.DATE, i)
    val yesterday = dateFormat.format(cal.getTime())
    yesterday
  }

  /**
   * Reverse-scan the given HBase table and return the value of colFamily:dt from the
   * first (latest) row; falls back to "2014-12-01 00:00:00" when nothing is found.
   */
  def getMaxDate(conf: Configuration, hconn: Connection, table: Table, colFamily: String, dt: String): String = {
    var maxDate: String = "2014-12-01 00:00:00"
    var results: ResultScanner = null
//    val table: Table = hconn.getTable(TableName.valueOf(tableName))
    try {
      val scan: Scan = new Scan
      scan.setReversed(true)
      scan.setMaxVersions()
      scan.setMaxResultSize(1)
      scan.setFilter(new PageFilter(1))
      scan.addFamily(Bytes.toBytes(colFamily))
      scan.addColumn(Bytes.toBytes(colFamily), Bytes.toBytes(dt))
      results = table.getScanner(scan)
//      println("results ==================" + results.next())
      var i = 0
      import scala.collection.JavaConversions._
      for (r <- results) {
        if (i == 0) {
          maxDate = new String(r.getValue(Bytes.toBytes(colFamily), Bytes.toBytes(dt)))
        }
//        i = i + 1
//        println("maxDate ==================" + maxDate)
      }
      if ((maxDate == "") || (maxDate == null) || (maxDate == "null")) {
        maxDate = "2014-12-01 00:00:00"
      }
      LOG.info("maxDate=" + maxDate)
    } catch {
      case e: Exception =>
        LOG.error(e.toString)
    } finally {
      try {
        if (results != null) results.close()
      } catch {
        case e: Exception =>
          LOG.error(e.toString)
      }
    }
    maxDate
  }

  /** Left-pad str with zeros until it is strLength characters long. */
  def addZeroForNum(str: String, strLength: Int): String = {
    var strg = str
    var strLen: Int = strg.length()
    var sb: StringBuffer = null
    while (strLen < strLength) {
      sb = new StringBuffer()
      sb.append("0").append(strg) // left-pad with 0
      // sb.append(str).append("0") // right-pad with 0
      strg = sb.toString()
      strLen = strg.length()
    }
    strg
  }
}
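For context, here is a minimal sketch of how the Hive-enabled ScalaConn object might be called from a driver program. It only uses members shown above (initParams, spark, getP2PDataFrame); the Constant holder is assumed to exist elsewhere in the project, and the table name "b_user" and the Hive query are hypothetical examples, not part of the original code.

package DAO

object ScalaConnExample {
  def main(args: Array[String]): Unit = {
    // Populate Constant from params.properties before touching any data source.
    ScalaConn.initParams()

    // The Hive-enabled SparkSession is created eagerly as a field of ScalaConn,
    // so tables registered in the Hive metastore can be queried directly.
    val spark = ScalaConn.spark
    spark.sql("show databases").show()

    // Pull a MySQL table through one of the JDBC helpers (table name is hypothetical)
    // and expose it to Spark SQL alongside the Hive tables.
    val users = ScalaConn.getP2PDataFrame("b_user")
    users.createOrReplaceTempView("b_user")
    spark.sql("select count(*) from b_user").show()

    spark.stop()
  }
}

Because the SparkSession is built with enableHiveSupport() and an explicit hive.metastore.uris, any spark.sql(...) call in the example resolves table names against the remote metastore rather than a local Derby instance.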