ScalaConn 中使用SPARK 启用hive 代码修改
来源:互联网 发布:mysql 修改表结构 编辑:程序博客网 时间:2024/06/05 14:26
package DAO

import java.sql._
import java.text.SimpleDateFormat
import java.util.{Calendar, Date, GregorianCalendar, HashMap, List, Properties}
import java.io.FileInputStream

import com.google.protobuf.TextFormat.ParseException
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.client.{Connection, Table}
import org.apache.hadoop.hbase.filter.PageFilter
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Encoder, Row, SQLContext, SparkSession}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.slf4j.LoggerFactory

/**
 * Shared connection/utility object for the ETL jobs:
 *  - a Hive-enabled [[SparkSession]],
 *  - JDBC `DataFrame` loaders for the various MySQL schemas,
 *  - an HBase client `Configuration` and a max-date scan helper,
 *  - assorted date-string helpers (yyyyMMdd shifting, Timestamp parsing, zero-padding).
 *
 * Connection parameters are read from `/params.properties` into the
 * project-level `Constant` holder by [[initParams]].
 */
object ScalaConn {

  def LOG = LoggerFactory.getLogger(getClass)

  /**
   * Populates the `Constant` holder from `/params.properties` on the classpath.
   * Any failure is logged and swallowed, leaving the previous Constant values in place.
   */
  def initParams(): Unit = {
    try {
      val prop = new Properties()
      // Load from the classpath rather than a filesystem path so it works inside a jar.
      prop.load(ScalaConn.getClass.getResourceAsStream("/params.properties"))

      Constant.P2P_DB_URL = prop.getProperty("p2p.url")
      Constant.P2P_DB_USER = prop.getProperty("p2p.username")
      Constant.P2P_DB_PWD = prop.getProperty("p2p.password")
      Constant.HAD_DB_URL = prop.getProperty("hadoop.url")
      Constant.HAD_DB_USER = prop.getProperty("hadoop.username")
      Constant.HAD_DB_PWD = prop.getProperty("hadoop.password")
      Constant.FTOU_DB_URL = prop.getProperty("ftoulanaly.url")
      Constant.FTOU_DB_USER = prop.getProperty("ftoulanaly.username")
      Constant.FTOU_DB_PWD = prop.getProperty("ftoulanaly.password")
      Constant.CRAW_DB_URL = prop.getProperty("ftcrawler.url")
      Constant.CRAW_DB_USER = prop.getProperty("ftcrawler.username")
      Constant.CRAW_DB_PWD = prop.getProperty("ftcrawler.password")
      Constant.DEDE_DB_URL = prop.getProperty("dedetest.url")
      Constant.DEDE_DB_USER = prop.getProperty("dedetest.username")
      Constant.DEDE_DB_PWD = prop.getProperty("dedetest.password")
      Constant.SHOP_DB_URL = prop.getProperty("ftoul_shop.url")
      Constant.SHOP_DB_USER = prop.getProperty("ftoul_shop.username")
      Constant.SHOP_DB_PWD = prop.getProperty("ftoul_shop.password")
      Constant.HBASE_ZOOKEEPER = prop.getProperty("hbase.zookeeper.quorum")
      Constant.HBASE_MASTER = prop.getProperty("hbase.master")
      Constant.HBASE_ROOTDIR = prop.getProperty("hbase.rootdir")
      Constant.SPARK_WAREHOUSE = prop.getProperty("spark.sql.warehouse.dir")
      Constant.HIVE_METASTORE_URIS = prop.getProperty("hive.metastore.uris")
      Constant.FS_DEFAULTFS = prop.getProperty("fs.defaultFS")
      Constant.DFS_NAMESERVICES = prop.getProperty("dfs.nameservices")
      Constant.DFS_HA_NAMENODES_CLUSTER = prop.getProperty("dfs.ha.namenodes.cluster")
      Constant.DFS_NAMENODE_RPC_1 = prop.getProperty("dfs.namenode.rpc-address.cluster.nn1")
      Constant.DFS_NAMENODE_RPC_2 = prop.getProperty("dfs.namenode.rpc-address.cluster.nn2")
    } catch {
      case e: Exception => e.printStackTrace()
    }
  }

  /** Builds an HBase client configuration from the Constant holder (quorum, master, rootdir). */
  def getHbaseConf: Configuration = {
    val conf: Configuration = HBaseConfiguration.create
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("spark.executor.memory", "3000m")
    conf.set("hbase.zookeeper.quorum", Constant.HBASE_ZOOKEEPER)
    conf.set("hbase.master", Constant.HBASE_MASTER)
    conf.set("hbase.rootdir", Constant.HBASE_ROOTDIR)
    conf
  }

  val driver = "com.mysql.jdbc.Driver"

  // NOTE(review): plain string literal, not an s-interpolator — "${system:user.dir}" is
  // passed through verbatim (Hive-style variable syntax). Appears unused here.
  val warehouseLocation = "file:${system:user.dir}/spark-warehouse"

  // NOTE(review): this session hard-codes cluster addresses instead of using the
  // Constant values loaded by initParams() — confirm which source of truth is intended.
  val spark = SparkSession
    .builder()
    .master("local[*]")
    .appName("SparkHive")
    .config("spark.sql.warehouse.dir", "file:///root/project/dcproject/spark-warehouse")
    .config("hive.metastore.uris", "thrift://sy-003.hadoop:9083")
    .config("fs.defaultFS", "hdfs://cluster")
    .config("dfs.nameservices", "cluster")
    .config("dfs.ha.namenodes.cluster", "nn1,nn2")
    .config("dfs.namenode.rpc-address.cluster.nn1", "sy-002.hadoop:8020")
    .config("dfs.namenode.rpc-address.cluster.nn2", "sy-003.hadoop:8020")
    .config("dfs.client.failover.proxy.provider.cluster",
      "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
    .enableHiveSupport()
    .getOrCreate()

  /**
   * Registers the MySQL JDBC driver class. The connection-building code this method
   * once contained was commented out upstream; only the driver registration remains.
   */
  def getJdbcConn {
    try {
      Class.forName(driver)
    } catch {
      case e: Exception => LOG.error(e.toString)
    }
  }

  /**
   * Builds a DataFrame over a JDBC table.
   *
   * @param extra additional driver properties (e.g. "useSSL" -> "false")
   */
  private def jdbcDataFrame(url: String, user: String, pwd: String,
                            tableName: String, extra: (String, String)*): DataFrame = {
    val property = new Properties()
    property.put("user", user)
    property.put("password", pwd)
    extra.foreach { case (k, v) => property.put(k, v) }
    spark.read.jdbc(url, tableName, property)
  }

  /** DataFrame over a table in the p2p schema. */
  def getP2PDataFrame(tableName: String): DataFrame =
    jdbcDataFrame(Constant.P2P_DB_URL, Constant.P2P_DB_USER, Constant.P2P_DB_PWD, tableName)

  /** DataFrame over a table in the hadoop schema (SSL disabled, as in the original). */
  def getHadoopDataFrame(tableName: String): DataFrame =
    jdbcDataFrame(Constant.HAD_DB_URL, Constant.HAD_DB_USER, Constant.HAD_DB_PWD,
      tableName, "useSSL" -> "false")

  /** DataFrame over a table in the ftoulanaly schema. */
  def getHadoopFtoulanalyDataFrame(tableName: String): DataFrame =
    jdbcDataFrame(Constant.FTOU_DB_URL, Constant.FTOU_DB_USER, Constant.FTOU_DB_PWD, tableName)

  /** DataFrame over a table in the ftcrawler schema. */
  def getHadoopFtcrawlerDataFrame(tableName: String): DataFrame =
    jdbcDataFrame(Constant.CRAW_DB_URL, Constant.CRAW_DB_USER, Constant.CRAW_DB_PWD, tableName)

  /** DataFrame over a table in the dedetest schema. */
  def getHadoopDedetestDataFrame(tableName: String): DataFrame =
    jdbcDataFrame(Constant.DEDE_DB_URL, Constant.DEDE_DB_USER, Constant.DEDE_DB_PWD, tableName)

  /** DataFrame over a table in the ftoul_shop schema. */
  def getShopDataFrame(tableName: String): DataFrame =
    jdbcDataFrame(Constant.SHOP_DB_URL, Constant.SHOP_DB_USER, Constant.SHOP_DB_PWD, tableName)

  /**
   * Shifts a "yyyyMMdd" date string by `days`.
   * Fix: the original swallowed the ParseException and then NPE'd on
   * `c.setTime(null)`; now a malformed input is logged and returned unchanged.
   */
  private def shiftDay(specifiedDay: String, days: Int): String = {
    try {
      val fmt = new SimpleDateFormat("yyyyMMdd")
      val c: Calendar = Calendar.getInstance()
      c.setTime(fmt.parse(specifiedDay))
      c.add(Calendar.DATE, days)
      fmt.format(c.getTime())
    } catch {
      case e: Exception =>
        LOG.error(e.toString)
        specifiedDay
    }
  }

  /** Day before the given "yyyyMMdd" date. */
  def getSpecifiedDayBefore(specifiedDay: String): String = shiftDay(specifiedDay, -1)

  /** Day after the given "yyyyMMdd" date. */
  def getSpecifiedDayAfter(specifiedDay: String): String = shiftDay(specifiedDay, 1)

  /**
   * Parses `time` with `pattern` into a Timestamp; on failure logs the error
   * and falls back to "now", matching the original behavior.
   */
  private def parseTimestamp(time: String, pattern: String): Timestamp = {
    try {
      new Timestamp(new SimpleDateFormat(pattern).parse(time).getTime)
    } catch {
      case e: Exception =>
        LOG.error(e.toString)
        new Timestamp(System.currentTimeMillis)
    }
  }

  /** Timestamp from "yyyy-MM-dd HH:mm:ss"; current time if unparseable. */
  def GetStampByTime(time: String): Timestamp = parseTimestamp(time, "yyyy-MM-dd HH:mm:ss")

  /** Timestamp from "yyyyMMddHHmmss"; current time if unparseable. */
  def GetStampByTime1(time: String): Timestamp = parseTimestamp(time, "yyyyMMddHHmmss")

  /**
   * Today shifted by `i` days, formatted "yyyyMMdd".
   * Fix: the day-offset product is computed in Long (the original's Int product
   * `60 * 60 * 24 * i` could overflow for large |i|).
   */
  def evaluate(i: Int): String = {
    val sf = new SimpleDateFormat("yyyyMMdd")
    val date: Date = new Date()
    val time: Long = (date.getTime() / 1000) + 60L * 60 * 24 * i // seconds
    date.setTime(time * 1000) // back to milliseconds
    sf.format(date)
  }

  /**
   * Now shifted by `i` days, formatted with `fmt`
   * (defaults to "yyyy-MM-dd HH:mm:ss" when `fmt` is empty).
   */
  def getYesterday(i: Int, fmt: String): String = {
    val pattern = if (fmt.isEmpty) "yyyy-MM-dd HH:mm:ss" else fmt
    val cal: Calendar = Calendar.getInstance()
    cal.add(Calendar.DATE, i)
    new SimpleDateFormat(pattern).format(cal.getTime())
  }

  /**
   * Reads the newest value of `colFamily:dt` from `table` via a reversed scan
   * (row keys sort ascending, so the first row of a reversed scan is the max).
   * Returns "2014-12-01 00:00:00" when the table is empty or the value is
   * blank/"null", or when any error occurs.
   *
   * Fix: the original's loop counter increment was commented out, so every
   * scanned row overwrote the result (last row won); now only the first row
   * of the reversed scan — the actual maximum — is read.
   */
  def getMaxDate(conf: Configuration, hconn: Connection, table: Table,
                 colFamily: String, dt: String): String = {
    val defaultDate = "2014-12-01 00:00:00"
    var maxDate: String = defaultDate
    var results: ResultScanner = null
    try {
      val scan: Scan = new Scan
      scan.setReversed(true)
      scan.setMaxVersions()
      scan.setMaxResultSize(1)
      scan.setFilter(new PageFilter(1))
      scan.addFamily(Bytes.toBytes(colFamily))
      scan.addColumn(Bytes.toBytes(colFamily), Bytes.toBytes(dt))
      results = table.getScanner(scan)
      val r = results.next()
      if (r != null) {
        maxDate = new String(r.getValue(Bytes.toBytes(colFamily), Bytes.toBytes(dt)))
      }
      if ((maxDate == "") || (maxDate == null) || (maxDate == "null")) {
        maxDate = defaultDate
      }
      LOG.info("maxDate=" + maxDate)
    } catch {
      case e: Exception => LOG.error(e.toString)
    } finally {
      try {
        if (results != null) results.close
      } catch {
        case e: Exception => LOG.error(e.toString)
      }
    }
    maxDate
  }

  /** Left-pads `str` with '0' characters up to `strLength`; unchanged if already long enough. */
  def addZeroForNum(str: String, strLength: Int): String =
    if (str.length >= strLength) str
    else ("0" * (strLength - str.length)) + str
}
阅读全文
0 0
- ScalaConn 中使用SPARK 启用hive 代码修改
- ScalaConn
- Spark Hive使用中遇到的坑
- Spark Hive在Eclipse代码中直接编译问题
- spark 使用hive metastore
- spark操作hive简单代码
- Spark(Hive) SQL中UDF的使用(Python)
- spark sql 中 hive变量的使用记录
- spark使用Hive表操作
- YARN、Spark、Hive使用kerberos
- 在Spark中使用UDF对HIVE表进行查询,再将查询结果RDD写入另一个HIVE表
- Spark SQL中实现Hive MapJoin
- Spark SQL中实现Hive MapJoin
- Spark SQL在Hive中的使用
- spark使用hive出错,添加以下配置
- spark sql 使用hive作为数据源
- Spark SQL和Hive使用场景?
- spark sql on hive配置及其使用
- 关于Scroller(转载)
- 成为软件测试人员所必须掌握的六项技能
- Android6.0 创建TYPE_SYSTEM_ALERT级别的弹出框方法
- office2010安装提示需要MSXML版本6.10.1129.0
- 一起学Kotlin(2)
- ScalaConn 中使用SPARK 启用hive 代码修改
- Mybaits初识
- tar.gz解压命令
- Oracle如何查看SQL的解释计划
- Guava常用方法
- CoordinatorLayout源码解析之初识Behavior
- MFC中setimer和ontimer定时器函数到时间不能触发执行是因为WM_TIMER消息的优先级低
- linux php添加pdo_mysql 扩展时报错 fatal error: ext/mysqlnd/mysqlnd.h: No such file or directory
- tomcat8.0 startup报错java.util.logging.ErrorManager: 4