hbaseFilter使用,以及简单封装

来源:互联网 发布:sem优化策略 编辑:程序博客网 时间:2024/06/10 02:12

学习笔记:

简单地将spark访问hbase时使用filter过滤的操作进行封装,

功能并不完善,目前无法做到把case class以反射的方式传入到方法中进行字段的构造。

把注册表的操作放在单独处理的部分


//不使用case class也可以构建DF:用元组替换即可,此时默认列名为(_1,_2,_3,_4...),如需指定列名可以使用 toDF("id", "cookieid", ...) 传入列名

package cn.DaLong_hbase

import java.util

import scala.collection.JavaConverters._

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{HTable, ResultScanner, Scan}
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
  * Created by DreamBoy on 2017/5/10.
  *
  * Sets up the Spark / HBase environment once (in the primary constructor) and
  * opens filtered HBase scans on demand. Create a single instance and reuse it
  * for every table; call [[close]] once when finished.
  */
class GetTableNm {

  // Keep Spark's own logging quiet.
  Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)

  // Spark settings
  val conf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("HbaseTest")
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  val sc: SparkContext = new SparkContext(conf)
  val hbaseConf = HBaseConfiguration.create()
  val sqlContext: SQLContext = new SQLContext(sc)

  // HBase settings
  // NOTE(review): "hdfs://http://..." is not a well-formed HDFS URI; it is kept
  // unchanged because the intended namenode address/port is not visible here.
  hbaseConf.set("hbase.rootdir", "hdfs://http://192.168.10.228/hbase")
  hbaseConf.set("hbase.zookeeper.quorum", "192.168.10.228,192.168.10.229,192.168.10.230,192.168.10.231,192.168.10.232")
  hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
  hbaseConf.set("hbase.master", "192.168.10.230")

  // Tables opened by GetTableNm(...). A table must outlive the scanner that is
  // handed back to the caller, so tables are released together in close().
  private val openTables = scala.collection.mutable.ArrayBuffer.empty[HTable]

  /**
    * Opens a scan over `tbl_nm` restricted to the requested columns and to the
    * rows that satisfy ALL of the given filters.
    *
    * @param tbl_nm      HBase table name
    * @param show_info   columns to fetch, one (column family, qualifier) pair each
    * @param filter_info filters, one (column family, qualifier, value, operator)
    *                    tuple each; operator is one of "=", "<", "<=", ">", ">=", "!="
    * @return the shared SQLContext (identical on every call, so it only needs to
    *         be captured once) and the scanner over the matching rows
    * @throws IllegalArgumentException if an operator is not one of the supported ones
    */
  def GetTableNm(tbl_nm: String,
                 show_info: Array[(String, String)],
                 filter_info: Array[(String, String, String, String)]): (SQLContext, ResultScanner) = {
    // Record the table name in the shared configuration.
    hbaseConf.set(TableInputFormat.INPUT_TABLE, tbl_nm)

    val scan = new Scan()
    // Restrict the scan to the requested family/qualifier pairs.
    show_info.foreach { case (family, qualifier) =>
      scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier))
    }

    // Filters are built (and the operators validated) before any table is
    // opened, so a bad operator cannot leave an open table behind.
    val filters = new util.ArrayList[Filter](filter_info.length)
    filter_info.foreach { case (family, qualifier, value, operator) =>
      filters.add(buildFilter(family, qualifier, value, operator))
    }
    // MUST_PASS_ALL: a row is returned only if every filter accepts it.
    scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, filters))

    val table = new HTable(hbaseConf, tbl_nm)
    // Registered before getScanner so close() releases it even if that call fails.
    openTables += table
    (sqlContext, table.getScanner(scan))
  }

  /**
    * Builds one single-column value filter.
    *
    * The value is compared as raw bytes with a BinaryComparator, i.e. byte-wise
    * (lexicographic) order rather than numeric order.
    */
  private def buildFilter(family: String, qualifier: String, value: String, operator: String): Filter = {
    // Second element: whether rows lacking the column are dropped.
    // NOTE(review): ">=" does not set filterIfMissing, exactly as in the original
    // code (the call was commented out there); confirm whether that is intended.
    val (compareOp, filterIfMissing) = operator match {
      case "="  => (CompareFilter.CompareOp.EQUAL, true)
      case "<"  => (CompareFilter.CompareOp.LESS, true)
      case "<=" => (CompareFilter.CompareOp.LESS_OR_EQUAL, true)
      case ">"  => (CompareFilter.CompareOp.GREATER, true)
      case ">=" => (CompareFilter.CompareOp.GREATER_OR_EQUAL, false)
      case "!=" => (CompareFilter.CompareOp.NOT_EQUAL, true)
      case other =>
        throw new IllegalArgumentException(
          s"Unsupported comparison operator '$other' for column $family:$qualifier")
    }
    val filter = new SingleColumnValueFilter(
      Bytes.toBytes(family),
      Bytes.toBytes(qualifier),
      compareOp,
      new BinaryComparator(Bytes.toBytes(value)))
    if (filterIfMissing) {
      filter.setFilterIfMissing(true)
    }
    filter
  }

  /**
    * Closes the given scanner (if not null), closes every table opened by this
    * instance and stops the SparkContext.
    *
    * @param ColumnValueScanner scanner to close; may be null
    */
  def close(ColumnValueScanner: ResultScanner): Unit = {
    try {
      Option(ColumnValueScanner).foreach(_.close())
      openTables.foreach(_.close())
      openTables.clear()
    } finally {
      // Always stop Spark, even when closing an HBase resource failed.
      sc.stop()
    }
  }
}

object Hbase_spark_PSZ {

  /**
    * Demo entry point: scans two HBase tables with filters, registers each
    * result as a temporary table and prints it through Spark SQL.
    */
  def main(args: Array[String]): Unit = {
    val getTableNm = new GetTableNm
    try {
      // First table
      val arr_col = Array(("basicmod", "cookieid"), ("basicmod", "createtime"), ("basicmod", "pv"))
      val arr_filter = Array(("basicmod", "pv", "5", ">="), ("basicmod", "createtime", "20150411", ">="))
      val result = getTableNm.GetTableNm("deppon_test", arr_col, arr_filter)
      val sQLContext = result._1
      val rows = collectRows(result._2, "basicmod", "cookieid", "createtime", "pv")
      registerAndShow(sQLContext, rows, "deppon_test")

      // Second table, reusing the SQLContext obtained above
      val arr_col1 = Array(("basicinfo", "WAYBILL_NO"), ("basicinfo", "PENDING_TYPE"), ("basicinfo", "BILL_TIME"))
      val arr_filter1 = Array(("basicinfo", "WAYBILL_NO", "401919016", ">="), ("basicinfo", "PENDING_TYPE", "PC_ACTIVE", "="))
      val result1 = getTableNm.GetTableNm("RDVS.T_SRV_WAYBILL", arr_col1, arr_filter1)
      val rows1 = collectRows(result1._2, "basicinfo", "WAYBILL_NO", "PENDING_TYPE", "BILL_TIME")
      registerAndShow(sQLContext, rows1, "T_SRV_WAYBILL")
    } finally {
      // Scanners are already closed by collectRows; this releases the tables
      // and stops Spark.
      getTableNm.close(null)
    }
  }

  /**
    * Drains the scanner into a list of rows and closes it.
    *
    * Each row becomes (row key, value of q1, value of q2, value of q3), all
    * decoded as strings and read from the single column family `family`.
    */
  private def collectRows(scanner: ResultScanner,
                          family: String,
                          q1: String,
                          q2: String,
                          q3: String): List[tbl_test2] = {
    val familyBytes = Bytes.toBytes(family)
    try {
      // toList forces the whole scan before the scanner is closed below.
      scanner.iterator().asScala.map { row =>
        def cell(qualifier: String): String =
          Bytes.toString(row.getValue(familyBytes, Bytes.toBytes(qualifier)))
        tbl_test2(Bytes.toString(row.getRow), cell(q1), cell(q2), cell(q3))
      }.toList
    } finally {
      scanner.close()
    }
  }

  /** Converts the rows to a DataFrame, registers it as `tempTable` and prints it. */
  private def registerAndShow(sqlContext: SQLContext, rows: List[tbl_test2], tempTable: String): Unit = {
    // Implicit conversions that provide toDF()
    import sqlContext.implicits._
    rows.toDF().registerTempTable(tempTable)
    sqlContext.sql(s"select * from $tempTable").show() //.write.save("hdfs://ns1/DL_test1")
  }
}

/**
  * Row shape shared by both demo tables. The field names come from the first
  * table; for the second table the three value fields hold WAYBILL_NO,
  * PENDING_TYPE and BILL_TIME respectively.
  */
case class tbl_test2(id: String, cookieid: String, createtime: String, pv: String)



java反射的基本代码:

/**
 * Created by DreamBoy on 2017/5/11.
 *
 * Minimal bean with an int field and a String field, used as the target of
 * the reflection example.
 */
public class A {
    public int a;
    public String b;

    /** Creates an instance with default field values. */
    public A() {}

    /**
     * @param a initial value of {@link #a}
     * @param b initial value of {@link #b}
     */
    public A(int a, String b) {
        this.a = a;
        this.b = b;
    }

    /** @return the current value of {@link #a} */
    public int getA() {
        return a;
    }

    /** @param a new value of {@link #a} */
    public void setA(int a) {
        this.a = a;
    }

    /** @return the current value of {@link #b} */
    public String getB() {
        return b;
    }

    /** @param b new value of {@link #b} */
    public void setB(String b) {
        this.b = b;
    }

    /**
     * Misnamed setter kept so existing callers keep compiling.
     *
     * @param b new value of {@link #b}
     * @deprecated use {@link #setB(String)} instead
     */
    @Deprecated
    public void getB(String b) {
        setB(b);
    }
}
//操作反射类   
import java.lang.reflect.Constructor;/** * Created by DreamBoy on 2017/5/11. */public class test_reflect {    public test_reflect(){}    public static void main(String[] args) throws Exception    {        String cookieid = "asad";        String b = "123";        Object[] arg= new Object[] { cookieid,b,cookieid,b};        A tempClass = (A)(Class.forName("cn.DaLong_hbase.A").newInstance());        Class[] parameter = new Class[]{String.class,String.class,String.class,String.class};        Constructor con = tempClass.getClass().getConstructor(parameter);       // Object[] arg= new Object[] { cookieid,b,cookieid,b};        A a = (A)con.newInstance(arg);        System.out.println(a.getA()+"------"+a.getB());    }}

   
0 0
原创粉丝点击