hbaseFilter使用,以及简单封装
来源:互联网 发布:sem优化策略 编辑:程序博客网 时间:2024/06/10 02:12
学习笔记:
简单地将 Spark 访问 HBase 时使用 Filter 过滤的操作进行封装。
功能并不完善:目前无法把 case class 以反射的方式传入到方法中进行字段的构造,
因此把注册表的操作放在调用方单独处理。
//不使用 case class 也可以构建 DF:用元组替换时列名默认为 _1,_2,_3,_4...,如需指定列名可以调用 toDF("id","cookieid","createtime","pv")
package cn.DaLong_hbase

import java.util

import scala.collection.JavaConverters._

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{HTable, ResultScanner, Scan}
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
 * Created by DreamBoy on 2017/5/10.
 *
 * Sets up the Spark and HBase environment once (in the primary constructor)
 * and opens filtered HBase scans on demand. Turning the scanned rows into a
 * DataFrame and registering it as a temp table is left to the caller.
 *
 * A single instance can be reused for any number of tables.
 */
class GetTableNm {

  Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)

  // Spark settings.
  val conf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("HbaseTest")
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  val sc: SparkContext = new SparkContext(conf)
  val hbaseConf = HBaseConfiguration.create()
  val sqlContext: SQLContext = new SQLContext(sc)

  // HBase settings.
  // The original value was "hdfs://http://192.168.10.228/hbase", which is not a valid URI
  // (an "http://" scheme was embedded after the "hdfs://" scheme).
  // NOTE(review): confirm whether the NameNode needs an explicit port here.
  hbaseConf.set("hbase.rootdir", "hdfs://192.168.10.228/hbase")
  hbaseConf.set("hbase.zookeeper.quorum", "192.168.10.228,192.168.10.229,192.168.10.230,192.168.10.231,192.168.10.232")
  hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
  hbaseConf.set("hbase.master", "192.168.10.230")

  // Every table opened by GetTableNm is remembered here so that close() can release it.
  private val openTables = scala.collection.mutable.ArrayBuffer.empty[HTable]

  /** Translates a textual comparison operator into the matching HBase compare op. */
  private def compareOpFor(symbol: String): CompareFilter.CompareOp = symbol match {
    case "="  => CompareFilter.CompareOp.EQUAL
    case "!=" => CompareFilter.CompareOp.NOT_EQUAL
    case "<"  => CompareFilter.CompareOp.LESS
    case "<=" => CompareFilter.CompareOp.LESS_OR_EQUAL
    case ">"  => CompareFilter.CompareOp.GREATER
    case ">=" => CompareFilter.CompareOp.GREATER_OR_EQUAL
    case other =>
      throw new IllegalArgumentException(
        s"Unsupported comparison operator '$other' (expected one of =, !=, <, <=, >, >=)")
  }

  /** Builds one single-column value filter from a (family, qualifier, value, operator) condition. */
  private def buildFilter(family: String, qualifier: String, value: String, symbol: String): Filter = {
    // The value is compared as raw bytes, so numeric strings are ordered
    // lexicographically ("10" sorts before "5"), not numerically.
    val filter = new SingleColumnValueFilter(
      Bytes.toBytes(family),
      Bytes.toBytes(qualifier),
      compareOpFor(symbol),
      new BinaryComparator(Bytes.toBytes(value)))
    // NOTE(review): the original code enabled filterIfMissing for every operator except ">="
    // (the call was commented out in that branch). That behaviour is preserved here;
    // confirm whether the exception is intentional.
    if (symbol != ">=") filter.setFilterIfMissing(true)
    filter
  }

  /**
   * Opens a scan over an HBase table, restricted to the given columns and conditions.
   *
   * @param tbl_nm      name of the HBase table
   * @param show_info   columns to fetch, one (column family, qualifier) pair each
   * @param filter_info conditions, one (column family, qualifier, value, operator) tuple each;
   *                    operator is one of =, !=, <, <=, >, >= and all conditions must hold
   * @return this instance's SQLContext together with the opened scanner. The SQLContext is the
   *         same object on every call, so it only needs to be kept from the first call.
   *         The caller is responsible for closing the scanner.
   * @throws IllegalArgumentException if a condition uses an unsupported operator
   */
  def GetTableNm(tbl_nm: String,
                 show_info: Array[(String, String)],
                 filter_info: Array[(String, String, String, String)]): (SQLContext, ResultScanner) = {
    // Validate and build the filters before touching HBase, so that a bad operator
    // cannot leave an open table behind.
    val filters = new util.ArrayList[Filter](filter_info.length)
    filter_info.foreach { case (family, qualifier, value, symbol) =>
      filters.add(buildFilter(family, qualifier, value, symbol))
    }

    val scan = new Scan()
    show_info.foreach { case (family, qualifier) =>
      scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier))
    }
    // A row-key range could additionally be set with scan.setStartRow / scan.setStopRow.
    scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, filters))

    hbaseConf.set(TableInputFormat.INPUT_TABLE, tbl_nm)
    val table = new HTable(hbaseConf, tbl_nm)
    val scanner =
      try table.getScanner(scan)
      catch {
        case scala.util.control.NonFatal(e) =>
          table.close()
          throw e
      }
    openTables += table
    (sqlContext, scanner)
  }

  /**
   * Releases everything this instance holds: the given scanner (if not null), every table
   * opened through GetTableNm, and finally the SparkContext.
   *
   * @param ColumnValueScanner a scanner to close as well; may be null
   */
  def close(ColumnValueScanner: ResultScanner): Unit = {
    try {
      Option(ColumnValueScanner).foreach(_.close())
      openTables.foreach(_.close())
      openTables.clear()
    } finally {
      // Stop Spark even if closing an HBase resource failed.
      sc.stop()
    }
  }
}

object Hbase_spark_PSZ {

  /**
   * Drains a scanner into a list of tbl_test2 rows.
   *
   * @param scanner scanner to read; it is not closed here
   * @param columns exactly three (column family, qualifier) pairs, mapped in order to the
   *                cookieid, createtime and pv fields; the row key becomes id
   */
  private def readRows(scanner: ResultScanner, columns: Array[(String, String)]): List[tbl_test2] = {
    require(columns.length == 3, s"tbl_test2 needs exactly 3 columns, got ${columns.length}")
    // Encode the names once, with the same encoding (Bytes.toBytes) the scan was built with.
    val encoded = columns.map { case (family, qualifier) =>
      (Bytes.toBytes(family), Bytes.toBytes(qualifier))
    }
    scanner.iterator().asScala.map { row =>
      val cells = encoded.map { case (family, qualifier) =>
        Bytes.toString(row.getValue(family, qualifier))
      }
      tbl_test2(Bytes.toString(row.getRow), cells(0), cells(1), cells(2))
    }.toList
  }

  def main(args: Array[String]): Unit = {
    val getTableNm = new GetTableNm
    try {
      // First table.
      val arr_col = Array(("basicmod", "cookieid"), ("basicmod", "createtime"), ("basicmod", "pv"))
      val arr_filter = Array(("basicmod", "pv", "5", ">="), ("basicmod", "createtime", "20150411", ">="))
      val result = getTableNm.GetTableNm("deppon_test", arr_col, arr_filter)
      val sQLContext = result._1
      val ColumnValueScanner = result._2

      // Brings toDF() into scope.
      import sQLContext.implicits._

      // The rows are fully materialised, so the scanner can be closed immediately.
      val table_nm =
        try readRows(ColumnValueScanner, arr_col)
        finally ColumnValueScanner.close()
      table_nm.toDF().registerTempTable("deppon_test")
      sQLContext.sql("select * from deppon_test").show() //.write.save("hdfs://ns1/DL_test1")

      // Second table, reusing the same GetTableNm instance and SQLContext.
      val arr_col1 = Array(("basicinfo", "WAYBILL_NO"), ("basicinfo", "PENDING_TYPE"), ("basicinfo", "BILL_TIME"))
      val arr_filter1 = Array(("basicinfo", "WAYBILL_NO", "401919016", ">="), ("basicinfo", "PENDING_TYPE", "PC_ACTIVE", "="))
      val result1 = getTableNm.GetTableNm("RDVS.T_SRV_WAYBILL", arr_col1, arr_filter1)
      val ColumnValueScanner1 = result1._2

      val table_nm1 =
        try readRows(ColumnValueScanner1, arr_col1)
        finally ColumnValueScanner1.close()
      table_nm1.toDF().registerTempTable("T_SRV_WAYBILL")
      sQLContext.sql("select * from T_SRV_WAYBILL").show() //.write.save("hdfs://ns1/DL_test1")
    } finally {
      // Both scanners are already closed above; this releases the tables and stops Spark.
      getTableNm.close(null)
    }
  }
}

/** One scanned row: the HBase row key plus three column values, all decoded as strings. */
case class tbl_test2(id: String, cookieid: String, createtime: String, pv: String)
/** * Created by DreamBoy on 2017/5/11. */public class A { public A(){} public A(int a,String b){ this.a = a; this.b = b; } public int a; public String b; public int getA(){ return a; } public void setA(int a){ this.a = a; } public String getB(){ return b; } public void getB(String b){ this.b = b; }}//操作反射类import java.lang.reflect.Constructor;/** * Created by DreamBoy on 2017/5/11. */public class test_reflect { public test_reflect(){} public static void main(String[] args) throws Exception { String cookieid = "asad"; String b = "123"; Object[] arg= new Object[] { cookieid,b,cookieid,b}; A tempClass = (A)(Class.forName("cn.DaLong_hbase.A").newInstance()); Class[] parameter = new Class[]{String.class,String.class,String.class,String.class}; Constructor con = tempClass.getClass().getConstructor(parameter); // Object[] arg= new Object[] { cookieid,b,cookieid,b}; A a = (A)con.newInstance(arg); System.out.println(a.getA()+"------"+a.getB()); }}0 0
- hbaseFilter使用,以及简单封装
- HBaseFilter过滤器的介绍以及使用
- spark使用hbasefilter访问hbase表数据(封装)
- HbaseFilter
- okhttp的使用以及简单的封装
- okhttpUtils的简单使用以及封装
- iOS之CoreData基本使用以及简单封装
- 设备相机相册使用简单总结以及封装
- okHttp简单封装使用
- GreenDao使用简单封装
- okhttp简单封装+使用
- 在多线程环境中使用CoreData,以及一个简单的封装
- Fragment实现懒加载以及简单封装
- LOG4CXX编译,使用,简单封装
- Jtable简单的封装使用
- RecyclerView 使用的简单封装
- BroadcastReceiver的简单封装使用
- okHttp的简单封装使用
- 代码注释,佛祖,神兽
- 串口通信原理
- UTF8编码
- 自用语句-hive
- 获取服务器图片显示到listview上
- hbaseFilter使用,以及简单封装
- RHEL7配置yum源
- Mac wireshark报The capture session could not be initiated 错误
- Zabbix 自定义key类型之计算(Calculated items)
- [转]Java的21个技术点,你知道吗?
- java项目中使用JDBC连接mysql数据库查询数据
- Thrift反序列化导致OOM
- MBProgressHud的显示异常有问题(有黑色菱形)
- IDC业务的介绍1——域名注册