Hive中UDAF函数的Demo
来源:互联网 发布:桌面软件图 编辑:程序博客网 时间:2024/06/06 01:29
场景:一个DEMO程序,统计每个分组的count(行数)、sum(求和)、dtl(明细),并把结果以字符串拼接(count-sum-dtl)的形式输出;map端到reduce端的中间结果(partial result)通过结构体(struct)传递。
代码:
UDAF函数
package com.jd.pop.qc.udf;import org.apache.commons.lang.ArrayUtils;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.parse.SemanticException;import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;import org.apache.hadoop.hive.serde2.io.DoubleWritable;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.StructField;import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import java.util.ArrayList;/** * Created by songhongwei on 2017-04-06. 
* 一个DEMO程序:统计分组的count、sum、dtl,并把结果以字符串拼接(count-sum-dtl)的形式输出 */public class CountSumDtl extends AbstractGenericUDAFResolver { @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] info) throws SemanticException { return new GenericUDAFCountSumDtlEvaluator(); } public static class GenericUDAFCountSumDtlEvaluator extends GenericUDAFEvaluator{ final static String split = "-"; final static String comma = ","; private transient PrimitiveObjectInspector inputOI; private transient StructObjectInspector soi; private transient StructField countField; private transient StructField sumField; private transient StructField contentField; private transient LongObjectInspector countFieldOI; private transient DoubleObjectInspector sumFieldOI; private transient StringObjectInspector contentFieldOI; private Object[] partialResult; public static class PartialResultAgg implements AggregationBuffer { long count; double sum; String content; } @Override public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException { super.init(mode, parameters); if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// from original data inputOI = (PrimitiveObjectInspector)parameters[0]; }else{ soi = (StructObjectInspector)parameters[0]; countField = soi.getStructFieldRef("count"); sumField = soi.getStructFieldRef("sum"); contentField = soi.getStructFieldRef("content"); countFieldOI = (LongObjectInspector)countField.getFieldObjectInspector(); sumFieldOI = (DoubleObjectInspector)sumField.getFieldObjectInspector(); contentFieldOI = (StringObjectInspector)contentField.getFieldObjectInspector(); } //init output if(mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2){ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>(); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); ArrayList<String> fname = new 
ArrayList<String>(); fname.add("count"); fname.add("sum"); fname.add("content"); partialResult = new Object[]{new LongWritable(0),new DoubleWritable(0),new Text()}; return ObjectInspectorFactory.getStandardStructObjectInspector(fname,foi); } else { return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } } @Override public AggregationBuffer getNewAggregationBuffer() throws HiveException { PartialResultAgg partialResultAgg = new PartialResultAgg(); reset(partialResultAgg); return partialResultAgg; } @Override public void reset(AggregationBuffer aggregationBuffer) throws HiveException { PartialResultAgg partialResultAgg = (PartialResultAgg)aggregationBuffer; partialResultAgg.count = 0; partialResultAgg.sum = 0; partialResultAgg.content = ""; } @Override public void iterate(AggregationBuffer aggregationBuffer, Object[] objects) throws HiveException { if(ArrayUtils.isEmpty(objects)) return; PartialResultAgg partialResultAgg = (PartialResultAgg) aggregationBuffer; Double cnt = PrimitiveObjectInspectorUtils.getDouble(objects[0], inputOI); partialResultAgg.count++; partialResultAgg.sum += cnt; partialResultAgg.content += cnt.longValue(); } @Override public Object terminatePartial(AggregationBuffer aggregationBuffer) throws HiveException { PartialResultAgg partialResultAgg = (PartialResultAgg)aggregationBuffer; ((LongWritable)partialResult[0]).set(partialResultAgg.count); ((DoubleWritable)partialResult[1]).set(partialResultAgg.sum); ((Text) partialResult[2]).set(partialResultAgg.content); return partialResult; } @Override public void merge(AggregationBuffer aggregationBuffer, Object o) throws HiveException { PartialResultAgg partialResultAgg = (PartialResultAgg)aggregationBuffer; long oCount = countFieldOI.get(soi.getStructFieldData(o, countField)); double oSum = sumFieldOI.get(soi.getStructFieldData(o,sumField)); String oContent = contentFieldOI.getPrimitiveJavaObject(soi.getStructFieldData(o,contentField)); partialResultAgg.count += oCount; 
partialResultAgg.sum += oSum; partialResultAgg.content += comma + oContent; } @Override public Object terminate(AggregationBuffer aggregationBuffer) throws HiveException { PartialResultAgg partialResultAgg = (PartialResultAgg)aggregationBuffer; String ret = partialResultAgg.count + split+partialResultAgg.sum+split+(partialResultAgg.content.startsWith(",")?partialResultAgg.content.substring(1):partialResultAgg.content); Text result = new Text(); result.set(ret); return result; } }}
使用过程:
-- Make the UDAF jar available to the session and register the function.
add jar /home/mart_pop/tianhe/qc/qc_shop_qlty_sort/jar/pop-qc-hive-1.0.0.jar;
create temporary function countSumDtl as 'com.jd.pop.qc.udf.CountSumDtl';

-- Aggregate sku_order_cnt per first-level category; the UDAF returns "count-sum-dtl".
select item_first_cate_cd,
       min(item_first_cate_name) item_first_cate_name,
       countSumDtl(sku_order_cnt) d_sku_order_cnt
from app.app_qc_shop_qlty_sort_topsis_dtl
where dt = '2017-03-31'
  and item_first_cate_cd in (4051,4052)
group by item_first_cate_cd;
执行效果:
最后给出网上的另一个例子(以collect_set源码分析)
http://www.lai18.com/content/2694127.html?from=cancel
参考资料
GenericUDAFCaseStudy
map到reduce中间的shuffle过程
Hive中ObjectInspector的作用
Hive中ObjectInspector作用
Hive内置数据类型
Hive自定义UDF/UDAF/UDTF中,如何获得List的ObjectInspector
0 0
- Hive中UDAF函数的Demo
- hive UDAF 的DEMO
- hive的udaf相关函数
- hive UDAF函数
- Hive中UDF和UDAF的使用
- hive中添加自定义udf udaf udtf等函数的jar文件的三种方法
- hive udaf的开发
- hive udaf的编写
- Hive的UDAF
- Hive中添加自定义UDF,UDAF,UDTF等函数jar文件的三种方式
- hive中UDF和UDAF使用说明(最简单的udf函数)
- 第一个Hive UDAF函数
- hive 自定义函数UDF,UDAF
- Hive自定义函数UDAF开发
- Hive自定义函数(UDF、UDAF)
- Hive UDF UDTF UDAF 函数
- Hive自定义函数(UDF、UDAF)
- hive 中udf,udaf,udtf
- weblogic修改模式
- Spring整合Shiro做权限控制模块详细案例分析
- 图片上的文字转换成word的实现方法
- StringUtils.isEmpty和StringUtils.isBlank用法和区别
- Windows常用运行命令快速打开某个程序或服务,方便自己
- Hive中UDAF函数的Demo
- java 用验证码的形式验证邮箱
- 窗口根据鼠标移动 tip显示全部信息
- Servlet之forward、sendRedirect、 include区别与使用
- 反射 Module类详解
- POJ-1611 The Suspects
- android 8种对话框(Dialog)使用方法汇总
- Java基础知识题集(2)
- ofo创始人戴威:两年前如何起死回生?