08分布式数据仓库 HIVE -- UDF和UDAF
来源:互联网 发布:钢结构设计软件 编辑:程序博客网 时间:2024/06/10 09:46
UDF(user defined function)
用于对每一条记录产生作用的自定义函数。
package hive;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Simple Hive UDF that reports whether a text value is exactly {@code "boy"}.
 *
 * <p>Hive locates the {@code evaluate} method by reflection, so it must be
 * {@code public}; a non-public method is not found when the function is called.
 */
public class MyUDF extends UDF {
    /**
     * Tests a single column value.
     *
     * @param text the value of the column for the current row; may be
     *             {@code null} when the column is NULL
     * @return {@code true} if the value equals {@code "boy"} (case-sensitive),
     *         {@code false} otherwise, including for NULL input
     */
    public boolean evaluate(Text text) {
        // NULL columns arrive as a null reference; treat them as "not a boy"
        // instead of failing the query with a NullPointerException.
        if (text == null) {
            return false;
        }
        return "boy".equals(text.toString());
    }
}
在 Eclipse 中右键 MyUDF.java → Export → 导出为 MyUDF.jar;
-- Make the exported jar available to the current Hive session.
add jar MyUDF.jar;
-- Register the UDF class under the session-scoped function name isboy.
create temporary function isboy as 'hive.MyUDF';
-- Apply the function to the sex column of each row.
select isboy(sex) from user limit 5;
-- Remove the temporary function once it is no longer needed.
drop temporary function isboy;
--------------------------------------------------------------------------------------------------------------
UDAF(user defined aggregation function)
对一组数据做聚合操作,返回一个聚合结果。
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~·
package hive;
import hive.MyUDAF.MyEvaluate.CountAgg;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.LongWritable;
public class MyUDAF extends AbstractGenericUDAFResolver {
@Override
public GenericUDAFEvaluator getEvaluator(TypeInfo[] info)
throws SemanticException {
// 检查使用UDAF时,传进来的参数
if (info.length != 2) {
throw new UDFArgumentTypeException();
}
return new MyEvaluate();
}
public static class MyEvaluate extends GenericUDAFEvaluator {
LongWritable result;
PrimitiveObjectInspector primitiveObjectInspector1;
PrimitiveObjectInspector primitiveObjectInspector2;
public static class CountAgg implements AggregationBuffer{
long count;
}
@Override
public void aggregate(AggregationBuffer agg, Object[] parameters)
throws HiveException {
super.aggregate(agg, parameters);
}
@Override
public Object evaluate(AggregationBuffer agg) throws HiveException {
return super.evaluate(agg);
}
/**
* map阶段:parameters长度和udaf的输入参数有关 reduce阶段:parameters的长度为1
*/
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters)
throws HiveException {
super.init(m, parameters);
result = new LongWritable(0);
primitiveObjectInspector1 = (PrimitiveObjectInspector) parameters[0];
if (parameters.length > 1) {
primitiveObjectInspector2 = (PrimitiveObjectInspector) parameters[1];
}
return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
}
// -------------------------------------------------------------------------
// MODE: 1:PARTIAL1 map(),2:PARTIAL2 combiner(),3:FINAL
// reduce(),3:COMPLETE 只有map没有reduce的时候。执行;
@Override
public AggregationBuffer getNewAggregationBuffer() throws HiveException {
CountAgg agg = new CountAgg();
reset(agg);
return agg;
}
@Override
public void iterate(AggregationBuffer aggregationBuffer,
Object[] objects) throws HiveException {
// 1:PARTIAL1 map()
// 3:COMPLETE
assert (objects.length == 2);
if (objects == null || objects[0] == null || objects[1] == null) {
return;
}
double value1 = PrimitiveObjectInspectorUtils.getDouble(objects[0],
primitiveObjectInspector1);
double value2 = PrimitiveObjectInspectorUtils.getDouble(objects[1],
primitiveObjectInspector2);
if (value1>value2) {
CountAgg agg = (CountAgg) aggregationBuffer;;
agg.count++;
}
}
@Override
public void merge(AggregationBuffer arg0, Object arg1)
throws HiveException {
// 1:PARTIAL2 combiner()
// 3:FINAL reduce();
// 3:COMPLETE
}
@Override
public void reset(AggregationBuffer arg0) throws HiveException {
}
@Override
public Object terminate(AggregationBuffer arg0) throws HiveException {
// FINAL reduce();
return null;
}
@Override
public Object terminatePartial(AggregationBuffer arg0)
throws HiveException {
// 1:PARTIAL1 map()
return null;
}
}
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- 08分布式数据仓库 HIVE -- UDF和UDAF
- hive udf和udaf
- hive udf和UDAF应用
- hive 的UDF和UDAF
- hive udf、udaf和udtf
- Hive-0.5中UDF和UDAF简述
- Hive-0.5中UDF和UDAF简述
- Hive-0.5中UDF和UDAF简述
- hive中UDF和UDAF使用说明
- hive中UDF、UDAF和UDTF使用
- hive中UDF和UDAF使用说明
- hive中UDF和UDAF使用说明
- hive中UDF、UDAF和UDTF使用
- hive中UDF、UDAF和UDTF使用
- hive中UDF和UDAF使用说明
- hive中UDF和UDAF使用说明
- hive中UDF、UDAF和UDTF使用
- hive中UDF、UDAF和UDTF使用
- ANDROID轻量级JSON序列化和反序列化
- javascript之Style对象
- Struts2之struts2标签库了解和使用
- IE7下li Bug
- 读秦小波《设计模式之禅》 -- 工厂模式
- 08分布式数据仓库 HIVE -- UDF和UDAF
- 张小龙:微信产品观(上)
- 漫步IOS--MAC键盘输入
- 06function
- kernel如何得到uboot启动信息
- instanceof php 的几个具体例子
- 汉慕斯纱窗门窗有限公司--企业简介
- IOS基础知识
- javascript map