08分布式数据仓库 HIVE -- UDF和UDAF

来源:互联网 发布:钢结构设计软件 编辑:程序博客网 时间:2024/06/10 09:46

UDF(user defined function)

用于对每一条记录产生作用的自定义函数。


package hive;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class MyUDF extends UDF {

 private boolean evaluate(Text text) {
  String t = text.toString();
  if (t.equals("boy")) {
   return true;
  } else {
   return false;
  }
 }
}


eclipse 右键 MyUDF.java --》export--》MyUDF.jar;

add jar MyUDF.jar;

create temporary function isboy as 'hive.MyUDF';

select isboy(sex) from user limit 5;

drop temporary function isboy;


--------------------------------------------------------------------------------------------------------------

UDAF(user defined aggregation function)

对一组数据做聚合操作,返回一个聚合结果。


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~·

package hive;

import hive.MyUDAF.MyEvaluate.CountAgg;

import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.LongWritable;

public class MyUDAF extends AbstractGenericUDAFResolver {

 @Override
 public GenericUDAFEvaluator getEvaluator(TypeInfo[] info)
   throws SemanticException {
  // 检查使用UDAF时,传进来的参数
  if (info.length != 2) {
   throw new UDFArgumentTypeException();
  }
  return new MyEvaluate();
 }

 public static class MyEvaluate extends GenericUDAFEvaluator {

  LongWritable result;
  PrimitiveObjectInspector primitiveObjectInspector1;
  PrimitiveObjectInspector primitiveObjectInspector2;

  public static class CountAgg implements AggregationBuffer{
   long count;
  }
  
  @Override
  public void aggregate(AggregationBuffer agg, Object[] parameters)
    throws HiveException {
   super.aggregate(agg, parameters);
  }

  @Override
  public Object evaluate(AggregationBuffer agg) throws HiveException {

   return super.evaluate(agg);
  }

  /**
   * map阶段:parameters长度和udaf的输入参数有关 reduce阶段:parameters的长度为1
   */
  @Override
  public ObjectInspector init(Mode m, ObjectInspector[] parameters)
    throws HiveException {
   super.init(m, parameters);
   result = new LongWritable(0);
   primitiveObjectInspector1 = (PrimitiveObjectInspector) parameters[0];
   if (parameters.length > 1) {
    primitiveObjectInspector2 = (PrimitiveObjectInspector) parameters[1];
   }

   return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
  }

  // -------------------------------------------------------------------------
  // MODE: 1:PARTIAL1 map(),2:PARTIAL2 combiner(),3:FINAL
  // reduce(),3:COMPLETE 只有map没有reduce的时候。执行;
  @Override
  public AggregationBuffer getNewAggregationBuffer() throws HiveException {
   CountAgg agg = new CountAgg();
   reset(agg);
   return agg;
  }

  @Override
  public void iterate(AggregationBuffer aggregationBuffer,
    Object[] objects) throws HiveException {
   // 1:PARTIAL1 map()
   // 3:COMPLETE
   assert (objects.length == 2);

   if (objects == null || objects[0] == null || objects[1] == null) {
    return;
   }

   double value1 = PrimitiveObjectInspectorUtils.getDouble(objects[0],
     primitiveObjectInspector1);
   double value2 = PrimitiveObjectInspectorUtils.getDouble(objects[1],
     primitiveObjectInspector2);

   if (value1>value2) {
    CountAgg agg = (CountAgg) aggregationBuffer;;
    agg.count++;
   }
   
  }

  @Override
  public void merge(AggregationBuffer arg0, Object arg1)
    throws HiveException {
   // 1:PARTIAL2 combiner()
   // 3:FINAL reduce();
   // 3:COMPLETE
  }

  @Override
  public void reset(AggregationBuffer arg0) throws HiveException {

  }

  @Override
  public Object terminate(AggregationBuffer arg0) throws HiveException {
   // FINAL reduce();
   return null;
  }

  @Override
  public Object terminatePartial(AggregationBuffer arg0)
    throws HiveException {
   // 1:PARTIAL1 map()
   return null;
  }

 }
}

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~













0 0
原创粉丝点击