hive 中udf,udaf,udtf

来源:互联网 发布:怎么在淘宝上买东西? 编辑:程序博客网 时间:2024/05/01 12:08
UDF步骤:
1.继承org.apache.hadoop.hive.ql.exec.UDF
2.实现evaluate函数,evaluate函数支持重载
[java] view plain copy
  1. package cn.sina.stat.hive.udf;  
  2. import java.util.Arrays;  
  3. import org.apache.hadoop.hive.ql.exec.UDF;  
  4. public final class SortFieldContent extends UDF {  
  5.         public String evaluate( final String str, String delimiter) {  
  6.                if (str == null ) {  
  7.                       return null ;  
  8.               }  
  9.                if (delimiter == null) {  
  10.                      delimiter = "," ;  
  11.               }  
  12.               String[] strs = str.split(delimiter);  
  13.               Arrays. sort(strs);  
  14.               String result = "" ;  
  15.                for (int i = 0; i < strs. length; i++) {  
  16.                       if (result.length() > 0) {  
  17.                            result.concat(delimiter);  
  18.                      }  
  19.                      result.concat(strs[i]);  
  20.               }  
  21.                return result;  
  22.        }  
  23.   
  24.         public String evaluate( final String str, String delimiter, String order) {  
  25.                if (str == null ) {  
  26.                       return null ;  
  27.               }  
  28.                if (delimiter == null) {  
  29.                      delimiter = "," ;  
  30.               }  
  31.                if (order != null && order.toUpperCase().equals( "ASC" )) {  
  32.                       return evaluate(str, delimiter);  
  33.               } else {  
  34.                      String[] strs = str.split(delimiter);  
  35.                      Arrays. sort(strs);  
  36.                      String result = "" ;  
  37.                       for (int i = strs. length - 1; i >= 0; i--) {  
  38.                             if (result.length() > 0) {  
  39.                                   result.concat(delimiter);  
  40.                            }  
  41.                            result.concat(strs[i]);  
  42.                      }  
  43.                       return result;  
  44.               }  
  45.        }  
  46. }  

UDAF步骤:
1.函数类继承org.apache.hadoop.hive.ql.exec.UDAF
   内部类实现接口org.apache.hadoop.hive.ql.exec.UDAFEvaluator
2.Evaluator需要实现 init、iterate、terminatePartial、merge、terminate这几个函数
   具体执行过程如图:

[java] view plain copy
  1. package cn.sina.stat.hive.udaf;  
  2. import java.util.Arrays;  
  3. import org.apache.hadoop.hive.ql.exec.UDAF;  
  4. import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;  
  5.   
  6. public class ConcatClumnGroupByKeyWithOrder extends UDAF {  
  7.      public static class ConcatUDAFEvaluator implements UDAFEvaluator {  
  8.           public static class PartialResult {  
  9.                String result;  
  10.                String delimiter;  
  11.                String order;  
  12.           }  
  13.   
  14.           private PartialResult partial;  
  15.   
  16.           public void init() {  
  17.                partial = null;  
  18.           }  
  19.   
  20.           public boolean iterate(String value, String delimiter, String order) {  
  21.   
  22.                if (value == null) {  
  23.                     return true;  
  24.                }  
  25.                if (partial == null) {  
  26.                     partial = new PartialResult();  
  27.                     partial.result = new String("");  
  28.                     if (delimiter == null || delimiter.equals("")) {  
  29.                          partial.delimiter = new String(",");  
  30.                     } else {  
  31.                          partial.delimiter = new String(delimiter);  
  32.                     }  
  33.                     if (order != null  
  34.                               && (order.toUpperCase().equals("ASC") || order  
  35.                                         .toUpperCase().equals("DESC"))) {  
  36.                          partial.order = new String(order);  
  37.                     } else {  
  38.                          partial.order = new String("ASC");  
  39.                     }  
  40.   
  41.                }  
  42.                if (partial.result.length() > 0) {  
  43.                     partial.result = partial.result.concat(partial.delimiter);  
  44.                }  
  45.   
  46.                partial.result = partial.result.concat(value);  
  47.   
  48.                return true;  
  49.           }  
  50.   
  51.           public PartialResult terminatePartial() {  
  52.                return partial;  
  53.           }  
  54.   
  55.           public boolean merge(PartialResult other) {  
  56.                if (other == null) {  
  57.                     return true;  
  58.                }  
  59.                if (partial == null) {  
  60.                     partial = new PartialResult();  
  61.                     partial.result = new String(other.result);  
  62.                     partial.delimiter = new String(other.delimiter);  
  63.                     partial.order = new String(other.order);  
  64.                } else {  
  65.                     if (partial.result.length() > 0) {  
  66.                          partial.result = partial.result.concat(partial.delimiter);  
  67.                     }  
  68.                     partial.result = partial.result.concat(other.result);  
  69.                }  
  70.                return true;  
  71.           }  
  72.   
  73.           public String terminate() {  
  74.                String[] strs = partial.result.split(partial.delimiter);  
  75.                Arrays.sort(strs);  
  76.                String result = new String("");  
  77.                if (partial.order.equals("DESC")) {  
  78.                     for (int i = strs.length - 1; i >= 0; i--) {  
  79.                          if (result.length() > 0) {  
  80.                               result.concat(partial.delimiter);  
  81.                          }  
  82.                          result.concat(strs[i]);  
  83.                     }  
  84.                } else {  
  85.                     for (int i = 0; i < strs.length; i++) {  
  86.                          if (result.length() > 0) {  
  87.                               result.concat(partial.delimiter);  
  88.                          }  
  89.                          result.concat(strs[i]);  
  90.                     }  
  91.                }  
  92.                return new String(result);  
  93.           }  
  94.      }  
  95. }  

UDTF步骤:
1.继承org.apache.hadoop.hive.ql.udf.generic.GenericUDTF
2.实现initialize, process, close三个方法
     a.initialize初始化验证,返回字段名和字段类型
     b.初始化完成后,调用process方法,对传入的参数进行处理,通过forward()方法把结果返回
     c.最后调用close()方法进行清理工作
[java] view plain copy
  1. package cn.sina.stat.hive.udtf;  
  2. import java.util.ArrayList;  
  3. import java.util.Arrays;  
  4. import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;  
  5. import org.apache.hadoop.hive.ql.exec.UDFArgumentException;  
  6. import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;  
  7. import org.apache.hadoop.hive.ql.metadata.HiveException;  
  8. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;  
  9. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;  
  10. import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;  
  11. import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;  
  12.   
  13. public class SortFieldExplodeToPair extends GenericUDTF {  
  14.   
  15.      @Override  
  16.      public void close() throws HiveException {  
  17.           // TODO Auto-generated method stub  
  18.      }  
  19.   
  20.      @Override  
  21.      public StructObjectInspector initialize(ObjectInspector[] args)  
  22.                throws UDFArgumentException {  
  23.           if (args.length != 3) {  
  24.                throw new UDFArgumentLengthException(  
  25.                          "SortFieldExplodeToPair takes only three argument");  
  26.           }  
  27.           if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {  
  28.                throw new UDFArgumentException(  
  29.                          "SortFieldExplodeToPair takes string as first parameter");  
  30.           }  
  31.           if (args[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {  
  32.                throw new UDFArgumentException(  
  33.                          "SortFieldExplodeToPair takes string as second parameter");  
  34.           }  
  35.           if (args[2].getCategory() != ObjectInspector.Category.PRIMITIVE) {  
  36.                throw new UDFArgumentException(  
  37.                          "SortFieldExplodeToPair takes string as third parameter");  
  38.           }  
  39.           if (args[2] == null  
  40.                     || !(args[2].toString().toUpperCase().equals("ASC") || args[2]  
  41.                               .toString().toUpperCase().equals("DESC"))) {  
  42.                throw new UDFArgumentException(  
  43.                          "SortFieldExplodeToPair third parameter must be \"ASC\" or \"DESC\"");  
  44.           }  
  45.   
  46.           ArrayList<String> fieldNames = new ArrayList<String>();  
  47.           ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();  
  48.           fieldNames.add("col1");  
  49.           fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);  
  50.   
  51.           return ObjectInspectorFactory.getStandardStructObjectInspector(  
  52.                     fieldNames, fieldOIs);  
  53.      }  
  54.   
  55.      private final String[] forwardStr = new String[1];  
  56.   
  57.      @Override  
  58.      public void process(Object[] args) throws HiveException {  
  59.           String input = args[0].toString();  
  60.           String delimiter = args[1].toString();  
  61.           String order = args[2].toString();  
  62.           String[] strList = input.split(delimiter);  
  63.           Arrays.sort(strList);  
  64.           if (strList.length > 1) {  
  65.                if (order.toUpperCase().equals("DESC")) {  
  66.                     for (int i = strList.length - 1; i > 0; i--) {  
  67.                          forwardStr[0] = strList[i].concat(delimiter).concat(  
  68.                                    strList[i - 1]);  
  69.                          forward(forwardStr);  
  70.                     }  
  71.                } else {  
  72.                     for (int i = 0; i < strList.length - 1; i++) {  
  73.                          forwardStr[0] = strList[i].concat(delimiter).concat(  
  74.                                    strList[i + 1]);  
  75.                          forward(forwardStr);  
  76.                     }  
  77.                }  
  78.           } else {  
  79.                forward(strList);  
  80.           }  
  81.      }  
  82. }  

0 0
原创粉丝点击