hive udf udaf 一例

来源:互联网 发布:淘宝企业店铺有假货吗 编辑:程序博客网 时间:2024/05/16 08:38

udf

/**
 *  分组后取前n个值
 */
package test;


import org.apache.hadoop.hive.ql.exec.UDF;
 
public final class Rank extends UDF{
private int  counter;
private String last_key;
public int evaluate(String key){
 if ( !key.equalsIgnoreCase(this.last_key) ) {
    this.counter = 0;
    this.last_key = key;
 }
 return this.counter++;
}
}


hive>add jar Rank.jar;

hive>create temporary function rank as 'test.Rank';

hive>select clsno,rank(clsno),id,score from (select clsno,id,score from byl_topn_test distribute by clsno sort by clsno,score desc)a;

得到结果:

 

rank 值从 0 开始计数,因此取各科成绩中 rank 值小于 2 的记录,即可得到每组的前 2 条记录



udaf

实现avg()


package test;


import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;


public class Avg extends UDAF{
public static class AvgStruct
{
public  double num=0;
public  int count=0;
}
public static class AvgHandle implements UDAFEvaluator 
{
private AvgStruct avgStruct=new AvgStruct();
@Override
public void init() {
// super();
}
public boolean iterate(double s)
{
if(s!=0)
{
avgStruct.num+=s;
avgStruct.count++;
}
return true;
}
public AvgStruct terminatePartial() {
return avgStruct.count==0?null:avgStruct;
}
public boolean merge(AvgStruct o) {
if(o!=null)
{
avgStruct.num+=o.num;
avgStruct.count+=o.count;
}
return true;
}
public double terminate() {
return avgStruct.count==0?null:(double)(avgStruct.num/avgStruct.count);
}
}
}



0 0