小抄:UDTF&UDAF示例

来源:互联网 发布:二维码点餐软件 编辑:程序博客网 时间:2024/04/20 02:52

UDTF

UDTF对应一行输入,通过forward输出多行
eg:

//TODO define input and output types,e.g.,"string,string->string,bigint".@Resolve({"string,bigint->string,double"})public class byeFellows extends UDTF{@Overridepublic void process(Object[] args) throws UDFException{    String a=(String)args[0];    Long b=(Long)args[1];    int cnt=1;    char[] c=a.toCharArray();    //拆分空格分隔的多值列    for(int i=0;i<a.length(),++i)    {        if(c[i]==' ')        {            cnt=cnt+1;        }    }    if(cnt==0)cnt=1;    //如果是多个空格隔开的多值列    for(String t:a.split("\\s+"))    {        forward(t,(double)b/cnt);    }}}

UDAF

UDAF做聚合操作,对多行输入输出一条统计信息
eg:计算变异系数

//实现writable的接口,实现读写方法private static class AvgBuffer implements Writable{    private double sum=0;    private long count=0;    private double sum2=0;    @Override    {        out.writableDouble(sum);        out.writableDouble(sum2);        out.writableLong(count);    }    @Override    public void readFields(DataInput in)throws IOException    {        sum=in.readDouble();        sum2=in.readDouble();        count=in.readLong();    }    @Override    public Writable newBuffer()    {        return new AvgBuffer();    }}@Override//游标遍历每个小分片上的统计量信息public void iterate(Writable buffer,Writable[] args)throws UDFException{    DoubleWritable arg=(DoubleWritable)args[0];    AvgBuffer buf=(AvgBuffer)buffer;    if(arg!=null)    {        buf.count+=1;        buf.sum+=arg.get();        buf.sum2+=Math.pow(arg.get(),2);    }}@Override//所有分片结果合并public void merge(Writable buffer,Writable partial)throws UDFException{    AvgBuffer buf=(AvgBuffer)buffer;    AvgBuffer p=(AvgBuffer)partial;    buf.sum+=p.sum;    buf.sum2+=p.sum2;    buf.count+=p.count;}private DoubleWritable ret=new DoubleWritable();@Override//merge后进行业务逻辑处理public Writable terminate(Writable buffer)throws UDFException{    AvgBuffer buf=(AvgBuffer)buffer;    if(buf.count==0)    {        ret.set(0);    }else if (buf.count==1)    {        ret.set(buf.sum);    }else    {        ret.set(Math.sqrt((buf.sum2-buf.sum*(buf.sum/buf.count))/(buf.count-1))/(buf.sum/buf.count));    }return ret;}
0 0
原创粉丝点击