hive 自定义函数

来源:互联网 发布:上海租房网站 知乎 编辑:程序博客网 时间:2024/06/07 03:22

数据格式

zhangsan |  a,b,c,d,e,f,g

lisi |  h,i,j,k,l,m,n


结构:

zhangsan a

zhangsan b

zhangsan c

zhangsan d

zhangsan e

zhangsan f

zhangsan g

lisi h

lisi i

lisi j

lisi k

lisi l

lisi m

lisi n

----------------------------------

package com.snda.hive.aaudf;

import java.util.ArrayList;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * UDTF that splits a delimited string column into one output row per element.
 *
 * <p>Usage: {@code SELECT name, adid FROM t LATERAL VIEW explode(address, ',') v AS adid;}
 * Takes two arguments: the string to split and the separator. The separator is
 * passed to {@link String#split(String)}, so it is interpreted as a regular
 * expression (e.g. pass {@code "\\|"} to split on a literal pipe).
 *
 * <p>Emits a single string column named {@code col1}.
 */
public class ExplodeMap extends GenericUDTF {

    @Override
    public void close() throws HiveException {
        // No per-instance resources to release.
    }

    /**
     * Validates the arguments and declares the one-column (col1: string)
     * output row shape.
     *
     * @param args object inspectors for the call-site arguments
     * @return a struct inspector with a single string field named "col1"
     * @throws UDFArgumentException if the argument count or types are wrong
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        if (args.length != 2) {
            throw new UDFArgumentLengthException("ExplodeMap takes only two argument");
        }
        // Both the value to split and the separator must be primitive (string-like).
        // The original version only checked args[0]; args[1] is validated too so a
        // bad call fails at query compile time instead of at run time.
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
                || args[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentException("ExplodeMap takes string as a parameter");
        }
        ArrayList<String> fieldNames = new ArrayList<String>();
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Splits args[0] on the args[1] separator and forwards one single-column
     * row per element.
     *
     * @param args [0] = the string to split, [1] = the separator (regex)
     * @throws HiveException if forwarding a row fails; unlike the original
     *     version, errors are no longer silently swallowed
     */
    @Override
    public void process(Object[] args) throws HiveException {
        // A NULL input column produces no rows instead of an NPE.
        if (args[0] == null || args[1] == null) {
            return;
        }
        String input = args[0].toString();
        String separator = args[1].toString();
        for (String element : input.split(separator)) {
            // Forward exactly one value per row, matching the one-field struct
            // declared in initialize(). The original forwarded the result of
            // line.split("XXXX") — an array whose length could exceed 1 if the
            // data ever contained "XXXX", breaking the declared row shape.
            forward(new Object[] { element });
        }
    }
}

-- Source table: one row per person; 'address' holds a comma-separated list
-- of values (e.g. "a,b,c,d,e,f,g") and fields are separated by '|'.
create table testtable(
name string,
address string
)
row format delimited fields terminated by '|' lines terminated by '\n' stored as textfile;
-- Load the sample file from the local filesystem; OVERWRITE replaces any
-- existing contents of the table.
load data local inpath '/home/hadoop/data/test.txt' overwrite into table testtable; 


-- Register the UDTF jar from HDFS and expose it as a temporary function for
-- this session.
-- NOTE(review): the name 'explode' shadows Hive's built-in explode() for the
-- session — consider a distinct name such as explode_map to avoid confusion.
add jar hdfs://192.168.1.30:9000/ExplodeMap.jar;
create temporary function explode as 'com.snda.hive.aaudf.ExplodeMap';
-- LATERAL VIEW runs the UDTF once per row and joins each emitted element
-- back to the row, producing one (name, adid) pair per address element.
select  name,adid from testtable LATERAL VIEW explode(address,',') adTable as adid;

原创粉丝点击