Hive+GenericUDF示例一
来源:互联网 发布:淘宝拍照片技巧 编辑:程序博客网 时间:2024/06/15 04:57
和UDF相比,通用UDF(GenericUDF)支持复杂类型(比如List,struct等)的输入和输出。
下面来看一个小示例。
Hive中whereme表中包含若干人的行程如下:
A 2013-10-10 8:00:00 home
A 2013-10-10 10:00:00 Super Market
A 2013-10-10 12:00:00 KFC
A 2013-10-10 15:00:00 school
A 2013-10-10 20:00:00 home
A 2013-10-15 8:00:00 home
A 2013-10-15 10:00:00 park
A 2013-10-15 12:00:00 home
A 2013-10-15 15:30:00 bank
A 2013-10-15 19:00:00 home
通过查询我们要得到如下结果:
A 2013-10-10 08:00:00 home 10:00:00 Super Market
A 2013-10-10 10:00:00 Super Market 12:00:00 KFC
A 2013-10-10 12:00:00 KFC 15:00:00 school
A 2013-10-10 15:00:00 school 20:00:00 home
A 2013-10-15 08:00:00 home 10:00:00 park
A 2013-10-15 10:00:00 park 12:00:00 home
A 2013-10-15 12:00:00 home 15:30:00 bank
A 2013-10-15 15:30:00 bank 19:00:00 home
1.编写GenericUDF.
package com.wz.udf;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.FloatWritable;import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;import org.apache.hadoop.hive.ql.exec.UDFArgumentException;import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.serde2.lazy.LazyString;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.StructField;import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;import java.text.DateFormat;import java.text.SimpleDateFormat;import java.util.Date; import java.util.Calendar;import java.util.ArrayList; public class helloGenericUDF extends GenericUDF { ////输入变量定义 private ObjectInspector peopleObj; private ObjectInspector timeObj; private ObjectInspector placeObj; //之前记录保存 String strPreTime = ""; String strPrePlace = ""; String strPrePeople = ""; @Override //1.确认输入类型是否正确 //2.输出类型的定义 public ObjectInspector 
initialize(ObjectInspector[] arguments) throws UDFArgumentException { peopleObj = (ObjectInspector)arguments[0]; timeObj = (ObjectInspector)arguments[1]; placeObj = (ObjectInspector)arguments[2]; //输出结构体定义 ArrayList structFieldNames = new ArrayList(); ArrayList structFieldObjectInspectors = new ArrayList(); structFieldNames.add("people"); structFieldNames.add("day"); structFieldNames.add("from_time"); structFieldNames.add("from_place"); structFieldNames.add("to_time"); structFieldNames.add("to_place"); structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector ); structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector ); structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector ); structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector ); structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector ); structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector ); StructObjectInspector si2; si2 = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors); return si2; } //遍历每条记录 @Override public Object evaluate(DeferredObject[] arguments) throws HiveException{ LazyString LPeople = (LazyString)(arguments[0].get()); String strPeople = ((StringObjectInspector)peopleObj).getPrimitiveJavaObject( LPeople ); LazyString LTime = (LazyString)(arguments[1].get()); String strTime = ((StringObjectInspector)timeObj).getPrimitiveJavaObject( LTime ); LazyString LPlace = (LazyString)(arguments[2].get()); String strPlace = ((StringObjectInspector)placeObj).getPrimitiveJavaObject( LPlace ); Object[] e; e = new Object[6]; try { //如果是同一个人,同一天 if(strPrePeople.equals(strPeople) && IsSameDay(strTime) ) { e[0] = new Text(strPeople); e[1] = new Text(GetYearMonthDay(strTime)); e[2] = new Text(GetTime(strPreTime)); e[3] = new 
Text(strPrePlace); e[4] = new Text(GetTime(strTime)); e[5] = new Text(strPlace); } else { e[0] = new Text(strPeople);e[1] = new Text(GetYearMonthDay(strTime)); e[2] = new Text("null"); e[3] = new Text("null"); e[4] = new Text(GetTime(strTime)); e[5] = new Text(strPlace); } } catch(java.text.ParseException ex) { } strPrePeople = new String(strPeople); strPreTime= new String(strTime); strPrePlace = new String(strPlace); return e; } @Override public String getDisplayString(String[] children) { assert( children.length>0 ); StringBuilder sb = new StringBuilder(); sb.append("helloGenericUDF("); sb.append(children[0]); sb.append(")"); return sb.toString(); } //比较相邻两个时间段是否在同一天 private boolean IsSameDay(String strTime) throws java.text.ParseException{ if(strPreTime.isEmpty()){ return false; } String curDay = GetYearMonthDay(strTime); String preDay = GetYearMonthDay(strPreTime); return curDay.equals(preDay); } //获取年月日 private String GetYearMonthDay(String strTime) throws java.text.ParseException{ DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); Date curDate = df.parse(strTime); df = new SimpleDateFormat("yyyy-MM-dd"); return df.format(curDate); } //获取时间 private String GetTime(String strTime) throws java.text.ParseException{ DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); Date curDate = df.parse(strTime); df = new SimpleDateFormat("HH:mm:ss"); return df.format(curDate); }}
2.在Hive里面创建两张表,一张包含结构体的表保存执行GenericUDF查询后的结果,另外一张用于保存最终结果.
hive> create table whereresult(people string,day string,from_time string,from_place string,to_time string,to_place string);OKTime taken: 0.287 secondshive> create table tmpResult(info struct<people:string,day:string,from_time:string,from_place:string,to_time:string,to_place:string>);OKTime taken: 0.074 seconds
3.执行GenericUDF查询,得到最终结果。
hive> insert overwrite table tmpResult select hellogenericudf(whereme.people,whereme.time,whereme.place) from whereme;hive> insert overwrite table whereresult select info.people,info.day,info.from_time,info.from_place,info.to_time,info.to_place from tmpResult where info.from_time<>'null';Total MapReduce jobs = 2Launching Job 1 out of 2Number of reduce tasks is set to 0 since there's no reduce operatorStarting Job = job_201312022129_0006, Tracking URL = http://localhost:50030/jobdetails.jsp?jobid=job_201312022129_0006Kill Command = /home/wangzhun/hadoop/hadoop-0.20.2/bin/../bin/hadoop job -Dmapred.job.tracker=localhost:9001 -kill job_201312022129_0006Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 02013-12-02 22:48:40,733 Stage-1 map = 0%, reduce = 0%2013-12-02 22:48:49,825 Stage-1 map = 100%, reduce = 0%2013-12-02 22:48:52,869 Stage-1 map = 100%, reduce = 100%Ended Job = job_201312022129_0006Ended Job = -383357832, job is filtered out (removed at runtime).Moving data to: hdfs://localhost:9000/tmp/hive-root/hive_2013-12-02_22-48-24_406_2701579121398466034/-ext-10000Loading data to table default.whereresultDeleted hdfs://localhost:9000/user/hive/warehouse/whereresultTable default.whereresult stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 346, raw_data_size: 0]8 Rows loaded to whereresultMapReduce Jobs Launched: Job 0: Map: 1 HDFS Read: 420 HDFS Write: 346 SUCESSTotal MapReduce CPU Time Spent: 0 msecOKTime taken: 29.098 secondshive> select * from whereresult;OKA2013-10-1008:00:00home10:00:00Super MarketA2013-10-1010:00:00Super Market12:00:00KFCA2013-10-1012:00:00KFC15:00:00schoolA2013-10-1015:00:00school20:00:00homeA2013-10-1508:00:00home10:00:00parkA2013-10-1510:00:00park12:00:00homeA2013-10-1512:00:00home15:30:00bankA2013-10-1515:30:00bank19:00:00homeTime taken: 0.105 seconds
- Hive+GenericUDF示例一
- Hive+GenericUDF示例一
- Hive+GenericUDF示例一
- Hive+GenericUDF示例二
- Hive+GenericUDF示例二
- 将Hive统计分析结果导入到MySQL数据库表中(三)——使用Hive UDF或GenericUDF
- Hive+UDTF简单示例
- Hive+UDAF简单示例
- Hive+UDTF简单示例
- Hive+UDAF简单示例
- Hive+UDTF简单示例
- Hive SQL 操作示例
- Hive+UDAF简单示例
- Hive+UDTF简单示例
- hive示例演示
- hive JDBC连接示例
- Hive与JDBC示例
- hive常用语句示例
- windows环境下部署django mod_wsgi apache
- Building an MFC project for a non-Unicode character set is deprecated
- linux下C语言errno
- DShow实现一个avi视频的播放(含有个人解释和注释)
- MVC3.0 删除操作
- Hive+GenericUDF示例一
- 个人能力--沟通
- LDAP快速入门
- Java多线程-线程的同步(同步方法)
- c#中MD5的加密解密
- linux 中管道的全双工通信(fork()父进曾和子进程执行先后顺序是不确定的)
- 适应不同分辨率屏幕的问题 android layout
- ORA-00257解决[未解决自己问题,而是直接重启电脑得解决]
- 电子邮件营销效果检验技巧分享