hiveql--建表

来源:互联网 发布:navmesh 寻路算法 编辑:程序博客网 时间:2024/06/05 17:17

创建hive表

1、建外表(分区pdate,\t作为字段分隔符,hdfs路径:path):

CREATE EXTERNAL TABLE tablename(
字段1 string,
字段2 string
)    
partitioned by (pdate string)
row format delimited fields terminated by '\t'
LOCATION 'path';

2、创建数据库database_test,创建表table_test(分区pdate、hour,压缩建表):

hive -e "create database if not exists database_test;"

hive -e "DROP TABLE IF EXISTS database_test.table_test";
sql="CREATE TABLE database_test.table_test
(
字段1 string
,字段2 string
)
PARTITIONED BY (
  pdate string,
  hour string)
STORED AS ORC tblproperties ('orc.compress'='ZLIB')";

hive -e "$sql";

3、序列化建表(将日志解析,为各个字段取相应的日志内容):

   首先需要写一个解析类,代码如下:


package net.csdn.hive.table_leiyf;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import net.sf.json.JSONObject;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/**
 * Hive Deserializer that parses one JSON log line (wrapped in a "data" object)
 * into an 8-column struct: type, userid, wsid, dt, mem_disk, cpu, netrx, nettx.
 *
 * Two record types are supported: "mcn" (memory/cpu/network sample, value key
 * "mem") and "disk" (disk sample, value key "disk"). Any field absent from the
 * input is emitted as "" so that column positions always line up with the
 * ObjectInspector returned by {@link #getObjectInspector()}.
 */
public class TestDeserializer implements Deserializer {
// Column names, in the exact order the struct ObjectInspector exposes them.
private static List<String> FieldNames = new ArrayList<String>();
private static List<ObjectInspector> FieldNamesObjectInspectors = new ArrayList<ObjectInspector>();
static {
// Every column is a Java String; register name + inspector pairwise.
for (String name : new String[] {
"type", "userid", "wsid", "dt", "mem_disk", "cpu", "netrx", "nettx" }) {
FieldNames.add(name);
FieldNamesObjectInspectors.add(ObjectInspectorFactory
.getReflectionObjectInspector(String.class,
ObjectInspectorOptions.JAVA));
}
}

/**
 * Parses one serialized row.
 *
 * @param blob expected to be a {@link Text} holding one JSON object with a
 *             nested "data" object; anything else yields null.
 * @return a List of 8 String column values (missing fields become ""), an
 *         empty List when the line has no "data" object or an unknown type,
 *         or null when the input is not usable.
 */
public Object deserialize(Writable blob) {
if (!(blob instanceof Text)) {
return null;
}
String line = ((Text) blob).toString();
if (line == null) {
return null;
}
List<Object> result = new ArrayList<Object>();
JSONObject obj = JSONObject.fromObject(line);
if (obj == null || !obj.containsKey("data")) {
return result;
}
JSONObject objData = (JSONObject) obj.get("data");
String type = objData.containsKey("type") ? objData.get("type").toString() : "";
String canonicalType;
String valueKey; // JSON key whose value maps to the mem_disk column
if (type.equalsIgnoreCase("mcn")) {
canonicalType = "mcn";
valueKey = "mem";
} else if (type.equalsIgnoreCase("disk")) {
canonicalType = "disk";
valueKey = "disk";
} else {
// Unknown/absent type: empty row, as in the original behavior.
return result;
}
// Always emit all 8 columns in schema order; "" placeholders keep the
// positions aligned even when a key is missing from the input JSON.
result.add(canonicalType);                       // type
result.add(stringOrEmpty(objData, "userId"));    // userid
result.add(stringOrEmpty(objData, "wsId"));      // wsid
result.add(formatDate(objData));                 // dt
result.add(stringOrEmpty(objData, valueKey));    // mem_disk
if ("mem".equals(valueKey)) {
result.add(stringOrEmpty(objData, "cpu"));       // cpu
result.add(stringOrEmpty(objData, "netrx"));     // netrx
result.add(stringOrEmpty(objData, "nettx"));     // nettx
} else {
// disk records carry no cpu/network metrics.
result.add("");
result.add("");
result.add("");
}
return result;
}

/** Returns the String value for key, or "" when the key is absent. */
private static String stringOrEmpty(JSONObject objData, String key) {
return objData.containsKey(key) ? objData.getString(key) : "";
}

/**
 * Formats the "date" field as yyyy-MM-dd HH:mm:ss, or "" when absent.
 * NOTE(review): assumes the value is an epoch timestamp in milliseconds
 * (it is passed directly to java.util.Date) — confirm against the producer.
 */
private static String formatDate(JSONObject objData) {
if (!objData.containsKey("date")) {
return "";
}
long timestamp = Long.parseLong(objData.getString("date"));
return new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
.format(new java.util.Date(timestamp));
}

/** Struct inspector describing the 8 String columns of this SerDe. */
public ObjectInspector getObjectInspector() throws SerDeException {
return ObjectInspectorFactory.getStandardStructObjectInspector(
FieldNames, FieldNamesObjectInspectors);
}

/** No configuration needed; the schema is fixed at class-load time. */
public void initialize(Configuration arg0, Properties arg1)
throws SerDeException {
}

/** Statistics are not tracked by this SerDe. */
public SerDeStats getSerDeStats() {
return null;
}

}


将上面打包,然后建表的时候 row format 用这个解析类 TestDeserializer,语句如下:

add jar /path/test.jar;

drop table if exists file_test;
create table file_test
partitioned by (pdate string)
row format serde 'net.csdn.hive.table_leiyf.TestDeserializer'
LOCATION '/home/debug/test22';