Hive 0.13 empty "Rows loaded" output: source code analysis and fix


After upgrading to Hive 0.13, the "Rows loaded" message no longer appears when a job finishes.

In Hive 0.11 the "Rows loaded" message is printed by the printRowCount method of the HiveHistory class. The main purpose of HiveHistory is to record information about job execution, including task counters, and it writes its history files under /tmp/$user by default.

In Hive 0.11, SessionState's start method initializes the HiveHistory object:

    if (startSs.hiveHist == null) {
      startSs.hiveHist = new HiveHistory(startSs);
    }

In Hive 0.13, however, HiveHistory has become an abstract type whose concrete implementation lives in HiveHistoryImpl, and an extra check was added before HiveHistoryImpl is instantiated: hive.session.history.enabled (false by default) must be enabled, otherwise HiveHistoryImpl is never created:

    if (startSs.hiveHist == null) {
      if (startSs.getConf().getBoolVar(HiveConf.ConfVars.HIVE_SESSION_HISTORY_ENABLED)) {
        startSs.hiveHist = new HiveHistoryImpl(startSs);
      } else {
        // Hive history is disabled, create a no-op proxy
        startSs.hiveHist = HiveHistoryProxyHandler.getNoOpHiveHistoryProxy();
      }
    }
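
When history is disabled, this no-op proxy makes every HiveHistory call, printRowCount included, silently do nothing, which is why the message vanishes without any error or warning. HiveHistoryProxyHandler presumably builds the proxy with java.lang.reflect; the sketch below only illustrates that general pattern with a simplified stand-in interface (History and its methods are placeholders here, not Hive's actual API):

import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Method;
import java.lang.reflect.Proxy;

public class NoOpProxyDemo {
  // Stand-in for the HiveHistory interface; the real one has many more methods.
  interface History {
    void startQuery(String cmd, String id);
    void printRowCount(String queryId);
  }

  // Build a dynamic proxy whose handler does nothing, so every call on the
  // returned History instance is a silent no-op.
  static History getNoOpHistoryProxy() {
    InvocationHandler handler = new InvocationHandler() {
      @Override
      public Object invoke(Object proxy, Method method, Object[] args) {
        return null; // all methods here return void, so null is acceptable
      }
    };
    return (History) Proxy.newProxyInstance(
        History.class.getClassLoader(), new Class<?>[] {History.class}, handler);
  }

  public static void main(String[] args) {
    History h = getNoOpHistoryProxy();
    h.startQuery("select 1", "q1"); // ignored
    h.printRowCount("q1");          // prints nothing
  }
}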

Even after fixing this configuration (setting hive.session.history.enabled to true), the Rows loaded message still did not appear, so the source code had to be analyzed further.

The implementation of printRowCount is as follows:

  public void printRowCount(String queryId) {
    QueryInfo ji = queryInfoMap.get(queryId);
    if (ji == null) {  // if ji is null, return immediately
      return;
    }
    for (String tab : ji.rowCountMap.keySet()) {
      // read the per-table row count from the map
      console.printInfo(ji.rowCountMap.get(tab) + " Rows loaded to " + tab);
    }
  }
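
For reference, when everything works this loop prints one console line per destination table of the form "<count> Rows loaded to <table>", e.g. "1000 Rows loaded to dst_table" (the count and table name here are made-up examples); if nothing is recorded, the loop simply prints nothing.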

In Hive 0.13, the ji object obtained here is empty, so nothing gets printed.

Digging further, the cause is that the job counters no longer contain any TABLE_ID_(\\d+)_ROWCOUNT counter, so nothing matches the ROW_COUNT_PATTERN regex and the row count values can never be picked up.

The getRowCountTableName method, which extracts the rows-loaded information from the task counters, looks like this:

  private static final String ROW_COUNT_PATTERN = "TABLE_ID_(\\d+)_ROWCOUNT";
  private static final Pattern rowCountPattern = Pattern.compile(ROW_COUNT_PATTERN);
......
  String getRowCountTableName(String name) {
    if (idToTableMap == null) {
      return null;
    }
    Matcher m = rowCountPattern.matcher(name);
    // no counter matches TABLE_ID_xxxx_ROWCOUNT, i.e. the counter is never
    // emitted any more, so this always falls through and returns null
    if (m.find()) {
      String tuple = m.group(1);
      return idToTableMap.get(tuple);
    }
    return null;
  }
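
To make the failure concrete, here is a minimal standalone check of the same regex against two counter names. TABLE_ID_1_ROWCOUNT is the form Hive 0.11 emitted; SOME_OTHER_COUNTER is just a hypothetical placeholder for whatever counters Hive 0.13 actually produces:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RowCountPatternDemo {
  // Same pattern HiveHistory uses to recognize the per-table row-count counter.
  private static final Pattern ROW_COUNT_PATTERN =
      Pattern.compile("TABLE_ID_(\\d+)_ROWCOUNT");

  public static void main(String[] args) {
    // Counter named the way Hive 0.11's FileSinkOperator registered it:
    // the pattern matches and group(1) yields the destination table id.
    Matcher m1 = ROW_COUNT_PATTERN.matcher("TABLE_ID_1_ROWCOUNT");
    if (m1.find()) {
      System.out.println("matched, table id = " + m1.group(1)); // table id = 1
    }

    // Any other counter name (placeholder below) does not match, so
    // getRowCountTableName returns null and no row count is ever recorded.
    Matcher m2 = ROW_COUNT_PATTERN.matcher("SOME_OTHER_COUNTER");
    System.out.println("matches? " + m2.find()); // false
  }
}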

The TABLE_ID_(\\d+)_ROWCOUNT counter is registered by the FileSinkOperator class. The relevant Hive 0.11 code is:

  protected void initializeOp(Configuration hconf) throws HiveException {
..........
      int id = conf.getDestTableId();
      if ((id != 0) && (id <= TableIdEnum.values().length)) {
        String enumName = "TABLE_ID_" + String.valueOf(id) + "_ROWCOUNT";
        tabIdEnum = TableIdEnum.valueOf(enumName);
        row_count = new LongWritable();
        statsMap.put(tabIdEnum, row_count);
      }

In Hive 0.13 this code was removed entirely. That is the root cause: FileSinkOperator no longer puts the per-table row count into statsMap, so the TABLE_ID_<id>_ROWCOUNT counter never appears among the job counters and HiveHistory has nothing to match. The fix is correspondingly simple: add the counter back.

The patch is as follows:

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 1dde78e..96860f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -68,13 +68,16 @@
 import org.apache.hadoop.util.ReflectionUtils;
 import com.google.common.collect.Lists;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 /**
  * File Sink operator implementation.
  **/
 public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
     Serializable {
-
+  public static Log LOG = LogFactory.getLog("FileSinkOperator.class");
   protected transient HashMap<String, FSPaths> valToPaths;
   protected transient int numDynParts;
   protected transient List<String> dpColNames;
@@ -214,6 +217,7 @@ public Stat getStat() {
   protected transient FileSystem fs;
   protected transient Serializer serializer;
   protected transient LongWritable row_count;
+  protected transient TableIdEnum tabIdEnum = null;
   private transient boolean isNativeTable = true;
   /**
@@ -241,6 +245,23 @@ public Stat getStat() {
   protected transient JobConf jc;
   Class<? extends Writable> outputClass;
   String taskId;
+  public static enum TableIdEnum {
+    TABLE_ID_1_ROWCOUNT,
+    TABLE_ID_2_ROWCOUNT,
+    TABLE_ID_3_ROWCOUNT,
+    TABLE_ID_4_ROWCOUNT,
+    TABLE_ID_5_ROWCOUNT,
+    TABLE_ID_6_ROWCOUNT,
+    TABLE_ID_7_ROWCOUNT,
+    TABLE_ID_8_ROWCOUNT,
+    TABLE_ID_9_ROWCOUNT,
+    TABLE_ID_10_ROWCOUNT,
+    TABLE_ID_11_ROWCOUNT,
+    TABLE_ID_12_ROWCOUNT,
+    TABLE_ID_13_ROWCOUNT,
+    TABLE_ID_14_ROWCOUNT,
+    TABLE_ID_15_ROWCOUNT;
+  }
   protected boolean filesCreated = false;
@@ -317,7 +338,15 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       prtner = (HivePartitioner<HiveKey, Object>) ReflectionUtils.newInstance(
           jc.getPartitionerClass(), null);
     }
-    row_count = new LongWritable();
+    // row_count = new LongWritable();
+    int id = conf.getDestTableId();
+    if ((id != 0) && (id <= TableIdEnum.values().length)) {
+      String enumName = "TABLE_ID_" + String.valueOf(id) + "_ROWCOUNT";
+      tabIdEnum = TableIdEnum.valueOf(enumName);
+      row_count = new LongWritable();
+      statsMap.put(tabIdEnum, row_count);
+    }
+
     if (dpCtx != null) {
       dpSetup();
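
With this patch applied and hive.session.history.enabled kept at true, a rebuilt Hive should once again end load jobs with the familiar "N Rows loaded to <table>" line (the count and table name depend on the actual query). Note that, because of the 15-constant TableIdEnum above, only destination table ids 1 through 15 get a row-count counter.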