ART Mterp Interpreter 解释 bytecode

来源:互联网 发布:淘宝红包app 编辑:程序博客网 时间:2024/05/27 00:50

Interpreter

首先分析 Interpreter 如何解释执行 dalvik byte code,Interpreter 在 Android 7.0 的 ART 中有 3 种实现:

InterpreterImpl

enum InterpreterImplKind {  kSwitchImplKind,        // Switch-based interpreter implementation.  kComputedGotoImplKind,  // Computed-goto-based interpreter implementation.  kMterpImplKind          // Assembly interpreter};  template<bool do_access_check, bool transaction_active>extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,                                ShadowFrame& shadow_frame, JValue result_register,                                bool interpret_one_instruction); template<bool do_access_check, bool transaction_active>extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item,                              ShadowFrame& shadow_frame, JValue result_register); // Mterp does not support transactions or access check, thus no templated versions.extern "C" bool ExecuteMterpImpl(Thread* self, const DexFile::CodeItem* code_item,                                 ShadowFrame* shadow_frame, JValue* result_register);

在 7.0 上,ART 默认使用 Mterp 类型的 Interpreter 实现,在一些特殊情况下也会使用 Switch 类型的 Interpreter;

在 L & M 上,ART 只有 Switch 和 Goto 的实现;印象中 Kitkat 上使用的是 Goto;

这就决定了,dex文件中的 dalvik byte code 会使用 ExecuteMterpImpl() 这个函数来进行解释执行;

而看其参数:code_item,shadow_frame,result_register 应该是调用它(ExecuteMterpImpl)的函数准备好的;

也就是说,不管调用方的 java 函数是以 quick code 方式执行还是以解释方式执行,其在通过 ExecuteMterpImpl 调用一个解释执行的函数时,

必须要先准备好上面 3 个参数;


下面看 ExecuteMterpImpl 函数的实现:

/* During bringup, we'll use the shadow frame model instead of xFP *//* single-purpose registers, given names for clarity */#define xPC      x20#define xFP      x21#define xSELF    x22#define xINST    x23#define wINST    w23#define xIBASE   x24#define xREFS    x25#define wPROFILE w26#define xPROFILE x26#define ip       x16#define ip2      x17  .macro GET_INST_OPCODE reg    and     \reg, xINST, #255.endm .macro GOTO_OPCODE reg    add     \reg, xIBASE, \reg, lsl #7    br      \reg.endm .macro EXPORT_PC    str  xPC, [xFP, #OFF_FP_DEX_PC_PTR].endm.macro FETCH_INST    ldrh    wINST, [xPC].endm /* * Interpreter entry point. * On entry: *  x0  Thread* self/ *  x1  code_item *  x2  ShadowFrame *  x3  JValue* result_register * */    .global ExecuteMterpImpl    .type   ExecuteMterpImpl, %function    .balign 16 ExecuteMterpImpl:    .cfi_startproc    stp     xPROFILE, x27, [sp, #-80]!                          ; callee save regs    stp     xIBASE, xREFS, [sp, #16]    stp     xSELF, xINST, [sp, #32]    stp     xPC, xFP, [sp, #48]    stp     fp, lr, [sp, #64]    add     fp, sp, #64     /* Remember the return register */    str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]       ; return register 记录在shadow frame中     /* Remember the code_item */    str     x1, [x2, #SHADOWFRAME_CODE_ITEM_OFFSET]             ; code item 记录在 shadow frame     /* set up "named" registers */    mov     xSELF, x0                                           ; Thread*     ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]       ; 这 3条指令使得 xREFS指向 shadow frame的 vregs_ 的末尾地址,要干嘛 ?    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs.    add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame     ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.                      
; 开始执行的第一条指令 dex pc    add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]                  ; 获取记录在 code item中的 dalvik byte code    add     xPC, xPC, w0, lsl #1                   // Create direct pointer to 1st dex opcode   ; 把 xPC指向第一条 opcode;这里要 lsl #1,是因为 dex_pc 以 16-bit code unit 为单位(dex_pc_ptr_ 是 const uint16_t*),乘 2 才是字节偏移    EXPORT_PC                                                                                   ; 把 xPC 保存在 shadow frame的 const uint16_t* dex_pc_ptr_; 成员中,fast interpreter使用direct dexpc,不使用offset     /* Starting ibase */    ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]                                       ; Thread::tlsPtr_.mterp_current_ibase load 到寄存器 xIBASE中;// mterp_current_ibase是在 InitInterpreterTls()时设置的, 设置的值是 artMterpAsmInstructionStart = .L_op_nop ;实际就是 .L_op_nop这段代码在 内存中的地址// 后续在 GOTO_OPCODE 时,会从这个基础地址 +off 即可获取各个 opcode 对应在内存中的地址,跳转过去即可     /* Set up for backwards branches & osr profiling */    ldr     x0, [xFP, #OFF_FP_METHOD]                                                           ; 记录 hotness countdown(剩余多少次即可达到 threshold)    add     x1, xFP, #OFF_FP_SHADOWFRAME    bl      MterpSetUpHotnessCountdown    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE     /* start executing the instruction at rPC */    FETCH_INST                          // load wINST from rPC                                  ; 取指    GET_INST_OPCODE ip                  // extract opcode from wINST                            ; 译码    GOTO_OPCODE ip                      // jump to next instruction                             ; 跳转到 opcode 对应的解释器代码    /* NOTE: no fallthrough */     .global artMterpAsmInstructionStart    .type   artMterpAsmInstructionStart, %functionartMterpAsmInstructionStart = .L_op_nop                                                         ; artMterpAsmInstructionStart指向 .L_op_nop 的起始内存    .text /* ------------------------------ */    .balign 128                                                                                 ; .balign 128,这个很关键,每个 opcode 
对应代码段的 align 都是相同(128),方便根据 opcode 计算代码位置进行跳转                                                                                                ; 为什么是 128 ? 怀疑是经过对比,每个 opcode 对应的解释器代码的 size 都不超过 128 byte.L_op_nop: /* 0x00 *//* File: arm64/op_nop.S */    FETCH_ADVANCE_INST 1                // advance to next instr, load rINST    GET_INST_OPCODE ip                  // ip<- opcode from rINST    GOTO_OPCODE ip                      // execute it

接下来进行一遍实际的运行过程:

(gdb) disassembleDump of assembler code for function ExecuteMterpImpl:   0x0000007f8683cf80 <+0>:       stp x26, x27, [sp,#-80]!   0x0000007f8683cf84 <+4>:       stp x24, x25, [sp,#16]   0x0000007f8683cf88 <+8>:       stp x22, x23, [sp,#32]   0x0000007f8683cf8c <+12>:  stp x20, x21, [sp,#48]   0x0000007f8683cf90 <+16>:  stp x29, x30, [sp,#64]   0x0000007f8683cf94 <+20>:  add x29, sp, #0x40   0x0000007f8683cf98 <+24>:  str x3, [x2,#16]   0x0000007f8683cf9c <+28>:  str x1, [x2,#32]   0x0000007f8683cfa0 <+32>:  mov x22, x0   0x0000007f8683cfa4 <+36>:  ldr w0, [x2,#48]   0x0000007f8683cfa8 <+40>:  add x21, x2, #0x3c   0x0000007f8683cfac <+44>:  add x25, x21, x0, uxtx #2   0x0000007f8683cfb0 <+48>:  ldr w0, [x2,#52]                            ; 从shadowframe获取 dex pc offset到 w0   0x0000007f8683cfb4 <+52>:  add x20, x1, #0x10                          ; xPC(x20) 指向 code item中偏移 0x10的位置,该位置就是函数 bytecode 存放的位置   0x0000007f8683cfb8 <+56>:  add x20, x20, x0, uxtx #1                   ; xPC 指向 java 函数的第一个 dalvik bytecode=> 0x0000007f8683cfbc <+60>:   stur    x20, [x21,#-36]                     ; xPC保存在 shadowframe的 dex_pc_ptr_   0x0000007f8683cfc0 <+64>:  ldr x24, [x22,#1504]                        ;从 Thread* 中取出 mterp_current_ibase,保存到 x24,它的值是 0x0000007f8683d000   0x0000007f8683cfc4 <+68>:  ldur    x0, [x21,#-52]                      ; 取出 shadowframe中的 ArtMethod* 放到x0 (gdb) p /x  $x21$7 = 0x7ff85caa4c(gdb) x 0x7ff85caa4c-520x7ff85caa18:   0x712c81f0(gdb) art_get_method_name_by_method_id 0x712c81f0android.os.BaseLooper.updateMessageByState "(Landroid/os/Message;Landroid/os/BaseLooper$MessageMonitorInfo;I)V"当前正在解释执行的 java 函数是: updateMessageByState    0x0000007f8683cfc8 <+72>:  sub x1, x21, #0x3c                          ; shadowframe 指针放在 x1   0x0000007f8683cfcc <+76>:  bl  0x7f86d3560c <MterpSetUpHotnessCountdown(art::ArtMethod*, art::ShadowFrame*)>   0x0000007f8683cfd0 <+80>:  mov w26, w0                                 ; 记录 countdown剩余到 xPROFILE 寄存器   0x0000007f8683cfd4 
<+84>:  ldrh    w23, [x20]                          ; 从 xPC 指向的内存中取出 2 byte(一个 16-bit code unit)到 w23,其低 8 bit 即 opcode   0x0000007f8683cfd8 <+88>:  and x16, x23, #0xff                         ; 把 w23 和 0xff 做 & 计算,取出低 8 bit 的 opcode,保存到 x16 取 4 byte 时:(gdb) p /x $x20$11 = 0x733f8350(gdb) x 0x733f83500x733f8350: 0xbb1d0071 ldrh 实际取 2 byte(0x0071),其中低 1 byte 为:(gdb) x /b 0x733f83500x733f8350: 0x71(gdb) p /x $w23$15 = 0x71(gdb) p /x 0x71&0xff$16 = 0x71(gdb) p /x 0x0000007f8683d000+0x71*128$18 = 0x7f86840880即: opcode 是 0x71(71: invoke-static),对应的解释器代码入口在: 0x7f86840880; 这是一条 invoke-static 调用 static 函数的指令    0x0000007f8683cfdc <+92>:  add x16, x24, x16, lsl #7                   ; 根据 x24(mterp_current_ibase) + offset(opcode * 128)来计算这个 opcode 对应的解释器代码在内存中的位置   0x0000007f8683cfe0 <+96>:  br  x16                                     ; 跳转到 opcode 对应的解释器代码,即跳转到 0x7f86840880   0x0000007f8683cfe4 <+100>: nop   0x0000007f8683cfe8 <+104>: nop   0x0000007f8683cfec <+108>: nop   0x0000007f8683cff0 <+112>: nop   0x0000007f8683cff4 <+116>: nop   0x0000007f8683cff8 <+120>: nop   0x0000007f8683cffc <+124>: nop   [0x0000007f8683d000]<+0>:       ldrh    w23, [x20,#2]!                      ; .L_op_nop,为何 从 <+0>开始了 ? 推测是因为这里是新符号 artMterpAsmInstructionStart(= .L_op_nop)的起点,gdb 改为相对该符号计算偏移,而不是 align(128) 与上面的 align(16) 不同导致的(待确认)   0x0000007f8683d004 <+4>:       and x16, x23, #0xff   0x0000007f8683d008 <+8>:       add x16, x24, x16, lsl #7   0x0000007f8683d00c <+12>:  br  x16   0x0000007f8683d010 <+16>:  nop   ...   0x0000007f8683d07c <+124>: nop   0x0000007f8683d080 <+128>: lsr w1, w23, #12                            ; .L_op_move   0x0000007f8683d084 <+132>: ubfx    w0, w23, #8, #4   0x0000007f8683d088 <+136>: ldrh    w23, [x20,#2]!   0x0000007f8683d08c <+140>: ldr w2, [x21,w1,uxtw #2]   0x0000007f8683d090 <+144>: and x16, x23, #0xff   0x0000007f8683d094 <+148>: str w2, [x21,w0,uxtw #2]   0x0000007f8683d098 <+152>: str wzr, [x25,w0,uxtw #2]   0x0000007f8683d09c <+156>: add x16, x24, x16, lsl #7   0x0000007f8683d0a0 <+160>: br  x16   0x0000007f8683d0a4 <+164>: nop   ...   
0x0000007f8684087c <+14460>:   nop  [0x0000007f86840880]<+14464>:   stur    x20, [x21,#-36]                                 ; .L_op_invoke_static+0; 保存 dex_pc 到 shadowframe的dex_pc_ptr_   0x0000007f86840884 <+14468>:   mov x0, x22                                             ; 准备参数 Thread*   0x0000007f86840888 <+14472>:   sub x1, x21, #0x3c                                      ; 准备参数 shadowframe   0x0000007f8684088c <+14476>:   mov x2, x20                                             ; 准备参数 dex pc   0x0000007f86840890 <+14480>:   mov x3, x23                                             ; 准备参数 dalvik byte instruction   0x0000007f86840894 <+14484>:   bl  0x7f86d2cb14 <MterpInvokeStatic(art::Thread*, art::ShadowFrame*, uint16_t*, uint16_t)>            ; 跳转到 MterpInvokeStatic()   0x0000007f86840898 <+14488>:   cbz w0, 0x7f8684d040 <MterpException>                 ; 如果返回值是 0,跳转到 MterpException   0x0000007f8684089c <+14492>:   ldrh    w23, [x20,#6]!                                  ; 取指下一条,并且 dex_pc+6 (因为一条完整的invoke-static指令需要 6个byte) 所以当前准备执行的这条指令是:(gdb) x /6b 0x733f83500x733f8350: 0x71    0x00    0x1d    0xbb    0x00    0x00意思是 invoke-static 无参    [dex_method_idx] [参数寄存器1,2,3,4]    0x0000007f868408a0 <+14496>:   bl  0x7f86d2b15c <MterpShouldSwitchInterpreters()>        ; 是否切换 Interpreter   0x0000007f868408a4 <+14500>:   cbnz    w0, 0x7f8684d1bc <MterpFallback>              ; 切换   0x0000007f868408a8 <+14504>:   and x16, x23, #0xff                                     ; 译码:从已取到的 w23 中取出 opcode   0x0000007f868408ac <+14508>:   add x16, x24, x16, lsl #7                               ; 计算该 opcode 对应的解释器代码地址   0x0000007f868408b0 <+14512>:   br  x16                                                 ; 跳转到 x16 对应 opcode的解释器代码   0x0000007f868408b4 <+14516>:   nop   0x0000007f868408b8 <+14520>:   nop   ..

而实际 .L_op_invoke_static 是由 MterpInvokeStatic 实现的:

extern "C" size_t MterpInvokeStatic(Thread* self,                                    ShadowFrame* shadow_frame,                                    uint16_t* dex_pc_ptr,                                    uint16_t inst_data)    SHARED_REQUIRES(Locks::mutator_lock_) {  JValue* result_register = shadow_frame->GetResultRegister();  const Instruction* inst = Instruction::At(dex_pc_ptr);            //取指  return DoInvoke<kStatic, false, false>(                         //执行      self, *shadow_frame, inst, inst_data, result_register);} template<InvokeType type, bool is_range, bool do_access_check>static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,                            uint16_t inst_data, JValue* result) {  const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();           // 从前面传递过来的 instruction 指针获取 method_idx,(取第二个 16-bit code unit,即 inst[1],本例中为 0xbb1d)  const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();              // 从前面传递过来的 instruction 指针获取第一个参数所在 vreg 的编号(下面用它取 receiver)  Object* receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);    // 如果是static,则 this为 nullptr  ArtMethod* sf_method = shadow_frame.GetMethod();                                          // 这个是当前正在执行的 method  ArtMethod* const called_method = FindMethodFromCode<type, do_access_check>(               // 根据 method_idx 查找 ArtMethod(invoke-static 将要调用的函数)      method_idx, &receiver, sf_method, self);  ...      if (type == kVirtual || type == kInterface) {        jit->InvokeVirtualOrInterface(                                                       // 为何只对 kVirtual 和 kInterface记录 ProfilingInfo ?            
self, receiver, sf_method, shadow_frame.GetDexPC(), called_method);      }      jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);                          // jit AddSamples     return DoCall<is_range, do_access_check>(called_method, self, shadow_frame, inst, inst_data,  // 通过 DoCall 执行                                             result);}  DoCall 又会调用 DoCallCommon(),在 DoCallCommon()中,根据 bytecode (0x733f8350:   0x71    0x00    0x1d    0xbb    0x00    0x00)进行构建 shadowframe和准备参数;然后准备执行 static函数(0xbb1d) ,其关键code:    // Do the call now.  if (LIKELY(Runtime::Current()->IsStarted())) {    ArtMethod* target = new_shadow_frame->GetMethod();    if (ClassLinker::ShouldUseInterpreterEntrypoint(                                        // 准备调用的函数如果需要解释执行,则进入 ArtInterpreterToInterpreterBridge        target,        target->GetEntryPointFromQuickCompiledCode())) {      ArtInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);    } else {      ArtInterpreterToCompiledCodeBridge(                                                   // 准备调用的函数如果不需要解释执行,则进入 ArtInterpreterToCompiledCodeBridge          self, shadow_frame.GetMethod(), code_item, new_shadow_frame, result);    }  } else {    UnstartedRuntime::Invoke(self, code_item, new_shadow_frame, result, first_dest_reg);  }    在 ArtInterpreterToInterpreterBridge()中,会调用 art::interpreter::Execute() 来解释执行:  if (LIKELY(!shadow_frame->GetMethod()->IsNative())) {    result->SetJ(Execute(self, code_item, *shadow_frame, JValue()).GetJ());  }在 Execute()中会使用 JIT记录 MethodEnter 记录调用次数,判断 jit_code_cache 中是否包含对应 ArtMethod 的 entry_point_from_quick_compiled_code_ 地址,如果包含,说明被这个java函数被 JIT编译过,则调用 ArtInterpreterToCompiledCodeBridge(),通过 ArtMethod::Invoke() 执行 compiledcode;  贴出 Execute() 关键代码:  static inline JValue Execute(    Thread* self,    const DexFile::CodeItem* code_item,    ShadowFrame& shadow_frame,    JValue result_register,    bool stay_in_interpreter = false) SHARED_REQUIRES(Locks::mutator_lock_) {        if (jit != 
nullptr) {        jit->MethodEntered(self, shadow_frame.GetMethod());        if (jit->CanInvokeCompiledCode(method)) {          ArtInterpreterToCompiledCodeBridge(self, nullptr, code_item, &shadow_frame, &result);        }      }      // 三种类型的解释器,Android 7.0 默认使用 Mterp    if (kInterpreterImplKind == kMterpImplKind) {      if (transaction_active) {   // ArtTransaction 只在 CompilerDriver PreCompile InitializeClasses时 Enable;        // No Mterp variant - just use the switch interpreter.        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register,                                              false);      } else if (UNLIKELY(!Runtime::Current()->IsStarted())) {        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,                                               false);      } else {        while (true) {          // Mterp does not support all instrumentation/debugging.          if (MterpShouldSwitchInterpreters() != 0) {            return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register,                                                   false);          }          bool returned = ExecuteMterpImpl(self, code_item, &shadow_frame, &result_register);        //看到了我们的关键函数,我们开始分析的时候,就是从这个函数入手,现在又回来了,这是一个 Interpreter 调用 Interpreter循环          if (returned) {            return result_register;          } else {            // Mterp didn't like that instruction.  Single-step it with the reference interpreter.            result_register = ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame,                                                               result_register, true);            if (shadow_frame.GetDexPC() == DexFile::kDexNoIndex) {              // Single-stepped a return or an exception not handled locally.  Return to caller.              
return result_register;            }          }        }      }    } else if (kInterpreterImplKind == kSwitchImplKind) {    } else {}


接下来,根据上面的流程画一张图:


对图的说明:

1.虽然我们是从 ExecuteMterpImpl 这个函数开始分析,但实际 Interpreter开始的位置是在 art::interpreter::Execute()这个函数;

它可能是被一个解释执行的代码从 ArtInterpreterToInterpreterBridge → Execute() 调用过来的,

也有可能是被一个在 native code执行的代码通过 art_quick_to_interpreter_bridge → artQuickToInterpreterBridge → interpreter::EnterInterpreterFromEntryPoint → Execute()这个调用关系调用过来的;

2.一般 invoke-XXX bytecode之后都会跟着一个 move-result 或者其变种,因为大多数情况下,我们都是需要使用函数的返回值的;

所以图中画了一条 从 OPCODE 0x71(invoke-static)到 OPCODE 0x0a(move-result)的跳转线;


另外在结尾,我们通过 oatdump,看一下 android.os.BaseLooper.updateMessageByState 这个函数的dalvik bytecode:

15: void android.os.BaseLooper.updateMessageByState(android.os.Message, android.os.BaseLooper$MessageMonitorInfo, int) (dex_method_idx=48127)  DEX CODE:    0x0000: 7100 1dbb 0000            | invoke-static {}, boolean android.os.AnrMonitor.canMonitorAnr() // method@47901    0x0003: 0a00                      | move-result v0    0x0004: 3800 0a00                 | if-eqz v0, +10    0x0006: ef10 2400                 | iget-boolean-quick v0, v1, thing@36    0x0008: 3800 0600                 | if-eqz v0, +6    0x000a: 2b04 2200 0000            | packed-switch v4, +34    0x000d: 7300                      | return-void-no-barrier    0x000e: 7300                      | return-void-no-barrier    0x000f: e920 1800 4300            | invoke-virtual-quick {v3, v4},  // vtable@24    0x0012: e920 1700 2300            | invoke-virtual-quick {v3, v2},  // vtable@23    0x0015: e812 1000                 | iput-object-quick v2, v1, // offset@16    0x0017: 7120 42bb 3200            | invoke-static {v2, v3}, void android.os.AnrMonitor.startMonitor(android.os.Message, android.os.BaseLooper$MessageMonitorInfo) // method@47938    0x001a: 28f3                      | goto -13    0x001b: 1200                      | const/4 v0, #+0    0x001c: e810 1000                 | iput-object-quick v0, v1, // offset@16    0x001e: e920 1800 4300            | invoke-virtual-quick {v3, v4},  // vtable@24    0x0021: 7120 30bb 3200            | invoke-static {v2, v3}, void android.os.AnrMonitor.finishMonitor(android.os.Message, android.os.BaseLooper$MessageMonitorInfo) // method@47920    0x0024: 7120 22bb 3200            | invoke-static {v2, v3}, void android.os.AnrMonitor.checkMsgTime(android.os.Message, android.os.BaseLooper$MessageMonitorInfo) // method@47906    0x0027: 7030 f1bb 2103            | invoke-direct {v1, v2, v3}, void android.os.BaseLooper.addMessageToHistoryIfNeed(android.os.Message, android.os.BaseLooper$MessageMonitorInfo) // method@48113    ...  
CODE: (code_offset=0x00000000 size_offset=0x021e2330 size=0)    NO CODE!

可以看到,确实其第一条 bytecode 就是 (7100 1dbb 0000) invoke-static,

而调用的函数 canMonitorAnr() 也没有 Native code(这里不再贴 oatdump了),所以仍然解释执行,通过 ArtInterpreterToInterpreterBridge跳转执行;

紧接着的一条指令也是 move-result  v0;因为我们需要用到返回值,所以才编译出来这条 bytecode;


Mterp 实现的 Interpreter 解释器至此结束。