JNI引用溢出导致的重启问题分析

来源:互联网 发布:大数据共享交换平台 编辑:程序博客网 时间:2024/05/16 10:36
问题描述

JNI全局引用异常导致的重启问题:'JNI ERROR (app bug): global reference table overflow (max=51200)'

Log

pid: 1279, tid: 2518, name: Binder:1279_9  >>> system_server <<<

signal 6 (SIGABRT), code -6 (SI_TKILL), fault addr --------

Abort message: 'art/runtime/indirect_reference_table.cc:132] JNI ERROR (app bug): global reference table overflow (max=51200)'

    x0   0000000000000000  x1   00000000000009d6  x2   0000000000000006  x3   0000000000000008

    x4   0000000000000166  x5   0000800000000000  x6   0000007f0a5ab000  x7   000000000000000c

    x8   0000000000000083  x9   ffffffffffffffdf  x10  0000000000000000  x11  0000000000000001

    x12  0000000000000010  x13  0000000000000030  x14  ffffffffffffffff  x15  ffffffffffffffff

    x16  0000007f069f9ed0  x17  0000007f069a278c  x18  0000000000000000  x19  0000007eddecf4f8

    x20  0000000000000006  x21  0000007eddecf450  x22  000000000000000d  x23  0000007f05ffe000

    x24  0000007f05f4b52b  x25  0000007f05f85440  x26  0000007eddece4f1  x27  0000007f05ffcda0

    x28  0000000000002ff0  x29  0000007eddece3d0  x30  0000007f0699fbb8

    sp   0000007eddece3b0  pc   0000007f069a2794  pstate 0000000060000000

 

backtrace:

    #00 pc 000000000006d794  /system/lib64/libc.so (tgkill+8)

    #01 pc 000000000006abb4  /system/lib64/libc.so (pthread_kill+64)

    #02 pc 0000000000024098  /system/lib64/libc.so (raise+24)

    #03 pc 000000000001c93c  /system/lib64/libc.so (abort+52)

    #04 pc 000000000043581c  /system/lib64/libart.so (_ZN3art7Runtime5AbortEPKc+464)

    #05 pc 00000000000e5e7c  /system/lib64/libart.so (_ZN3art10LogMessageD2Ev+1592)

    #06 pc 000000000024dd48  /system/lib64/libart.so (_ZN3art22IndirectReferenceTable3AddEjPNS_6mirror6ObjectE+308)

    #07 pc 00000000002f2468  /system/lib64/libart.so (_ZN3art9JavaVMExt12AddGlobalRefEPNS_6ThreadEPNS_6mirror6ObjectE+60)

    #08 pc 000000000032de8c  /system/lib64/libart.so (_ZN3art3JNI12NewGlobalRefEP7_JNIEnvP8_jobject+596)

    #09 pc 0000000000101454  /system/lib64/libandroid_runtime.so (_ZN7android20javaObjectForIBinderEP7_JNIEnvRKNS_2spINS_7IBinderEEE+428)

    #10 pc 00000000000f5a3c  /system/lib64/libandroid_runtime.so

    #11 pc 000000007564f254  /data/dalvik-cache/arm64/system@framework@boot-framework.oat (offset 0x19fc000)

log初步分析

根据log,查找到是在frameworks/base/core/jni/android_util_Binder.cpp的javaObjectForIBinder方法中调用NewGlobalRef时出现的问题。

 

547 jobject javaObjectForIBinder(JNIEnv* env, const sp<IBinder>& val)

 548 {

 549     if (val == NULL) return NULL;

 

585         jobject refObject = env->NewGlobalRef(

 586                 env->GetObjectField(object, gBinderProxyOffsets.mSelf));

 587         val->attachObject(&gBinderProxyOffsets, refObject,

 588                 jnienv_to_javavm(env), proxy_cleanup);

 

这里NewGlobalRef操作后,没有看到明显的DeleteGlobalRef操作。会不会是没有进行DeleteGlobalRef操作导致的溢出呢?

 

解决尝试

在frameworks/base/core/jni/android_util_Binder.cpp中添加env->DeleteGlobalRef(refObject);

进行回收操作

547 jobject javaObjectForIBinder(JNIEnv* env, const sp<IBinder>& val)

 548 {    static int scount = 0;//add,记录调用次数

 549     if (val == NULL) return NULL;

 

585         jobject refObject = env->NewGlobalRef(

 586                 env->GetObjectField(object, gBinderProxyOffsets.mSelf));

 587         val->attachObject(&gBinderProxyOffsets, refObject,

 588                 jnienv_to_javavm(env), proxy_cleanup);

 

 

595

 596 scount++;

 597 if (scount % 100 == 0)

 598 {

 599         ALOGD("=====android_util_Binder.cpp, scount=%d\n", scount);

 600 }

 601

 602       env->DeleteGlobalRef(refObject);

 603         // Note that a new object reference has been created.

 604         android_atomic_inc(&gNumProxyRefs);

 605         incRefsCreated(env);

 606     }

 607

 608     return object;

 609 }

 

编译运行后,手机却不能开机了。

分析

547 jobject javaObjectForIBinder(JNIEnv* env, const sp<IBinder>& val)

 548 {

 549     if (val == NULL) return NULL;

 

585         jobject refObject = env->NewGlobalRef(

 586                 env->GetObjectField(object, gBinderProxyOffsets.mSelf));

 587         val->attachObject(&gBinderProxyOffsets, refObject,

 588                 jnienv_to_javavm(env), proxy_cleanup);

在调用中,val是BpBinder,585行进行了NewGlobalRef操作后,接着在587行调用了attachObject方法,查看该方法:

BpBinder.cpp

// Attaches `object` to this binder proxy under the key `objectID`, together
// with the cookie and callback that will later be used to clean it up.
// The work is delegated to mObjects (the proxy's ObjectManager).
//
// objectID       key under which the object is stored
// object         opaque pointer to the attached object
// cleanupCookie  opaque value handed back to `func` on cleanup
// func           cleanup callback stored alongside the object
void BpBinder::attachObject(

    const void* objectID, void* object, void* cleanupCookie,

    object_cleanup_func func)

{

    // Presumably a scoped lock on mLock held until return -- confirm against
    // the AutoMutex definition.
    AutoMutex _l(mLock);

    ALOGV("Attaching object %p to binder %p (manager=%p)", object, this, &mObjects);

    mObjects.attach(objectID, object, cleanupCookie, func);

}

其中,object_cleanup_func是函数指针类型:

// Signature of the cleanup callback stored with an attached object:
// it receives the object's key (id), the object pointer and the cleanup cookie.
typedef void (*object_cleanup_func)(const void* id, void* obj, void* cleanupCookie);

 

传入的值为proxy_cleanup

staticvoidproxy_cleanup(constvoid* id, void* obj, void* cleanupCookie)
{
    android_atomic_dec(&gNumProxyRefs);
    JNIEnv* env = javavm_to_jnienv((JavaVM*)cleanupCookie);
    env->DeleteGlobalRef((jobject)obj);
}

 

BpBinder::attachObject方法中又调用了mObjects.attach(objectID, object, cleanupCookie, func);mObjects是BpBinder的成员变量,ObjectManager是BpBinder的内部类。

           ObjectManager       mObjects;

 

// Stores `object`, its cleanup cookie and its cleanup callback in mObjects
// under the key `objectID`. If the key is already present, an error is logged
// and the existing entry is left untouched.
//
// objectID       key for the new entry
// object         opaque pointer to the attached object
// cleanupCookie  opaque value later passed to `func`
// func           cleanup callback stored in the entry
void BpBinder::ObjectManager::attach(

    const void* objectID, void* object, void* cleanupCookie,

    IBinder::object_cleanup_func func)

{

    // Build the entry up front; it is only inserted if the key is free.
    entry_t e;

    e.object = object;

    e.cleanupCookie = cleanupCookie;

    e.func = func;

 

    // Refuse to overwrite an existing attachment for the same key.
    if (mObjects.indexOfKey(objectID) >= 0) {

        ALOGE("Trying to attach object ID %p to binder ObjectManager %p with object %p, but object ID already in use",

                objectID, this,  object);

        return;

    }

 

    mObjects.add(objectID, e);

}

 

由于mObjects是BpBinder的成员变量,在BpBinder对象析构的时候,mObjects也会析构。

// Destructor: calls kill(), which runs the cleanup callback of every entry
// still attached and empties the table.
BpBinder::ObjectManager::~ObjectManager()

{

    kill();

}

 

// Runs the cleanup callback of every attached entry, then removes all entries.
// Entries whose callback is NULL are skipped.
void BpBinder::ObjectManager::kill()

{

    // The entry count is read once, before any callback runs.
    const size_t N = mObjects.size();

    ALOGV("Killing %zu objects in manager %p", N, this);

    for (size_t i=0; i<N; i++) {

        const entry_t& e = mObjects.valueAt(i);

        if (e.func != NULL) {

            // Hand the callback the entry's key, object and cookie.
            e.func(mObjects.keyAt(i), e.object, e.cleanupCookie);

        }

    }

 

    mObjects.clear();

}

 

之前传入的proxy_cleanup会被执行。

staticvoidproxy_cleanup(constvoid* id, void* obj, void* cleanupCookie)
{
    android_atomic_dec(&gNumProxyRefs);
    JNIEnv* env = javavm_to_jnienv((JavaVM*)cleanupCookie);
    env->DeleteGlobalRef((jobject)obj);
}

在这里,对

585         jobject refObject = env->NewGlobalRef(

 586                 env->GetObjectField(object, gBinderProxyOffsets.mSelf));

里面的refObject进行了DeleteGlobalRef释放操作。

 

Google的源码中考虑到了释放问题。

所以,之前添加的直接释放操作

602         env->DeleteGlobalRef(refObject);

是有问题的,进而导致了系统不能开机。

 

我们可以在art/runtime/indirect_reference_table.cc:

IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj)方法中添加log打印来对引用数量进行跟踪:

LOG(ERROR) << "JNI ERROR Test (app bug): " << kind_ << " table , topIndex= " << topIndex  << "\n";

 

打印结果为

01-01 08:18:58.363 14177-14189/? E/art: JNI ERROR Test (app bug): global reference table , topIndex= 596

01-01 08:18:58.363 14177-14189/? E/art: JNI ERROR Test (app bug): local reference table , topIndex= 0

 

这样,就可以大致定位到出现大量泄漏的时间点。

 


阅读全文
0 0
原创粉丝点击