android Binder详解（4）

来源：互联网发布：淘宝火锅底料编辑：程序博客网时间：2024/06/05 12:40

3.4 getService()分析

client中获取service接口的调用如下：

    sp<ISampleService> sampleSrv;    if(getService(String16("SampleService"), &sampleSrv) != NO_ERROR){        ALOGE("get SampleService fail");        return 0;    }

getService()实现如下：

/**
 * Looks up a named service through the default service manager and casts the
 * returned binder to the requested interface type.
 *
 * @param name        Name of the service to look up.
 * @param outService  Receives the interface proxy. It is only written when a
 *                    service manager could be obtained.
 * @return NO_ERROR when a non-NULL interface was produced, NAME_NOT_FOUND
 *         otherwise (no service manager, or the lookup/cast yielded NULL).
 */
template<typename INTERFACE>
status_t getService(const String16& name, sp<INTERFACE>* outService)
{
    const sp<IServiceManager> sm = defaultServiceManager();
    if (sm == NULL) {
        return NAME_NOT_FOUND;
    }

    *outService = interface_cast<INTERFACE>(sm->getService(name));
    if ((*outService) == NULL) {
        return NAME_NOT_FOUND;
    }
    return NO_ERROR;
}

这其中获取ServiceManager接口的动作，前面已经分析过了。之后有两个动作：
1，sm->getService(name)获取到一个sp<IBinder>对象。
2，从获取的BpBinder指针中构造出BpSampleService对象。

第二步是很清楚的，就是调用了interface_cast<ISampleService>(sp<IBinder>)，它内部通过ISampleService::asInterface()构造出BpSampleService对象。我们主要需要研究一下第一步的细节。

3.4.1 BpServiceManager::getService()

BpServiceManager::getService()是调用了checkService()函数

// Polls checkService() up to five times, sleeping one second after every
// miss. Returns the first non-NULL binder, or NULL when all attempts fail.
virtual sp<IBinder> getService(const String16& name) const
{
    unsigned attempt = 0;
    while (attempt < 5) {
        sp<IBinder> binder = checkService(name);
        if (binder != NULL) {
            return binder;
        }
        ALOGI("Waiting for service %s...\n", String8(name).string());
        sleep(1);
        ++attempt;
    }
    return NULL;
}

// Sends a single CHECK_SERVICE_TRANSACTION carrying the interface token and
// the service name, then returns whatever strong binder the reply contains.
// The status returned by transact() is not inspected here.
virtual sp<IBinder> checkService(const String16& name) const
{
    Parcel request;
    Parcel response;

    request.writeInterfaceToken(IServiceManager::getInterfaceDescriptor());
    request.writeString16(name);
    remote()->transact(CHECK_SERVICE_TRANSACTION, request, &response);

    return response.readStrongBinder();
}

checkService()中只写入了InterfaceToken和service name，然后就开始transact()了。
这边的transact因为没有写入object，流程和PING_TRANSACTION的基本一样了。我们直接到ServiceManager端看看具体的处理。

3.4.2 ServiceManager处理CHECK_SERVICE_TRANSACTION

command和ADD_SERVICE一样在svcmgr_handler()中处理

    case SVC_MGR_GET_SERVICE:    case SVC_MGR_CHECK_SERVICE:       //获取service name。        s = bio_get_string16(msg, &len);       //查找对应的service。        ptr = do_find_service(bs, s, len, txn->sender_euid);        if (!ptr)            break;        bio_put_ref(reply, ptr);        return 0;

主要看下do_find_service()

void *do_find_service(struct binder_state *bs, uint16_t *s, unsigned len, unsigned uid){    //根据service name找到对应的svcinfo结点。    struct svcinfo *si;    si = find_svc(s, len);    //    ALOGI("check_service('%s') ptr = %p\n", str8(s), si ? si->ptr : 0);    if (si && si->ptr) {        if (!si->allow_isolated) {            // If this service doesn't allow access from isolated processes,            // then check the uid to see if it is isolated.            unsigned appid = uid % AID_USER;            if (appid >= AID_ISOLATED_START && appid <= AID_ISOLATED_END) {                return 0;            }        }    //返回结点中的ptr，这个ptr是binder中对应的binder_ref.desc。        return si->ptr;    } else {        return 0;    }}

我们看到了最终返回的是SampleService在ServiceManager这边对应的binder_ref的desc信息，之后这个信息被写入到write buffer中：

void bio_put_ref(struct binder_io *bio, void *ptr){     //构造了一个binder_object，也就是一个flat_binder_object。    struct binder_object *obj;    if (ptr)        obj = bio_alloc_obj(bio);    else        obj = bio_alloc(bio, sizeof(*obj));    if (!obj)        return;    obj->flags = 0x7f | FLAT_BINDER_FLAG_ACCEPTS_FDS;    obj->type = BINDER_TYPE_HANDLE;    obj->pointer = ptr;    obj->cookie = 0;}

之后进入binder_send_reply()，在这里，我们write了BC_FREE_BUFFER，还有一组BC_REPLY信息，BC_REPLY信息中，是在bio_put_ref()中构造的binder_object。
带着这些信息，我们进入到binder中，首先在binder_thread_write()中处理BC_FREE_BUFFER，这边的处理和之前的PING_TRANSACTION基本一致，没有object，释放掉binder_buffer就可以了。
然后处理BC_REPLY，主要在binder_transaction()函数中，这里和addService处理差异在于reply信息是一个object，而不是一个数据，我们看下组织binder_transaction的部分：

        //remote binder object, means BpBinder。        case BINDER_TYPE_HANDLE:        case BINDER_TYPE_WEAK_HANDLE: {            struct binder_ref *ref = binder_get_ref(proc, fp->handle);            if (ref == NULL) {                binder_user_error("%d:%d got transaction with invalid handle, %ld\n",                        proc->pid,                        thread->pid, fp->handle);                return_error = BR_FAILED_REPLY;                goto err_binder_get_ref_failed;            }            if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {                return_error = BR_FAILED_REPLY;                goto err_binder_get_ref_failed;            }            //如果传给service所在的进程，转换为BINDER_TYPE_BINDER类型的object。            if (ref->node->proc == target_proc) {                if (fp->type == BINDER_TYPE_HANDLE)                    fp->type = BINDER_TYPE_BINDER;                else                    fp->type = BINDER_TYPE_WEAK_BINDER;                fp->binder = ref->node->ptr;                fp->cookie = ref->node->cookie;                binder_inc_node(ref->node, fp->type == BINDER_TYPE_BINDER, 0, NULL);                trace_binder_transaction_ref_to_node(t, ref);                binder_debug(BINDER_DEBUG_TRANSACTION,                         "        ref %d desc %d -> node %d u%p\n",                         ref->debug_id, ref->desc, ref->node->debug_id,                         ref->node->ptr);            } else {//传入到其他client进程，为目标进程建立新的binder_ref，并传回这个新的binder_ref的信息。                struct binder_ref *new_ref;                new_ref = binder_get_ref_for_node(target_proc, ref->node);//第一次会建立一个新的binder_ref。                if (new_ref == NULL) {                    return_error = BR_FAILED_REPLY;                    goto err_binder_get_ref_for_node_failed;                }                fp->handle = new_ref->desc;//用目标进程desc替换。                binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL);                
trace_binder_transaction_ref_to_ref(t, ref,                                    new_ref);                binder_debug(BINDER_DEBUG_TRANSACTION,                         "        ref %d desc %d -> ref %d desc %d (node %d)\n",                         ref->debug_id, ref->desc, new_ref->debug_id,                         new_ref->desc, ref->node->debug_id);            }        } break;

在这里，binder会为client进程建立一个新的binder_ref出来，他会在当前进程中得到一个唯一的desc也就是handle值，注意desc值0是保留给了ServiceManager，最终我们把这个handle回填到data中。

3.4.3 client端处理返回信息
client在IPCThreadState::waitForResponse()处理BR_REPLY，数据传递给Parcel reply中，最终调用得到了sp<IBinder>对象。

reply.readStrongBinder();

我们看看readStrongBinder()里面做了什么：

// Reads the next flattened binder object from this parcel via
// unflatten_binder(). The status of unflatten_binder() is ignored, so a
// failed read yields the default-constructed (NULL) sp.
sp<IBinder> Parcel::readStrongBinder() const
{
    sp<IBinder> binder;
    unflatten_binder(ProcessState::self(), *this, &binder);
    return binder;
}

// Converts the next flat_binder_object in the parcel into an sp<IBinder>.
//  - BINDER_TYPE_BINDER: the cookie is used directly as the IBinder pointer.
//  - BINDER_TYPE_HANDLE: the proxy is obtained from
//    ProcessState::getStrongProxyForHandle().
// Returns BAD_TYPE when no object could be read or its type is neither of the
// two above; otherwise returns the result of finish_unflatten_binder().
status_t unflatten_binder(const sp<ProcessState>& proc,
    const Parcel& in, sp<IBinder>* out)
{
    const flat_binder_object* flat = in.readObject(false);
    if (flat == NULL) {
        return BAD_TYPE;
    }

    if (flat->type == BINDER_TYPE_BINDER) {
        *out = static_cast<IBinder*>(flat->cookie);
        return finish_unflatten_binder(NULL, *flat, in);
    }

    if (flat->type == BINDER_TYPE_HANDLE) {
        *out = proc->getStrongProxyForHandle(flat->handle);
        return finish_unflatten_binder(
            static_cast<BpBinder*>(out->get()), *flat, in);
    }

    return BAD_TYPE;
}

最终还是调用了getStrongProxyForHandle()来获取了sp<IBinder>，参数的handle就是在binder中的binder_ref.desc。
getStrongProxyForHandle()中创建BpBinder的动作，我们就不再看了，我们看一下如果不是首次调用的情况下的处理：

} else {            // This little bit of nastyness is to allow us to add a primary            // reference to the remote proxy when this team doesn't have one            // but another team is sending the handle to us.           //force_set()函数和直接赋值的差异在于它会强制调用RefBase.onFirstRef()，对这里来说           //也就是会出发BpBinder.onFirstRef()。            result.force_set(b);            e->refs->decWeak(this);        }

这段代码中主要注意一下result.force_set(b)这行代码，这里又强制触发了一次RefBase.onFirstRef()，调用的目的是对binder中的binder_ref增加了一次引用。
但是这里有个问题：
force_set对于binder_ref又增加了一次reference，但是因为它们用的都是同一个BpBinder，最终只会调用一次onLastStrongRef()去decrease strong reference，这样会导致ref的增减不同步，可能是会有问题的。（加了debug信息确认，的确是会出现这样的问题的。）

至此，getService()部分分析完毕，我们看到了service的传递主要是依赖binder对于binder_ref的管理，binder_ref.desc决定了在user层的handle，user层又使用handle去建立了一个BpBinder，所有的service访问结构都是使用这个BpBinder构造出BpXXX对象来操作service接口。

3.5 sayHello()调用分析
这部分我们分析下client端调用SampleService的sayHello()接口的流程，看看client端是如何把信息投递到service进程中去的。

3.5.1 sayHello() client调用

virtual int sayHello(const String8& clientName){        Parcel data,reply;        data.writeInterfaceToken(ISampleService::getInterfaceDescriptor());        data.writeString8(clientName);        remote()->transact(BnSampleService::SAY_HELLO, data, &reply);        int ret = reply.readInt32();        ALOGD("sayHello return %d",ret);        return ret;    }

这边直接进入到binder中的binder_transaction()函数中去看处理，首先是查找target node的动作:

     if (tr->target.handle) {//handle不为0的情况，这个是一般service的处理。            struct binder_ref *ref;            ref = binder_get_ref(proc, tr->target.handle);            if (ref == NULL) {                binder_user_error("%d:%d got transaction to invalid handle\n",                    proc->pid, thread->pid);                return_error = BR_FAILED_REPLY;                goto err_invalid_target_handle;            }            target_node = ref->node;        } else {//handle为0，即ServiceManager的case，直接获取binder_context_mgr_node。            target_node = binder_context_mgr_node;            if (target_node == NULL) {                return_error = BR_DEAD_REPLY;                goto err_no_context_mgr_node;            }        }

这部分和ServiceManager不一样，ServiceManager的handle始终为0，判断handle值为0就可以使用binder_context_mgr_node了。
对于一般service，handle是当前进程中的binder_ref的desc，根据handle我们可以找到对应的binder_ref，而binder_ref.node保存了它所对应的binder_node。
得到了target node之后，可以找到对应的target proc，这样我们就可以把work加入到对应的进程的todo list中去了。

3.5.2 sayHello() Service端调用

Service端是在IPCThreadState::joinThreadPool()中响应处理的，通过talkWithDriver()和binder打交道，在binder_thread_read()中获取到client传来的信息，流程没有什么特别的，我们注意一下传回来的数据的组织：

        if (t->buffer->target_node) {//transaction cmd时候。            //对于binder_context_mgr_node，ptr和cookie都为0。            //对于一般service来说，binder_node中的ptr是service的weakrefs指针,cookie是service的对象指针。（见Parcel中的flatten_binder()）            struct binder_node *target_node = t->buffer->target_node;            tr.target.ptr = target_node->ptr;            tr.cookie =  target_node->cookie;            t->saved_priority = task_nice(current);            if (t->priority < target_node->min_priority &&                !(t->flags & TF_ONE_WAY))                binder_set_nice(t->priority);            else if (!(t->flags & TF_ONE_WAY) ||                 t->saved_priority > target_node->min_priority)                binder_set_nice(target_node->min_priority);            cmd = BR_TRANSACTION;        } else {//reply cmd时候，reply时候target_node为null。            tr.target.ptr = NULL;            tr.cookie = NULL;            cmd = BR_REPLY;        }

注意这边tr.target.ptr和tr.cookie这两个指针，cookie这个就是SampleService的对象。
回到在executeCommand()中处理command：

      case BR_TRANSACTION:        {            binder_transaction_data tr;            result = mIn.read(&tr, sizeof(tr));            ALOG_ASSERT(result == NO_ERROR,                "Not enough command data for brTRANSACTION");            if (result != NO_ERROR) break;                        Parcel buffer;           //将data保存到Parcel中。            buffer.ipcSetDataReference(                reinterpret_cast<const uint8_t*>(tr.data.ptr.buffer),                tr.data_size,                reinterpret_cast<const size_t*>(tr.data.ptr.offsets),                tr.offsets_size/sizeof(size_t), freeBuffer, this);                        const pid_t origPid = mCallingPid;            const uid_t origUid = mCallingUid;                        mCallingPid = tr.sender_pid;            mCallingUid = tr.sender_euid;                       //调整Priority。            int curPrio = getpriority(PRIO_PROCESS, mMyThreadId);            if (gDisableBackgroundScheduling) {                if (curPrio > ANDROID_PRIORITY_NORMAL) {                    // We have inherited a reduced priority from the caller, but do not                    // want to run in that state in this process.  The driver set our                    // priority already (though not our scheduling class), so bounce                    // it back to the default before invoking the transaction.                    setpriority(PRIO_PROCESS, mMyThreadId, ANDROID_PRIORITY_NORMAL);                }            } else {                if (curPrio >= ANDROID_PRIORITY_BACKGROUND) {                    // We want to use the inherited priority from the caller.                    // Ensure this thread is in the background scheduling class,                    // since the driver won't modify scheduling classes for us.                    // The scheduling group is reset to default by the caller                    // once this method returns after the transaction is complete.                    
set_sched_policy(mMyThreadId, SP_BACKGROUND);                }            }            //ALOGI(">>>> TRANSACT from pid %d uid %d\n", mCallingPid, mCallingUid);                        Parcel reply;            IF_LOG_TRANSACTIONS() {                TextOutput::Bundle _b(alog);                alog << "BR_TRANSACTION thr " << (void*)pthread_self()                    << " / obj " << tr.target.ptr << " / code "                    << TypeCode(tr.code) << ": " << indent << buffer                    << dedent << endl                    << "Data addr = "                    << reinterpret_cast<const uint8_t*>(tr.data.ptr.buffer)                    << ", offsets addr="                    << reinterpret_cast<const size_t*>(tr.data.ptr.offsets) << endl;            }            if (tr.target.ptr) {//ptr存在的情况下，                sp<BBinder> b((BBinder*)tr.cookie);//cookie是指向service对象指针，是继承BBinder的，转换为BBinder调用。                const status_t error = b->transact(tr.code, buffer, &reply, tr.flags);                if (error < NO_ERROR) reply.setError(error);            } else {//thre_context_object没有看到被set的地方，目前没有使用到。                const status_t error = the_context_object->transact(tr.code, buffer, &reply, tr.flags);                if (error < NO_ERROR) reply.setError(error);            }                        //ALOGI("<<<< TRANSACT from pid %d restore pid %d uid %d\n",            //     mCallingPid, origPid, origUid);                        if ((tr.flags & TF_ONE_WAY) == 0) {                LOG_ONEWAY("Sending reply to %d!", mCallingPid);                sendReply(reply, 0);//发送返回信息。            } else {                LOG_ONEWAY("NOT sending reply to %d!", mCallingPid);//不需要返回值的情况。            }                        mCallingPid = origPid;            mCallingUid = origUid;            IF_LOG_TRANSACTIONS() {                TextOutput::Bundle _b(alog);                alog << "BC_REPLY thr " << (void*)pthread_self() << " / obj "                    << tr.target.ptr << ": " << indent << 
reply << dedent << endl;            }                    }        break;

调用还是很简单的，从data中读取cookie，就是Service对象的指针，转换为BBinder对象，调用BBinder的transact函数：

// Entry point for an incoming transaction on a local binder object.
//
// Rewinds `data` to position 0, handles PING_TRANSACTION locally by calling
// pingBinder(), and forwards every other code to onTransact(). When a reply
// parcel is supplied, its read position is rewound to 0 before returning.
//
// @param code   Transaction code.
// @param data   Incoming parcel.
// @param reply  Reply parcel; may be NULL (this function itself checks for
//               NULL before rewinding it).
// @param flags  Transaction flags, passed through to onTransact().
// @return NO_ERROR for PING_TRANSACTION, otherwise the result of onTransact().
status_t BBinder::transact(
    uint32_t code, const Parcel& data, Parcel* reply, uint32_t flags)
{
    data.setDataPosition(0);

    status_t err = NO_ERROR;
    switch (code) {
        case PING_TRANSACTION: {
            // pingBinder() is always invoked; only writing its result is
            // conditional, so a NULL reply no longer causes a NULL dereference.
            const status_t pingResult = pingBinder();
            if (reply != NULL) {
                reply->writeInt32(pingResult);
            }
            break;
        }
        default:
            err = onTransact(code, data, reply, flags);
            break;
    }

    if (reply != NULL) {
        reply->setDataPosition(0);
    }

    return err;
}

在BBinder::transact()函数中，会去调用我们在BnSampleService中重写的onTransact()函数，最终调用到SampleService::sayHello()了。

调用后，回到executeCommand()中看看sendReply()里面怎么返回的：

// Queues a BC_REPLY carrying `reply` via writeTransactionData() and, if that
// succeeded, calls waitForResponse(NULL, NULL) and returns its status.
// A negative status from writeTransactionData() is returned immediately.
status_t IPCThreadState::sendReply(const Parcel& reply, uint32_t flags)
{
    status_t statusBuffer;
    const status_t err =
        writeTransactionData(BC_REPLY, flags, -1, 0, reply, &statusBuffer);

    return (err < NO_ERROR) ? err : waitForResponse(NULL, NULL);
}

这里和IPCThreadState::transact()一样，调用了writeTransactionData()来组织要写给binder的binder_transaction_data数据，之后调用waitForResponse()来将数据写给binder。
注意binder返回的数据，在executeCommand()中被赋给了Parcel buffer这个局部对象，在buffer析构的时候，会去free掉内存，不需要我们再去释放了。

3.5.3 sayHello() client端处理reply

reply的处理这里就不说了，和前面所讲到的处理是一样的，没有任何差别。

3.6 TF_ONE_WAY调用的差别

在前面的分析中，我们已经碰到了很多次TF_ONE_WAY这个flag，在binder中也看到了一些和正常调用不一样的处理，这边我们来写一个TF_ONE_WAY调用的接口来分析一下TF_ONE_WAY调用在流程上的差异。
首先在SampleService里面加了一个新的接口：

// Service-side implementation of the asynchronous hello: logs the caller's
// name and always returns 0.
int SampleService::sayHelloAsync(const String8& clientName)
{
    const char* who = clientName.string();
    ALOGD("Async Hello to %s", who);
    return 0;
}

BpSampleService的实现是：

virtual int sayHelloAsync(const String8& clientName){        Parcel data,reply;        data.writeInterfaceToken(ISampleService::getInterfaceDescriptor());        data.writeString8(clientName);        remote()->transact(BnSampleService::SAY_HELLO, data, &reply,TF_ONE_WAY);        int ret = reply.readInt32();        if(NOT_ENOUGH_DATA == ret)            ALOGD("for async mode ,there is no reply ~~");        else            ALOGD("sayHelloAsync return %d",ret);        return 0;    }

3.6.1 client调用过程
在IPCThreadState::transact()中的流程有差异，调用waitForResponse()的reply为NULL：

err = waitForResponse(NULL, NULL);

之后还是调用talkWithDriver()去和binder交互，看看binder部分的差别，我们看有判断TF_ONE_WAY部分的代码就可以了。
binder_transaction()中差异：

if (!reply && !(tr->flags & TF_ONE_WAY))        t->from = thread;//同步transaction的时候，记录from。    else        t->from = NULL;//异步transaction或者reply时候，不需要记录from了。

异步模式的时候，不会在binder_transaction中记录from的thread。

t->buffer = binder_alloc_buf(target_proc, tr->data_size,        tr->offsets_size, !reply && (t->flags & TF_ONE_WAY));

申请binder_buffer的时候，异步模式下最后一个参数is_async为true，看下binder_alloc_buf()中对于is_async的处理的差异：

if (is_async &&        proc->free_async_space < size + sizeof(struct binder_buffer)) {        binder_debug(BINDER_DEBUG_BUFFER_ALLOC,                 "%d: binder_alloc_buf size %zd failed, no async space left\n",                  proc->pid, size);        return NULL;    }

    buffer->async_transaction = is_async;    if (is_async) {        //update free_async_space        proc->free_async_space -= size + sizeof(struct binder_buffer);        binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,                 "%d: binder_alloc_buf size %zd async free %zd\n",                  proc->pid, size, proc->free_async_space);    }

异步模式下，要检测和更新free_async_space空间，另外就是影响了binder_buffer中的async_transaction的flag。
从这里我们可以看出来free_async_space是对于async调用的一个限制，使用的空间不能超过mmap size的一半。

if (reply) {        BUG_ON(t->buffer->async_transaction != 0);        //清除掉binder_tranasaction发起thread的transaction_stack。        binder_pop_transaction(target_thread, in_reply_to);    } else if (!(t->flags & TF_ONE_WAY)) {//同步transaction        BUG_ON(t->buffer->async_transaction != 0);        t->need_reply = 1;        //transaction_stack指向当前thread的最后一个binder_transaction，通过from_parent进行链接。        t->from_parent = thread->transaction_stack;        thread->transaction_stack = t;    } else {//异步transaction        BUG_ON(target_node == NULL);        BUG_ON(t->buffer->async_transaction != 1);        if (target_node->has_async_transaction) {            target_list = &target_node->async_todo;            target_wait = NULL;        } else            target_node->has_async_transaction = 1;    }

这边TF_ONE_WAY在这里有比较重大的影响：如果当前没有异步调用在执行，只是设置target_node的has_async_transaction为1；否则的话，会更改target_list为target_node的async_todo，而不是原来的thread/proc的todo list。

之后进入binder_thread_read()，client端在这里是read了BR_TRANSACTION_COMPLETE command后，返回user层，在IPCThreadState::waitForResponse()中进行BR_TRANSACTION_COMPLETE的处理：

case BR_TRANSACTION_COMPLETE:            if (!reply && !acquireResult) goto finish;            break;

这里我们看到了差异，在异步模式下，reply和acquireResult都是NULL，这里会直接进入finish的代码段，这样waitForResponse()函数就返回到了我们的BpSampleService::sayHelloAsync()中，这里的reply中没有写入任何的数据。

到这里，client调用已经完全结束了，我们可以清楚看到了异步调用下，我们只是发出command后就退出了，并不会等待service返回BR_REPLY的信息。

3.6.2 service的响应
SampleService还是从binder_thread_read()中读取command：

       if (t->buffer->target_node) {//transaction cmd时候。            //对于binder_context_mgr_node，ptr和cookie都为0。            //对于一般service来说，binder_node中的ptr是service的weakrefs指针,cookie是service的对象指针。（见Parcel中的flatten_binder()）            struct binder_node *target_node = t->buffer->target_node;            tr.target.ptr = target_node->ptr;            tr.cookie =  target_node->cookie;            t->saved_priority = task_nice(current);            //同步模式下设置priority            if (t->priority < target_node->min_priority &&                !(t->flags & TF_ONE_WAY))                binder_set_nice(t->priority);            else if (!(t->flags & TF_ONE_WAY) ||                 t->saved_priority > target_node->min_priority)                binder_set_nice(target_node->min_priority);            cmd = BR_TRANSACTION;        } else {//reply cmd时候，reply时候target_node为null。            tr.target.ptr = NULL;            tr.cookie = NULL;            cmd = BR_REPLY;        }

同步模式下，将client的priority传递给service端。异步模式下，不需要传递，因为client不会等待service的reply。

        //记录sender_pid。        //异步模式，或者reply情况下，t->from == 0.        if (t->from) {            struct task_struct *sender = t->from->proc->tsk;            tr.sender_pid = task_tgid_nr_ns(sender,                            task_active_pid_ns(current));        } else {            tr.sender_pid = 0;        }

t->from是client在binder_transaction()中设置的，对于异步模式下，from为NULL。from在这里影响了binder_thread_read()中记录sender_pid，我们后面在user层处理中看到sender_pid。

       if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) {            //同步模式下更新stack信息，binder_transaction t会在reply时候在binder_transaction()中pop掉。            t->to_parent = thread->transaction_stack;            t->to_thread = thread;            thread->transaction_stack = t;//把最近读到的binder_transaction设置为transaction_stack        } else {//异步模式，或者reply时，binder_transaction已经不需要了            //（reply时候才需要通过binder_transaction找到reply的target thread），在这里直接释放掉。            t->buffer->transaction = NULL;            kfree(t);            binder_stats_deleted(BINDER_STAT_TRANSACTION);        }

异步模式时候，binder_transaction没有记录到thread->transaction_stack中去，而是直接释放了。因为service不会发送reply信息，也不需要binder_transaction了。

从binder_thread_read()中返回后，回到user层处理读取到的BR_TRANSACTION：

            //client的pid和uid。在异步模式下的时候为0。            mCallingPid = tr.sender_pid;            mCallingUid = tr.sender_euid;

异步模式的时候，这里的sender_pid为0，而不是client的真实pid。

另外差异就在于reply的发送，同步模式会发送BC_REPLY给binder，但是对于异步模式来说我们什么都不需要做了。

除去reply之外，我们还要看下freebuffer的处理，Parcel data析构会去调用IPCThreadState::freeBuffer()去发送BC_FREE_BUFFER给binder，我们到binder_thread_write()中看看：

          //异步操作的时候，work会被加入到async_todo的list中，在free时候，把下一个work挪到thread的todo中去。           if (buffer->async_transaction && buffer->target_node) {                BUG_ON(!buffer->target_node->has_async_transaction);                if (list_empty(&buffer->target_node->async_todo))                    buffer->target_node->has_async_transaction = 0;                else                    list_move_tail(buffer->target_node->async_todo.next, &thread->todo);            }

这部分代码和之前binder_transaction()中的代码是呼应的,

        if (target_node->has_async_transaction) {            target_list = &target_node->async_todo;            target_wait = NULL;        } else            target_node->has_async_transaction = 1;

当前的target_node中有异步操作在执行的时候，work加入到了binder_node.async_todo的list中去，当一个async操作完成的时候，再从async_todo中拿出一个新的work加入到thread的todo list中去。
这样的操作逻辑，让异步操作是在一个thread中去完成，因为异步操作的client不会等待，不需要尽快的返回，可以空出更多的thread给同步操作使用。

接着我们看看释放binder_buffer的函数binder_free_buf()：

    if (buffer->async_transaction) {        proc->free_async_space += size + sizeof(struct binder_buffer);        binder_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,                 "%d: binder_free_buf size %zd async free %zd\n",                  proc->pid, size, proc->free_async_space);    }

和申请的时候一样，需要更新一下free_async_space的值。这里我们已经看出来free_async_space是异步操作所能申请的buffer的最大size，每次异步操作申请的时候，会在free_async_space中减掉，而在释放的时候，会增加free_async_space。
free_async_space的最大值是mmap size的一半，也就是说binder要求至少保留一半的内存给同步操作去使用。

3.6.3 小结
异步操作的流程我们已经看完了，差异主要是：
1，client发出异步操作后，直接返回，不等待service reply。
2，client发给service的work，在service在处理其他异步操作时候，会被加入到binder_node.async_todo中去，而不是thread/proc的todo中。
3，client发给service的binder_transaction结构，在service read的时候，会直接释放。
4，client传给service的sender_pid为0。
5，service不会发送BC_REPLY给binder。
6，处理BC_FREE_BUFFER时会检测async_todo是否还有work，如果有，会挪到thread.todo中去执行。

3.7 DeathRecipient调用的分析
在第二章编写SampleService的时候，我们在client调用中调用了linkToDeath()去注册service的died的notification，但是实际跑起来之后，这个callback没有被调用到。
我们这里分析下代码，这个回调是怎么工作的，为什么我们的注册没有工作起来。

client中注册的动作如下：

    DeathCallBack* deathCb = new DeathCallBack();    sampleSrv->asBinder()->linkToDeath(deathCb);

调用了BpBinder::linkToDeath()

// Registers a death recipient on this proxy.
//
// The recipient is stored in mObituaries. Only when that vector is first
// created is a death notification requested from the driver (via
// requestDeathNotification() followed by flushCommands()).
//
// @param recipient  Must be non-NULL (fatal otherwise).
// @param cookie     Stored with the recipient.
// @param flags      Stored with the recipient.
// @return NO_ERROR on success, NO_MEMORY if the vector cannot be allocated,
//         the negative result of Vector::add() on failure, or DEAD_OBJECT when
//         mObitsSent is already set.
status_t BpBinder::linkToDeath(
    const sp<DeathRecipient>& recipient, void* cookie, uint32_t flags)
{
    Obituary ob;
    ob.recipient = recipient;
    ob.cookie = cookie;
    ob.flags = flags;

    LOG_ALWAYS_FATAL_IF(recipient == NULL,
                        "linkToDeath(): recipient must be non-NULL");

    AutoMutex _l(mLock);

    // NOTE(review): mObitsSent presumably means obituaries were already
    // delivered (the remote object is dead) -- confirm against sendObituary().
    if (mObitsSent) {
        return DEAD_OBJECT;
    }

    if (!mObituaries) {
        // First registration: create the list and ask the driver for a
        // death notification on this handle.
        mObituaries = new Vector<Obituary>;
        if (!mObituaries) {
            return NO_MEMORY;
        }
        ALOGV("Requesting death notification: %p handle %d\n", this, mHandle);
        getWeakRefs()->incWeak(this);
        IPCThreadState* self = IPCThreadState::self();
        self->requestDeathNotification(mHandle, this);
        self->flushCommands();
    }

    const ssize_t res = mObituaries->add(ob);
    return res >= (ssize_t)NO_ERROR ? (status_t)NO_ERROR : res;
}

在这里，注册的DeathRecipient是被加入到了mObituaries这个vector里面去了，没有直接传递给binder。
mObituaries创建时候，通过函数requestDeathNotification()，写了一个command BC_REQUEST_DEATH_NOTIFICATION,

// Queues a BC_REQUEST_DEATH_NOTIFICATION command in mOut, followed by the
// handle and the proxy pointer. Nothing is sent to the driver here; the
// command is only appended to the outgoing buffer. Always returns NO_ERROR.
//
// NOTE(review): the proxy pointer is written as a 32-bit integer; confirm
// this matches the pointer width of the target platform.
status_t IPCThreadState::requestDeathNotification(int32_t handle, BpBinder* proxy)
{
    mOut.writeInt32(BC_REQUEST_DEATH_NOTIFICATION);
    mOut.writeInt32((int32_t)handle);
    mOut.writeInt32((int32_t)proxy);
    return NO_ERROR;
}

这个command在flushCommands()中被写入到binder ：

// Pushes any queued commands to the driver. Does nothing when the process has
// no valid driver fd (mDriverFD <= 0).
void IPCThreadState::flushCommands()
{
    if (mProcess->mDriverFD > 0) {
        // Per the accompanying article, `false` means: write only, do not read.
        talkWithDriver(false);
    }
}

之后在binder中处理BC_REQUEST_DEATH_NOTIFICATION，之前我们在分析addService()的时候，已经看到了这个command的处理，主要行为就是为当前的binder_ref创建了一个binder_ref_death对象。

但是我们的问题还没有解决，当service挂掉的时候，是如何被通知到的呢？考虑到service异常退出的时候是来不及去和binder做交互的，那么一定是系统帮忙做了什么。
再想到当进程异常退出的时候，系统会帮助我们做file的flush,release的动作来回收资源，binder对于系统来说一样也是个file，所以也会做一样的处理，所以我们关注一下binder_flush()和binder_release()：

/*
 * flush handler for the binder file: optionally logs the calling process and
 * defers the actual flush work by queueing BINDER_DEFERRED_FLUSH for the
 * binder_proc stored in filp->private_data. Always returns 0.
 */
static int binder_flush(struct file *filp, fl_owner_t id)
{
    struct binder_proc *flushing_proc = filp->private_data;

    if (flushing_proc->bLogDbg) {
        printk("Process %d called flush \n", flushing_proc->pid);
    }
    binder_defer_work(flushing_proc, BINDER_DEFERRED_FLUSH);

    return 0;
}

这里调用了binder_defer_work（）:

static voidbinder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer){    mutex_lock(&binder_deferred_lock);    proc->deferred_work |= defer;//记录defer work的flag。    if (hlist_unhashed(&proc->deferred_work_node)) {        hlist_add_head(&proc->deferred_work_node,                &binder_deferred_list);        //将binder_deferred_work放到binder_deferred_workqueue中去。        queue_work(binder_deferred_workqueue, &binder_deferred_work);    }    mutex_unlock(&binder_deferred_lock);}

所以flush()中，在proc的deferred_work中设上了BINDER_DEFERRED_FLUSH，并在binder_deferred_workqueue中加入了一个新的work。
binder_deferred_workqueue是binder在binder_init()中建立的一个workqueue，任务放到workqueue中会被逐个执行。

binder_deferred_work是下面的代码声明

static DECLARE_WORK(binder_deferred_work, binder_deferred_func);

也就是work的执行，就是调用binder_deferred_func()这个函数，

/*
 * Work function behind binder_deferred_work.
 *
 * Repeatedly takes the first binder_proc off binder_deferred_list, snapshots
 * and clears its deferred_work flags, and then performs the requested
 * operations (put files, flush, release). The loop ends once the list is
 * found empty (proc == NULL).
 */
static void binder_deferred_func(struct work_struct *work)
{
    struct binder_proc *proc;
    struct files_struct *files;
    int defer;
    do {
        binder_lock(__func__);
        mutex_lock(&binder_deferred_lock);
        if (!hlist_empty(&binder_deferred_list)) {
            proc = hlist_entry(binder_deferred_list.first,
                    struct binder_proc, deferred_work_node);
            hlist_del_init(&proc->deferred_work_node);
            /* Take the pending flags and reset them on the proc. */
            defer = proc->deferred_work;
            proc->deferred_work = 0;
        } else {
            proc = NULL;
            defer = 0;
        }
        mutex_unlock(&binder_deferred_lock);
        files = NULL;
        if (defer & BINDER_DEFERRED_PUT_FILES) {
            files = proc->files;
            if (files)
                proc->files = NULL;
        }
        /* BINDER_DEFERRED_FLUSH is the state queued by binder_flush(). */
        if (defer & BINDER_DEFERRED_FLUSH)
            binder_deferred_flush(proc);
        /* BINDER_DEFERRED_RELEASE is the state queued by binder_release(). */
        if (defer & BINDER_DEFERRED_RELEASE)
            binder_deferred_release(proc); /* frees proc */
        binder_unlock(__func__);
        /* The files reference is dropped only after binder_unlock(). */
        if (files)
            put_files_struct(files);
    } while (proc);
}

最终我们调用了binder_deferred_flush(),

static void binder_deferred_flush(struct binder_proc *proc){    struct rb_node *n;    int wake_count = 0;    //让进程所有等待的thread都立刻返回出去。    for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) {        struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node);        thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN;        if (thread->looper & BINDER_LOOPER_STATE_WAITING) {            wake_up_interruptible(&thread->wait);            wake_count++;        }    }    wake_up_interruptible_all(&proc->wait);    binder_debug(BINDER_DEBUG_OPEN_CLOSE,             "binder_flush: %d woke %d threads\n", proc->pid,             wake_count);}

这个函数中让所有等待的thread，都从等待中退出出去（BINDER_LOOPER_STATE_NEED_RETURN会让thread从binder_thread_read中直接返回出去，即使没有读到任何东西）。

继续看下binder_release()，

/*
 * release handler for the binder file: removes the proc's debugfs entry and
 * defers the real teardown by queueing BINDER_DEFERRED_RELEASE.
 * Always returns 0.
 */
static int binder_release(struct inode *nodp, struct file *filp)
{
    struct binder_proc *closing = filp->private_data;

    debugfs_remove(closing->debugfs_entry);
    binder_defer_work(closing, BINDER_DEFERRED_RELEASE);

    return 0;
}

和binder_flush()基本一样，只是push的work的state是BINDER_DEFERRED_RELEASE，看看这个类型在binder_deferred_func()中的处理，

        //处理binder_release()传来的state BINDER_DEFERRED_RELEASE        if (defer & BINDER_DEFERRED_RELEASE)            binder_deferred_release(proc); /* frees proc */

调用了binder_deferred_release()，

static void binder_deferred_release(struct binder_proc *proc){    //对当前进程相关的资源做处理。    struct binder_transaction *t;    struct rb_node *n;    int threads, nodes, incoming_refs, outgoing_refs, buffers,        active_transactions, page_count;    BUG_ON(proc->vma);    BUG_ON(proc->files);    hlist_del(&proc->proc_node);    if (binder_context_mgr_node && binder_context_mgr_node->proc == proc) {        binder_debug(BINDER_DEBUG_DEAD_BINDER,                 "%s: %d context_mgr_node gone\n",                 __func__, proc->pid);        binder_context_mgr_node = NULL;    }    threads = 0;    active_transactions = 0;    //清除所有的binder_thread对象。    while ((n = rb_first(&proc->threads))) {        struct binder_thread *thread;        thread = rb_entry(n, struct binder_thread, rb_node);        threads++;        active_transactions += binder_free_thread(proc, thread);    }    nodes = 0;    incoming_refs = 0;    //release所有的binder_node对象的资源，当进程中有多个serivce的时候，会有多个binder_node。    //注意这里只是对binder_node做release动作，并不是delete掉binder_node的对象，这个对象因为client还在引用，所以暂时不会被删除。    while ((n = rb_first(&proc->nodes))) {        struct binder_node *node;        node = rb_entry(n, struct binder_node, rb_node);        nodes++;        rb_erase(&node->rb_node, &proc->nodes);        incoming_refs = binder_node_release(node, incoming_refs);    }    outgoing_refs = 0;    //清除当前进程的reference。    while ((n = rb_first(&proc->refs_by_desc))) {        struct binder_ref *ref;        ref = rb_entry(n, struct binder_ref, rb_node_desc);        outgoing_refs++;        binder_delete_ref(ref);    }    //清除两个list。    binder_release_work(&proc->todo);    binder_release_work(&proc->delivered_death);    buffers = 0;    //释放掉已经申请的binder_buffer。    while ((n = rb_first(&proc->allocated_buffers))) {        struct binder_buffer *buffer;        buffer = rb_entry(n, struct binder_buffer, rb_node);        t = buffer->transaction;        if (t) {            t->buffer = NULL;            buffer->transaction = NULL;            
pr_err("release proc %d, transaction %d, not freed\n",                   proc->pid, t->debug_id);            /*BUG();*/        }        binder_free_buf(proc, buffer);        buffers++;    }    binder_stats_deleted(BINDER_STAT_PROC);    //释放mmap的物理page。    page_count = 0;    if (proc->pages) {        int i;        for (i = 0; i < proc->buffer_size / PAGE_SIZE; i++) {            void *page_addr;            if (!proc->pages[i])                continue;            page_addr = proc->buffer + i * PAGE_SIZE;            binder_debug(BINDER_DEBUG_BUFFER_ALLOC,                     "%s: %d: page %d at %p not freed\n",                     __func__, proc->pid, i, page_addr);            unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);            __free_page(proc->pages[i]);            page_count++;        }        kfree(proc->pages);        vfree(proc->buffer);    }    put_task_struct(proc->tsk);    binder_debug(BINDER_DEBUG_OPEN_CLOSE,             "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d, buffers %d, pages %d\n",             __func__, proc->pid, threads, nodes, incoming_refs,             outgoing_refs, active_transactions, buffers, page_count);    kfree(proc);}

这里我们重点关注一下binder_node的release，这个会涉及到client端的binder_ref的处理，

static int binder_node_release(struct binder_node *node, int refs){    struct binder_ref *ref;    int death = 0;    list_del_init(&node->work.entry);    binder_release_work(&node->async_todo);    if (hlist_empty(&node->refs)) {        kfree(node);        binder_stats_deleted(BINDER_STAT_NODE);        return refs;    }    node->proc = NULL;//将proc置空，binder_proc在release之后不会存在了，但是binder_node不会被delete。    node->local_strong_refs = 0;    node->local_weak_refs = 0;    hlist_add_head(&node->dead_node, &binder_dead_nodes);//加入到binder_dead_nodes中去。    //遍历node对应的所有的binder_ref对象。    hlist_for_each_entry(ref, &node->refs, node_entry) {        refs++;        if (!ref->death)            continue;        death++;        if (list_empty(&ref->death->work.entry)) {//在client的proc.todo中加入BINDER_WORK_DEAD_BINDER work.            ref->death->work.type = BINDER_WORK_DEAD_BINDER;            list_add_tail(&ref->death->work.entry,                      &ref->proc->todo);            wake_up_interruptible(&ref->proc->wait);        } else            BUG();    }    binder_debug(BINDER_DEBUG_DEAD_BINDER,             "node %d now dead, refs %d, death %d\n",             node->debug_id, refs, death);    return refs;}

在这里，binder_node.proc被reset为NULL，遍历了所有的binder_ref，如果binder_ref.death非空，会在binder_ref所在的proc中加入BINDER_WORK_DEAD_BINDER的work。
binder_ref.death正是我们在BC_REQUEST_DEATH_NOTIFICATION中为binder_ref创建的。

这里我们已经看出问题了，client在没有和binder交互的情况下，是不可能处理到BINDER_WORK_DEAD_BINDER这个work的。所以我们在client这端也需要调用一下IPCThreadState::joinThreadPool()。

我们假设已经建立了一个thread的情况继续往下分析。
回到SampleService的client端来，当SampleService挂掉之后，client的todo中有了一个BINDER_WORK_DEAD_BINDER的work。

        case BINDER_WORK_DEAD_BINDER:        case BINDER_WORK_DEAD_BINDER_AND_CLEAR:        case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {            struct binder_ref_death *death;            uint32_t cmd;            //获取binder_ref_death对象。            death = container_of(w, struct binder_ref_death, work);            if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)                cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;            else                cmd = BR_DEAD_BINDER;            //写回cmd。            if (put_user(cmd, (uint32_t __user *)ptr))                return -EFAULT;            ptr += sizeof(uint32_t);            if (put_user(death->cookie, (void * __user *)ptr))                return -EFAULT;            ptr += sizeof(void *);            binder_stat_br(proc, thread, cmd);            binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,                     "%d:%d %s %p\n",                      proc->pid, thread->pid,                      cmd == BR_DEAD_BINDER ?                      "BR_DEAD_BINDER" :                      "BR_CLEAR_DEATH_NOTIFICATION_DONE",                      death->cookie);            if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) {                list_del(&w->entry);                kfree(death);                binder_stats_deleted(BINDER_STAT_DEATH);            } else//加入到delivered_death list                list_move(&w->entry, &proc->delivered_death);            if (cmd == BR_DEAD_BINDER)                goto done; /* DEAD_BINDER notifications can cause transactions */        } break;

这里向user层写回了command BR_DEAD_BINDER，并把当前的work加入到delivered_death list中去。

回到user层后，处理BR_DEAD_BINDER,

         case BR_DEAD_BINDER:        {            BpBinder *proxy = (BpBinder*)mIn.readInt32();            proxy->sendObituary();//调用注册的death callback.            mOut.writeInt32(BC_DEAD_BINDER_DONE);//写回BC_DEAD_BINDER_DONE，告诉binder已经处理完毕。            mOut.writeInt32((int32_t)proxy);        } break;

这里调用了BpBinder::sendObituary()，

void BpBinder::sendObituary(){    ALOGV("Sending obituary for proxy %p handle %d, mObitsSent=%s\n",        this, mHandle, mObitsSent ? "true" : "false");    mAlive = 0;    if (mObitsSent) return;    mLock.lock();    Vector<Obituary>* obits = mObituaries;    if(obits != NULL) {        ALOGV("Clearing sent death notification: %p handle %d\n", this, mHandle);        IPCThreadState* self = IPCThreadState::self();        self->clearDeathNotification(mHandle, this);//写BC_CLEAR_DEATH_NOTIFICATION command给binder。        self->flushCommands();        mObituaries = NULL;    }    mObitsSent = 1;    mLock.unlock();    ALOGV("Reporting death of proxy %p for %d recipients\n",        this, obits ? obits->size() : 0);    if (obits != NULL) {        const size_t N = obits->size();        for (size_t i=0; i<N; i++) {//调用注册在vector中的DeathRecipient。            reportOneDeath(obits->itemAt(i));        }        delete obits;    }}

我们注册的DeathRecipient就是保存在mObituaries这个vector中的，在函数最后的for循环中，每个注册的DeathRecipient最终都会被执行。

void BpBinder::reportOneDeath(const Obituary& obit){    sp<DeathRecipient> recipient = obit.recipient.promote();    ALOGV("Reporting death to recipient: %p\n", recipient.get());    if (recipient == NULL) return;    recipient->binderDied(this);}

处理完BR_DEAD_BINDER之后，client又向binder中写了两个command，BC_CLEAR_DEATH_NOTIFICATION 和 BC_DEAD_BINDER_DONE，再进入binder中看这两个command的处理。
BC_CLEAR_DEATH_NOTIFICATION在binder_thread_write()中是和BC_REQUEST_DEATH_NOTIFICATION在一个case中处理的，他们的处理是相反的， BC_REQUEST_DEATH_NOTIFICATION为当前的binder_ref建立了一个新的binder_ref_death对象，而BC_CLEAR_DEATH_NOTIFICATION是清除了当前的binder_ref.death指针：

} else {
    /* BC_CLEAR_DEATH_NOTIFICATION: there must be an active death record ... */
    if (ref->death == NULL) {
        binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n",
            proc->pid, thread->pid);
        break;
    }
    death = ref->death;
    /* ... and it must carry the cookie passed with the command. */
    if (death->cookie != cookie) {
        binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch %p != %p\n",
            proc->pid, thread->pid,
            death->cookie, cookie);
        break;
    }
    /* Detach the death record from the binder_ref. */
    ref->death = NULL;
    if (list_empty(&death->work.entry)) {
        /*
         * The death work is not on any list, i.e. the service death has
         * not been triggered yet: queue the clear as its own work item.
         */
        death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;
        if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) {
            list_add_tail(&death->work.entry, &thread->todo);
        } else {
            list_add_tail(&death->work.entry, &proc->todo);
            wake_up_interruptible(&proc->wait);
        }
    } else {
        /*
         * The death work is already on a list, i.e. the service death has
         * been triggered: only change the work type.
         */
        BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER);
        death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR;
    }
}

这边根据death work有没有被加入到某个list中（也就是对应的service died是否已经触发）分成两个case分别处理。我们现在走的是else中的流程，在这里death.work（注意这个work此时是在proc.delivered_death这个list中）的type被修改成了BINDER_WORK_DEAD_BINDER_AND_CLEAR。

之后，binder继续处理BC_DEAD_BINDER_DONE这个command,

case BC_DEAD_BINDER_DONE: {            struct binder_work *w;            void __user *cookie;            struct binder_ref_death *death = NULL;            if (get_user(cookie, (void __user * __user *)ptr))                return -EFAULT;            ptr += sizeof(void *);            list_for_each_entry(w, &proc->delivered_death, entry) {//根据cookie（user层的BpBinder指针）查找到对应的binder_ref_death。                struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);                if (tmp_death->cookie == cookie) {                    death = tmp_death;                    break;                }            }            binder_debug(BINDER_DEBUG_DEAD_BINDER,                     "%d:%d BC_DEAD_BINDER_DONE %p found %p\n",                     proc->pid, thread->pid, cookie, death);            if (death == NULL) {                binder_user_error("%d:%d BC_DEAD_BINDER_DONE %p not found\n",                    proc->pid, thread->pid, cookie);                break;            }            list_del_init(&death->work.entry);//从deliver_death list中删除当前的work。            if (death->work.type == BINDER_WORK_DEAD_BINDER_AND_CLEAR) {                death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;//发送BINDER_WORK_CLEAR_DEATH_NOTIFICATION work去清除death。                if (thread->looper & (BINDER_LOOPER_STATE_REGISTERED | BINDER_LOOPER_STATE_ENTERED)) {                    list_add_tail(&death->work.entry, &thread->todo);                } else {                    list_add_tail(&death->work.entry, &proc->todo);                    wake_up_interruptible(&proc->wait);                }            }        } break;

这边从delivered_death list中删除了death的work，又发了一个新的work BINDER_WORK_CLEAR_DEATH_NOTIFICATION给当前的进程。

从binder_thread_write()中退出后，binder在binder_thread_read()中会接收到BINDER_WORK_CLEAR_DEATH_NOTIFICATION这个work并进行处理，它和前面的BINDER_WORK_DEAD_BINDER是在同一个case中处理的，主要行为如下，

            /* Choose the command reported to user space from the work type. */
            if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
                cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
            else
                cmd = BR_DEAD_BINDER;

            /* For a clear request: unlink the work item and free the death record. */
            if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) {
                list_del(&w->entry);
                kfree(death);
                binder_stats_deleted(BINDER_STAT_DEATH);
            }

这里又回复给user层一个BR_CLEAR_DEATH_NOTIFICATION_DONE的command，并且释放了之前申请的binder_ref_death对象。
BR_CLEAR_DEATH_NOTIFICATION_DONE在user层处理：

case BR_CLEAR_DEATH_NOTIFICATION_DONE:
    {
        // The driver writes the cookie with sizeof(void *), so it has to be
        // read pointer-sized; a 32-bit read would truncate it where
        // pointers are wider than 32 bits.
        BpBinder *proxy = (BpBinder*)mIn.readIntPtr();
        // Drop one weak reference on the proxy.
        proxy->getWeakRefs()->decWeak(proxy);
    } break;

就是对BpBinder做了decrease weak的动作，这个是和BpBinder::linkToDeath()中的下列代码呼应的。

// Takes a weak reference on this proxy; the BR_CLEAR_DEATH_NOTIFICATION_DONE
// handler calls decWeak() on the same proxy.
getWeakRefs()->incWeak(this);

到这，整个DeathRecipient的流程就结束了。我们从中也看到了death回调的整个流程是需要client不停地和binder去交互的，所以我们在client中也要加上IPCThreadState::joinThreadPool()才行。对client的main函数做如下修改后，我们的death callback就可以被调用到了。

#if 0    do{        sleep(2);        if(deathCalled){            ALOGE("death callback called\n");            break;        }    }while(1); #else    IPCThreadState::self()->joinThreadPool(true); #endif

四，binder总结

基本上所有的点在上面已经都分析到了。这里list下kernel中的关键对象：
binder_proc：和user层ProcessState对应。
binder_thread：和user层IPCThreadState对应。
binder_node：service在binder中的代表。
binder_ref：client在binder中的代表。
binder_work：传递信息的表达。
binder_proc/binder_thread.todo：维护binder_work，用来传递信息的重要对象。

End!

-------------------------------------------

by sky

0 0