【OVS2.5.0源码分析】upcall处理线程分析(1)
来源:互联网 发布:网络基本知识子网 编辑:程序博客网 时间:2024/06/05 09:32
upcall线程处理由datapath通过netlink机制上送的报文,其入口函数为udpif_upcall_handler,这一篇我们先分析该线程是如何收取upcall报文的。
1、udpif_upcall_handler函数
/* The upcall handler thread tries to read a batch of UPCALL_MAX_BATCH
 * upcalls from dpif, processes the batch and installs corresponding flows
 * in dpif.
 *
 * 'arg' is the per-thread 'struct handler'; the thread runs until the
 * udpif's exit_latch is set. */
static void *
udpif_upcall_handler(void *arg)
{
    struct handler *handler = arg;
    struct udpif *udpif = handler->udpif;

    while (!latch_is_set(&handler->udpif->exit_latch)) {
        if (recv_upcalls(handler)) {
            /* Batch was non-empty: do not block, more upcalls may be
             * pending right now. */
            poll_immediate_wake();
        } else {
            /* Nothing received: register to wake when the netlink socket
             * becomes readable or when the exit latch is set. */
            dpif_recv_wait(udpif->dpif, handler->handler_id);
            latch_wait(&udpif->exit_latch);
        }
        poll_block(); /* Sleep until one of the wait events above fires. */
    }

    return NULL;
}
2、recv_upcalls函数
/* Reads up to UPCALL_MAX_BATCH upcalls from the datapath for 'handler',
 * translates each into a 'struct upcall' and hands the whole batch to
 * handle_upcalls().  Returns the number of upcalls actually processed
 * (0 means the receive queue was empty). */
static size_t
recv_upcalls(struct handler *handler)
{
    struct udpif *udpif = handler->udpif;
    uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8]; /* Stack stubs to avoid
                                                     * malloc for small msgs. */
    struct ofpbuf recv_bufs[UPCALL_MAX_BATCH];
    struct dpif_upcall dupcalls[UPCALL_MAX_BATCH];
    struct upcall upcalls[UPCALL_MAX_BATCH];
    struct flow flows[UPCALL_MAX_BATCH];
    size_t n_upcalls, i;

    n_upcalls = 0;
    while (n_upcalls < UPCALL_MAX_BATCH) {
        struct ofpbuf *recv_buf = &recv_bufs[n_upcalls];
        struct dpif_upcall *dupcall = &dupcalls[n_upcalls];
        struct upcall *upcall = &upcalls[n_upcalls];
        struct flow *flow = &flows[n_upcalls];
        unsigned int mru;
        int error;

        ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls],
                        sizeof recv_stubs[n_upcalls]);
        /* Receive one upcall message from the datapath; stop the batch on
         * any error (including EAGAIN when the queue is drained). */
        if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, recv_buf)) {
            ofpbuf_uninit(recv_buf);
            break;
        }

        /* Convert the ODP flow key attributes into a 'struct flow'. */
        if (odp_flow_key_to_flow(dupcall->key, dupcall->key_len, flow)
            == ODP_FIT_ERROR) {
            goto free_dupcall;
        }

        if (dupcall->mru) {
            mru = nl_attr_get_u16(dupcall->mru);
        } else {
            mru = 0;
        }

        /* Resolve the ofproto that owns the in_port and initialize
         * '*upcall' from the datapath message. */
        error = upcall_receive(upcall, udpif->backer, &dupcall->packet,
                               dupcall->type, dupcall->userdata, flow, mru,
                               &dupcall->ufid, PMD_ID_NULL);
        if (error) {
            if (error == ENODEV) {
                /* Received packet on datapath port for which we couldn't
                 * associate an ofproto.  This can happen if a port is removed
                 * while traffic is being received.  Print a rate-limited
                 * message in case it happens frequently.
                 *
                 * Install a drop flow (no actions) so the datapath stops
                 * sending these packets up. */
                dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key,
                              dupcall->key_len, NULL, 0, NULL, 0,
                              &dupcall->ufid, PMD_ID_NULL, NULL);
                VLOG_INFO_RL(&rl, "received packet on unassociated datapath "
                             "port %"PRIu32, flow->in_port.odp_port);
            }
            goto free_dupcall;
        }

        upcall->key = dupcall->key;
        upcall->key_len = dupcall->key_len;
        upcall->ufid = &dupcall->ufid;

        upcall->out_tun_key = dupcall->out_tun_key;
        upcall->actions = dupcall->actions;

        if (vsp_adjust_flow(upcall->ofproto, flow, &dupcall->packet)) {
            upcall->vsp_adjusted = true;
        }

        /* Re-extract the flow from the (possibly adjusted) packet so that
         * 'flow' matches the actual packet contents. */
        pkt_metadata_from_flow(&dupcall->packet.md, flow);
        flow_extract(&dupcall->packet, flow);

        error = process_upcall(udpif, upcall,
                               &upcall->odp_actions, &upcall->wc);
        if (error) {
            goto cleanup;
        }

        n_upcalls++;
        continue;

cleanup:
        upcall_uninit(upcall);
free_dupcall:
        dp_packet_uninit(&dupcall->packet);
        ofpbuf_uninit(recv_buf);
    }

    if (n_upcalls) {
        /* Install the flows / execute the actions for the whole batch,
         * then release every per-upcall resource. */
        handle_upcalls(handler->udpif, upcalls, n_upcalls);
        for (i = 0; i < n_upcalls; i++) {
            dp_packet_uninit(&dupcalls[i].packet);
            ofpbuf_uninit(&recv_bufs[i]);
            upcall_uninit(&upcalls[i]);
        }
    }

    return n_upcalls;
}
3、dpif_recv函数
/* Polls for an upcall from 'dpif' for an upcall handler.  Since there
 * there can be multiple poll loops, 'handler_id' is needed as index to
 * identify the corresponding poll loop.  If successful, stores the upcall
 * into '*upcall', using 'buf' for storage.  Should only be called if
 * 'recv_set' has been used to enable receiving packets from 'dpif'.
 *
 * 'upcall->key' and 'upcall->userdata' point into data in the caller-provided
 * 'buf', so their memory cannot be freed separately from 'buf'.
 *
 * The caller owns the data of 'upcall->packet' and may modify it.  If
 * packet's headroom is exhausted as it is manipulated, 'upcall->packet'
 * will be reallocated.  This requires the data of 'upcall->packet' to be
 * released with ofpbuf_uninit() before 'upcall' is destroyed.  However,
 * when an error is returned, the 'upcall->packet' may be uninitialized
 * and should not be released.
 *
 * Returns 0 if successful, otherwise a positive errno value.  Returns EAGAIN
 * if no upcall is immediately available. */
int
dpif_recv(struct dpif *dpif, uint32_t handler_id, struct dpif_upcall *upcall,
          struct ofpbuf *buf)
{
    int error = EAGAIN;

    if (dpif->dpif_class->recv) {
        /* Dispatch through the provider vtable; for the Linux kernel
         * datapath this calls dpif_netlink_recv(). */
        error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf);
        if (!error) {
            dpif_print_packet(dpif, upcall);
        } else if (error != EAGAIN) {
            log_operation(dpif, "recv", error);
        }
    }
    return error;
}
4、dpif_netlink_recv函数
/* 'recv' implementation for the netlink dpif provider: takes the upcall
 * read-lock and delegates to the platform-specific receive routine. */
static int
dpif_netlink_recv(struct dpif *dpif_, uint32_t handler_id,
                  struct dpif_upcall *upcall, struct ofpbuf *buf)
{
    struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    int error;

    fat_rwlock_rdlock(&dpif->upcall_lock);
#ifdef _WIN32
    error = dpif_netlink_recv_windows(dpif, handler_id, upcall, buf);
#else
    error = dpif_netlink_recv__(dpif, handler_id, upcall, buf); /* Linux path. */
#endif
    fat_rwlock_unlock(&dpif->upcall_lock);

    return error;
}
5、dpif_netlink_recv__函数
/* Linux receive path: polls the handler's epoll set for ready upcall
 * channels and reads one upcall from the next ready channel into
 * '*upcall'/'buf'.  Returns 0 on success, EAGAIN when nothing is ready,
 * or another positive errno on failure. */
static int
dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id,
                    struct dpif_upcall *upcall, struct ofpbuf *buf)
    OVS_REQ_RDLOCK(dpif->upcall_lock)
{
    struct dpif_handler *handler;
    int read_tries = 0;

    if (!dpif->handlers || handler_id >= dpif->n_handlers) {
        return EAGAIN;
    }

    handler = &dpif->handlers[handler_id];
    if (handler->event_offset >= handler->n_events) {
        /* No buffered events left from the previous poll; refill. */
        int retval;

        handler->event_offset = handler->n_events = 0;

        do {
            /* NOTE(review): the timeout argument is 0, so this epoll_wait()
             * only polls and never blocks (the original annotation claiming
             * it blocks was wrong); blocking happens in the caller's
             * poll_block() instead. */
            retval = epoll_wait(handler->epoll_fd, handler->epoll_events,
                                dpif->uc_array_size, 0);
        } while (retval < 0 && errno == EINTR);

        if (retval < 0) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno));
        } else if (retval > 0) {
            handler->n_events = retval; /* Number of ready channels. */
        }
    }

    while (handler->event_offset < handler->n_events) {
        /* 'data.u32' is the channel index — presumably stored when the
         * channel's socket was registered with epoll_ctl(); verify against
         * the channel setup code. */
        int idx = handler->epoll_events[handler->event_offset].data.u32;
        /* Each ready channel has its own netlink socket. */
        struct dpif_channel *ch = &dpif->handlers[handler_id].channels[idx];

        handler->event_offset++;

        for (;;) {
            int dp_ifindex;
            int error;

            if (++read_tries > 50) {
                /* Cap the work done per call so one busy channel cannot
                 * starve the handler. */
                return EAGAIN;
            }

            error = nl_sock_recv(ch->sock, buf, false); /* Read one upcall. */
            if (error == ENOBUFS) {
                /* ENOBUFS typically means that we've received so many
                 * packets that the buffer overflowed.  Try again
                 * immediately because there's almost certainly a packet
                 * waiting for us. */
                report_loss(dpif, ch, idx, handler_id);
                continue;
            }

            ch->last_poll = time_msec();
            if (error) {
                if (error == EAGAIN) {
                    /* This channel is drained; move on to the next one. */
                    break;
                }
                return error;
            }

            error = parse_odp_packet(dpif, buf, upcall, &dp_ifindex);
            if (!error && dp_ifindex == dpif->dp_ifindex) {
                return 0;
            } else if (error) {
                return error;
            }
        }
    }

    return EAGAIN;
}
6、nl_sock_recv函数
/* Tries to receive a Netlink message from the kernel on 'sock' into 'buf'.  If
 * 'wait' is true, waits for a message to be ready.  Otherwise, fails with
 * EAGAIN if the 'sock' receive buffer is empty.
 *
 * The caller must have initialized 'buf' with an allocation of at least
 * NLMSG_HDRLEN bytes.  For best performance, the caller should allocate enough
 * space for a "typical" message.
 *
 * On success, returns 0 and replaces 'buf''s previous content by the received
 * message.  This function expands 'buf''s allocated memory, as necessary, to
 * hold the actual size of the received message.
 *
 * On failure, returns a positive errno value and clears 'buf' to zero length.
 * 'buf' retains its previous memory allocation.
 *
 * Regardless of success or failure, this function resets 'buf''s headroom to
 * 0. */
int
nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* Thin public wrapper around the internal implementation. */
    return nl_sock_recv__(sock, buf, wait);
}
7、nl_sock_recv__函数
/* Internal receive routine for nl_sock_recv(): performs the actual
 * recvmsg() (or Windows ioctl) and validates/copies the received Netlink
 * message into 'buf'.  See nl_sock_recv() for the full contract. */
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
    uint8_t tail[65536];
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;
    int error;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    /* Scatter the read: first into 'buf', overflow into 'tail'. */
    iov[0].iov_base = buf->base;
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    /* Receive a Netlink message from the kernel.
     *
     * This works around a kernel bug in which the kernel returns an error code
     * as if it were the number of bytes read.  It doesn't actually modify
     * anything in the receive buffer in that case, so we can initialize the
     * Netlink header with an impossible message length and then, upon success,
     * check whether it changed. */
    nlmsghdr = buf->base;
    do {
        nlmsghdr->nlmsg_len = UINT32_MAX;
#ifdef _WIN32
        DWORD bytes;
        if (!DeviceIoControl(sock->handle, sock->read_ioctl,
                             NULL, 0, tail, sizeof tail, &bytes, NULL)) {
            VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s",
                        ovs_lasterror_to_string());
            retval = -1;
            /* XXX: Map to a more appropriate error. */
            errno = EINVAL;
        } else {
            retval = bytes;
            if (retval == 0) {
                retval = -1;
                errno = EAGAIN;
            } else {
                if (retval >= buf->allocated) {
                    ofpbuf_reinit(buf, retval);
                    nlmsghdr = buf->base;
                    nlmsghdr->nlmsg_len = UINT32_MAX;
                }
                memcpy(buf->data, tail, retval);
                buf->size = retval;
            }
        }
#else
        /* Read one netlink message via the standard socket interface;
         * MSG_DONTWAIT makes this non-blocking when 'wait' is false. */
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
#endif
        error = (retval < 0 ? errno
                 : retval == 0 ? ECONNRESET /* not possible? */
                 : nlmsghdr->nlmsg_len != UINT32_MAX ? 0
                 : retval); /* Kernel bug: errno returned as byte count. */
    } while (error == EINTR);
    if (error) {
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    if (msg.msg_flags & MSG_TRUNC) {
        VLOG_ERR_RL(&rl, "truncated message (longer than %"PRIuSIZE" bytes)",
                    sizeof tail);
        return E2BIG;
    }

    /* Sanity-check the Netlink header against the byte count. */
    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%"PRIuSIZE" bytes < %"PRIuSIZE")",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }

#ifndef _WIN32
    /* If the message spilled into 'tail', append the overflow to 'buf'. */
    buf->size = MIN(retval, buf->allocated);
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }
#endif

    log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
    COVERAGE_INC(netlink_received);

    return 0;
}
用户态程序通过recvmsg函数获取netlink消息,而内核提供了genl框架可以更加简便地处理netlink消息。 其中channel的sock是如何建立,留待下一篇进行分析。
0 0
- 【OVS2.5.0源码分析】upcall处理线程分析(1)
- 【OVS2.5.0源码分析】upcall处理线程分析(2)
- 【OVS2.5.0源码分析】upcall处理线程分析(3)
- 【OVS2.5.0源码分析】upcall处理线程分析(4)
- 【OVS2.5.0源码分析】bridge&bundle&port分析(1)
- 【OVS2.5.0源码分析】openflow连接实现分析(1)
- 【OVS2.5.0源码分析】ovsd进程运行机制分析(1)
- 【OVS2.5.0源码分析】datapath之action分析(1)
- 【OVS2.5.0源码分析】sFlow实现分析(1)
- 【OVS2.5.0源码分析】mirror实现原理(1)
- 【OVS2.5.0源码分析】openflow连接实现分析(2)
- 【OVS2.5.0源码分析】openflow连接实现分析(3)
- 【OVS2.5.0源码分析】openflow连接实现分析(4)
- 【OVS2.5.0源码分析】openflow连接实现分析(5)
- 【OVS2.5.0源码分析】openflow连接实现分析(6)
- 【OVS2.5.0源码分析】datapath之action分析(2)
- 【OVS2.5.0源码分析】datapath之action分析(3)
- 【OVS2.5.0源码分析】datapath之action分析(4)
- 凌晨寄语
- 3.保护模式6-特权级转移(门描述符概述)
- 【OpenCV入门教程之六】 创建Trackbar & 图像对比度、亮度值调整
- vmware安装mac os
- 蓝桥杯-经典的递归问题(一)
- 【OVS2.5.0源码分析】upcall处理线程分析(1)
- CDH(Cloudera) hadoop 集成开发平台
- 医学/生物 名词
- 【OpenCV入门教程之七】 玩转OpenCV源代码:生成OpenCV工程解决方案与OpenCV源码编译
- 字符串,字符数组(c/c++)
- python获取中文字符串长度
- lesson 6 threads synchronization
- DELL R610的远程管理卡的问题
- PAT(B) - 1015. 德才论 (25)