virtio-netdev 数据包的发送(基于kernel v3.10)

来源:互联网 发布:淘宝女包品牌排行 编辑:程序博客网 时间:2024/06/05 06:08

 

在前面几篇文章中已经大体介绍了virtio的重要组成,包括virtio net设备的创建、vring的创建,以及与virtio设备的交互方式。本文从网络数据包的发送角度来看virtio的具体使用流程。

流程分析

当Kernel中的网络数据包从内核协议栈下来后,必然要走到设备注册的发送函数,virtio netdev注册的发送函数即virtnet_netdev中的start_xmit():

static const struct net_device_ops virtnet_netdev = {

.ndo_open = virtnet_open,

.ndo_stop = virtnet_close,

.ndo_start_xmit = start_xmit, <----------------

.ndo_validate_addr = eth_validate_addr,

.ndo_set_mac_address = virtnet_set_mac_address,

.ndo_set_rx_mode = virtnet_set_rx_mode,

.ndo_change_mtu = virtnet_change_mtu,

.ndo_get_stats64 = virtnet_stats,

.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,

.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,

#ifdef CONFIG_NET_POLL_CONTROLLER

.ndo_poll_controller = virtnet_netpoll,

#endif

};

 

/*
 * Excerpt of the virtio-net transmit handler; parts that are not relevant
 * to this walkthrough are elided and shown as "......".
 *
 * The two visible steps are: queue the skb on the send queue through
 * xmit_skb(), then kick the send queue's virtqueue.
 */
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)

{

......

/* Queue the packet on the send queue; the result is stored in err. */
err = xmit_skb(sq, skb);

......

/* Tell the other side about the buffers added to sq->vq. */
virtqueue_kick(sq->vq);

......

}

在start_xmit()中,主要的操作是通过xmit_skb()使数据包入vring队列:

/*
 * Excerpt: build the scatter-gather list for one outgoing skb and add it
 * to the send queue's virtqueue. Elided parts are shown as "......"
 * (the declarations of hdr_len and num_sg are among the parts not shown).
 *
 * Returns the result of virtqueue_add_outbuf().
 */
static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)

{

struct skb_vnet_hdr *hdr;

/* sizeof does not evaluate hdr, so using it before the assignment is fine. */
hdr_len = sizeof hdr->hdr;

hdr = skb_vnet_hdr(skb);

......

/* The first sg entry describes the virtio-net header. */
sg_set_buf(sq->sg, hdr, hdr_len);

/* Packet data is mapped from the second entry on; "+ 1" counts the header entry. */
num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;

/* The skb itself is passed as the token identifying this buffer. */
return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);

}

在每个进入scatter-gather list的packet之前,需要有一个virtio_net_hdr结构的头部信息,用以支持checksum offload与TCP/UDP Segmentation offload。所以在上述流程中先使用sg_set_buf(sq->sg, hdr, hdr_len)将virtio-net-hdr的buffer填入了scatter-gather list。skb_vnet_hdr与virtio_net_hdr的结构如下:

/*
 * Header wrapper used by the transmit path: an anonymous union that is
 * viewed either as struct virtio_net_hdr (hdr) or as
 * struct virtio_net_hdr_mrg_rxbuf (mhdr).
 */
struct skb_vnet_hdr {
	union {
		struct virtio_net_hdr hdr;		/* plain header view */
		struct virtio_net_hdr_mrg_rxbuf mhdr;	/* _mrg_rxbuf header view */
	};
};

/* Bit values for virtio_net_hdr.flags */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */

/* Values for virtio_net_hdr.gso_type */
#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */

/*
 * Header placed in front of each packet in the scatter-gather list; it
 * carries the checksum-offload and segmentation-offload parameters.
 */
struct virtio_net_hdr {
	__u8  flags;		/* VIRTIO_NET_HDR_F_* */
	__u8  gso_type;		/* VIRTIO_NET_HDR_GSO_* */
	__u16 hdr_len;		/* Ethernet + IP + tcp/udp hdrs */
	__u16 gso_size;		/* Bytes to append to hdr_len per frame */
	__u16 csum_start;	/* Position to start checksumming from */
	__u16 csum_offset;	/* Offset after that to place checksum */
};

先将virtio_net_hdr填入scatter-gather list,然后再填入packet的buffer。两者都会调用到sg_set_page(),主要的操作就是计算待发送数据buffer所在的page、在page内的偏移量及length。

/*
 * sg_set_buf - point an sg entry at a virtually addressed buffer.
 * @sg:     scatterlist entry to fill
 * @buf:    start of the buffer
 * @buflen: length of the buffer
 *
 * Translates @buf into its page and the offset inside that page, then
 * hands both to sg_set_page().
 */
static inline void sg_set_buf(struct scatterlist *sg, const void *buf, unsigned int buflen)
{
	unsigned int in_page_off = offset_in_page(buf);

	sg_set_page(sg, virt_to_page(buf), buflen, in_page_off);
}

 

/*
 * sg_set_page - fill in one scatterlist entry.
 * @sg:     entry to fill
 * @page:   page the data lives in
 * @len:    length of the data
 * @offset: offset of the data inside @page
 *
 * The page pointer is stored through sg_assign_page(); offset and length
 * are written directly.
 */
static inline void sg_set_page(struct scatterlist *sg, struct page *page,
			       unsigned int len, unsigned int offset)
{
	sg_assign_page(sg, page);

	/* Position and size of the data within the page. */
	sg->offset = offset;
	sg->length = len;
}

 

/*
 * sg_assign_page - store a page pointer in an sg entry.
 * @sg:   entry to update
 * @page: page to store
 *
 * The two low bits of sg->page_link are kept as they are; the remaining
 * bits are replaced by the page pointer.
 */
static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
{
	unsigned long kept_bits = sg->page_link & 0x3;

	/*
	 * The low bits of page_link are borrowed for flags, which only
	 * works if page pointers are aligned to at least a 32-bit boundary.
	 */
	BUG_ON((unsigned long) page & 0x03);
#ifdef CONFIG_DEBUG_SG
	BUG_ON(sg->sg_magic != SG_MAGIC);
	BUG_ON(sg_is_chain(sg));
#endif
	sg->page_link = kept_bits | (unsigned long) page;
}

[图:virtio-netdev 数据包的发送 - 六六哥 - 六六哥的博客]

 

skbuffer与sg list的关系如上所示。

最后调用virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC)进入vring操作阶段。

/**
 * virtqueue_add_outbuf - queue output buffers for the other end to read
 * @vq: the virtqueue to add to.
 * @sgs: scatterlist array (does not have to be terminated).
 * @num: how many scatterlist entries the other side may read.
 * @data: token used to identify this buffer.
 * @gfp: allocation flags, used if memory has to be allocated.
 *
 * The caller is responsible for making sure no other virtqueue operation
 * runs concurrently with this one (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist sg[], unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	/* One scatterlist array holding only out entries, no in entries. */
	const unsigned int total_out = num, total_in = 0;
	const unsigned int out_sgs = 1, in_sgs = 0;

	return virtqueue_add(vq, &sg, sg_next_arr, total_out, total_in,
			     out_sgs, in_sgs, data, gfp);
}

[图:virtio-netdev 数据包的发送 - 六六哥 - 六六哥的博客]

 

/*
 * Excerpt of the core routine that places a buffer into the vring;
 * elided parts are shown as "......".
 *
 * The visible part walks the out scatterlists, fills one descriptor per
 * sg entry starting at vq->free_head, terminates the chain, records the
 * token, and publishes the chain head in the available ring.
 */
static inline int virtqueue_add(struct virtqueue *_vq,

struct scatterlist *sgs[],

struct scatterlist *(*next)

(struct scatterlist *, unsigned int *),

unsigned int total_out,

unsigned int total_in,

unsigned int out_sgs,

unsigned int in_sgs,

void *data,

gfp_t gfp)

{

......

/* Start the chain at the first free descriptor. */
head = i = vq->free_head;

for (n = 0; n < out_sgs; n++) {

for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {

/* Every descriptor is first marked as having a successor. */
vq->vring.desc[i].flags = VRING_DESC_F_NEXT;

vq->vring.desc[i].addr = sg_phys(sg);

vq->vring.desc[i].len = sg->length;

/* Remember the last descriptor filled so its NEXT flag can be cleared below. */
prev = i;

i = vq->vring.desc[i].next; // follow the next field to find the next available desc

}

}

 

/* Last one doesn't continue. */

vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;

 

/* Update free pointer */

vq->free_head = i;

 

/* Set token. */

vq->data[head] = data;

 

/* Put entry in available array (but don't update avail->idx until they do sync). */

/* Slot index is idx masked with num-1 (assumes num is a power of two - TODO confirm). */
avail = (vq->vring.avail->idx & (vq->vring.num-1));

vq->vring.avail->ring[avail] = head;

/* Descriptors and available array need to be set before we expose the new available array entries. */

virtio_wmb(vq->weak_barriers);

vq->vring.avail->idx++;

vq->num_added++;

}

  1. 从head = i = vq->free_head;找到第一片可用的desc;
  2. 从sg list将需要发送buffer信息读取并填充vring的desc描述符;
    addr: guest的物理地址
    len: buffer的长度
    flags: VRING_DESC_F_NEXT表示该片buffer还有后续片
  3. 将最后一片占用的desc的flag作下标记,表示buffer片的终结;
  4. 更新空闲desc的指针;
  5. 将skb保存在data[]中作为token,用完后再释放;
  6. 更新avail描述符,将待发送的第一片buffer在desc中的序号写入空闲的avail ring中,并更新avail描述队列的序号等。

网上一幅图可以看到这些操作的关系:

[图:virtio-netdev 数据包的发送 - 六六哥 - 六六哥的博客]

 

在start_xmit中,待发送的信息入队列后,使用virtqueue_kick(sq->vq)通告Host端;

/*
 * virtqueue_kick - notify the other side after buffers have been queued.
 * @vq: the virtqueue that was updated.
 *
 * virtqueue_notify() is called only when virtqueue_kick_prepare() returns
 * non-zero.
 *
 * Returns true. The function is declared bool, so every path has to
 * return a value; virtqueue_notify() is void in this version and reports
 * no failure, so there is no error to propagate.
 */
bool virtqueue_kick(struct virtqueue *vq)
{
	if (virtqueue_kick_prepare(vq)) {
		virtqueue_notify(vq);
	}

	return true;
}


/*
 * virtqueue_notify - invoke the notify callback of a virtqueue.
 * @_vq: the virtqueue whose other side should be prodded.
 */
void virtqueue_notify(struct virtqueue *_vq)
{
	struct vring_virtqueue *vring_vq = to_vvq(_vq);

	/* Let the other side know that something changed. */
	vring_vq->notify(_vq);
}

其中vq->notify即是vring创建时注册的vp_notify。

/*
 * vp_notify - notify callback for a virtio-pci virtqueue.
 * @vq: the virtqueue to signal for.
 *
 * Writes the queue's index as a 16-bit value to the
 * VIRTIO_PCI_QUEUE_NOTIFY offset of the device's I/O region.
 */
static void vp_notify(struct virtqueue *vq)
{
	struct virtio_pci_device *pci_vdev = to_vp_device(vq->vdev);

	/* The other end is signalled by writing the queue selector
	 * into the notification register. */
	iowrite16(vq->index, pci_vdev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
}

通过向VIRTIO_PCI_QUEUE_NOTIFY寄存器写入队列的index来进行通告。接下来就是HOST一端的处理了。

从整个前端发送流程可以看出,一个数据包发送时,virtio driver只是将skb的地址及长度等信息填入vring并通告后端,而vring的空间是和后端共享的,所以该传输过程为零拷贝,这也是virtio高性能的一个原因。

 

 

0 0
原创粉丝点击