半虚拟化下网络数据包的处理过程

来源:互联网 发布:javascript的常用类型 编辑:程序博客网 时间:2024/06/05 19:53

http://hi.baidu.com/%B0%B5%D4%C2%C1%F7%B9%E2/blog/item/2c21d0231c03dbffd7cae2eb.html

 

(Note: 先要理解Xen的split driver model,之后再看这篇文章)

 

 

(1)网络设备后端代码分析

 

linux中网络数据包处理流程:

R3      应用程序数据包发送
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.          TCP/IP协议栈处理
.                       v
R0       sk_buff格式数据
.                       v
.    网络设备发包函数完成发包过程
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.           硬                 件
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~



xen半虚拟化下网络数据包处理过程:
------------------------------------------             ---------------------------------------------
.                     R3                                                           R3
.                                                                                          应用程序发送数据包
.                                                                                                         v
-Dom0----------------------------------             --DomU------------------------------------
.                    R1                                                           R1

.           ----------------TCP/IP                                         TCP/IP
.          v                          ^                                                              v
真实设备驱动     后端设备驱动                              前端设备驱动
.          |                            ^                                                             | 
------ ---------------------------------------------------------------------------------------------
.          |                 R0     |             -------------------------       |
.          |                             -------|        发送数据              |<---
.          |                                          |   (共享内存方式)      |
.          |                                          -------------------------
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.          v                             硬                            件
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


由于DomU可以有多个,因此设备后端需要为多个设备前端提供服务,具有网桥的功能。借两张图来说明下:






网络设备后端初始化:

/linux-2.6.18-xen.hg/drivers/xen/netback/netback.c
1659 module_init(netback_init);           // 定义设备后端模块初始化函数

/linux-2.6.18-xen.hg/drivers/xen/netback/netback.c
1591 static int __init netback_init(void)
1592 {
.....................
//  skb队列初始化
1602     skb_queue_head_init(&rx_queue);
1603     skb_queue_head_init(&tx_queue);
......................
// xenbus初始化(下次找时间把整个xenbus的代码分析给补上)
1645     netif_xenbus_init();
......................
1656     return 0;
1657 }

/linux-2.6.18-xen.hg/drivers/xen/netback/xenbus.c
451 void netif_xenbus_init(void)
452 {
// 网络后端设备注册
453     xenbus_register_backend(&netback);
454 }

/linux-2.6.18-xen.hg/drivers/xen/netback/xenbus.c
440 static struct xenbus_driver netback = {
441     .name = "vif",
442     .owner = THIS_MODULE,
443     .ids = netback_ids,
444     .probe = netback_probe,                 // 后端设备回调函数
445     .remove = netback_remove,
446     .uevent = netback_uevent,
447     .otherend_changed = frontend_changed,   // 前端状态发生改变时调用
448 };


/linux-2.6.18-xen.hg/drivers/xen/xenbus/xenbus_probe_backend.c
175 int xenbus_register_backend(struct xenbus_driver *drv)
176 {
177     drv->read_otherend_details = read_frontend_details;
178
179     return xenbus_register_driver_common(drv, &xenbus_backend);
180 }
181 EXPORT_SYMBOL_GPL(xenbus_register_backend);

/linux-2.6.18-xen.hg/drivers/xen/xenbus/xenbus_probe_backend.c
112 static struct xen_bus_type xenbus_backend = {
113     .root = "backend",
114     .levels = 3,        /* backend/type/<frontend>/<id> */
115     .get_bus_id = backend_bus_id,
116     .probe = xenbus_probe_backend,
117     .error = -ENODEV,
118     .bus = {
119         .name     = "xen-backend",
120         .match    = xenbus_match,
121         .probe    = xenbus_dev_probe,
122         .remove   = xenbus_dev_remove,
123 //      .shutdown = xenbus_dev_shutdown,
124         .uevent   = xenbus_uevent_backend,
125     },
126     .dev = {
127         .bus_id = "xen-backend",
128     },
129 };

/linux-2.6.18-xen.hg/drivers/xen/xenbus/xenbus_probe.c
373 int xenbus_register_driver_common(struct xenbus_driver *drv,
374                   struct xen_bus_type *bus)
375 {
............................
381     drv->driver.name = drv->name;
382     drv->driver.bus = &bus->bus;
.............................
392     mutex_lock(&xenwatch_mutex);
393     ret = driver_register(&drv->driver);         //注册后端设备driver
394     mutex_unlock(&xenwatch_mutex);
395     return ret;
396 }


/linux-2.6.18-xen.hg/drivers/base/driver.c
171 int driver_register(struct device_driver * drv)
172 {
.................
180     return bus_add_driver(drv);         // - - 后面就出了xen目录了,不分析了
181 }


下面来分析下注册的后端设备回调函数 netback_probe
/linux-2.6.18-xen.hg/drivers/xen/netback/xenbus.c
53 /**
54  * Entry point to this code when a new device is created.  Allocate the basic
55  * structures and switch to InitWait.
56  */
// 当一个新设备被创建的时候会调用该函数
57 static int netback_probe(struct xenbus_device *dev,
58              const struct xenbus_device_id *id)
59 {
................
// 分配一个与新设备对应的backend
64     struct backend_info *be = kzalloc(sizeof(struct backend_info),
65                       GFP_KERNEL);
................
72     be->dev = dev;
73     dev->dev.driver_data = be;
................
// 切换设备的状态为XenbusStateInitWait
128     err = xenbus_switch_state(dev, XenbusStateInitWait);
129     if (err)
130         goto fail;
131
132     /* This kicks hotplug scripts, so do it immediately. */
133     backend_create_netif(be);
.................
144 }

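// 注意:下面贴出的backend_info带有blkif/major/minor/mode字段,看起来是块设备后端(blkback)的定义;
// 网络后端(netback)的backend_info应含有netif_t *netif成员(后文代码使用了be->netif),请以netback/xenbus.c为准
30 struct backend_info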
31 {
32     struct xenbus_device *dev;
33     blkif_t *blkif;
34     struct xenbus_watch backend_watch;
35     unsigned major;
36     unsigned minor;
37     char *mode;
38 };

/linux-2.6.18-xen.hg/include/xen/interface/io/xenbus.h
36 enum xenbus_state {
37     XenbusStateUnknown       = 0,
38
39     XenbusStateInitialising  = 1,
40
41     /*
42      * InitWait: Finished early initialisation but waiting for information
43      * from the peer or hotplug scripts.
44      */
45     XenbusStateInitWait      = 2,
46
47     /*
48      * Initialised: Waiting for a connection from the peer.
49      */
50     XenbusStateInitialised   = 3,
51
52     XenbusStateConnected     = 4,
53
54     /*
55      * Closing: The device is being closed due to an error or an unplug event.
56      */
57     XenbusStateClosing       = 5,
58
59     XenbusStateClosed        = 6,
60
61     /*
62      * Reconfiguring: The device is being reconfigured.
63      */
64     XenbusStateReconfiguring = 7,
65
66     XenbusStateReconfigured  = 8
67 };
68 typedef enum xenbus_state XenbusState;


/linux-2.6.18-xen.hg/drivers/xen/netback/xenbus.c
183 static void backend_create_netif(struct backend_info *be)
184 {
185     int err;
186     long handle;
187     struct xenbus_device *dev = be->dev;
...............
// 调用netif_alloc为该后端分配对应的网络接口(netif)
198     be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
...............
207 }


/linux-2.6.18-xen.hg/drivers/xen/netback/interface.c
178 netif_t *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
179 {
................
181     struct net_device *dev;
182     netif_t *netif;
183     char name[IFNAMSIZ] = {};
184
185     snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
186     dev = alloc_netdev(sizeof(netif_t), name, ether_setup);   // 创建网络设备
................
196     netif->domid  = domid;
197     netif->handle = handle;
................
199     init_waitqueue_head(&netif->waiting_to_free);
200     netif->dev = dev;
201
202     netback_carrier_off(netif);
203
204     netif->credit_bytes = netif->remaining_credit = ~0UL;
205     netif->credit_usec  = 0UL;
...................
207     /* Initialize 'expires' now: it's used to track the credit window. */
208     netif->credit_timeout.expires = jiffies;
...................
212     dev->hard_start_xmit = netif_be_start_xmit;       //  定义后端发送函数
213     dev->get_stats       = netif_be_get_stats;
214     dev->open            = net_open;
215     dev->stop            = net_close;
216     dev->change_mtu      = netbk_change_mtu;
217     dev->features        = NETIF_F_IP_CSUM;
.....................
232     rtnl_lock();
233     err = register_netdevice(dev);      // 向内核注册设备
234     rtnl_unlock();
.....................
243     return netif;
244 }



下面来分析下frontend_changed函数,该函数在前端设备状态发生改变之后被调用

/linux-2.6.18-xen.hg/drivers/xen/netback/xenbus.c
210 /**
211  * Callback received when the frontend's state changes.
212  */
213 static void frontend_changed(struct xenbus_device *dev,
214                  enum xenbus_state frontend_state)
215 {
.................
222     switch (frontend_state) {                                    // 根据前端设备状态进行处理
.................
234     case XenbusStateConnected:                             // 前端设备连接
235         if (dev->state == XenbusStateConnected)
236             break;
237         backend_create_netif(be);
238         if (be->netif)
239             connect(be);
240         break;
.................
265 }

/linux-2.6.18-xen.hg/drivers/xen/netback/xenbus.c
326 static void connect(struct backend_info *be)
327 {
.................
331     err = connect_rings(be);
.................
345     xenbus_switch_state(dev, XenbusStateConnected);    // 更改设备状态
.................
347     netif_wake_queue(be->netif->dev);
348 }

// connect_rings这个函数的分析主要是关注与event-channel有关的方面,可能不够客观
/linux-2.6.18-xen.hg/drivers/xen/netback/xenbus.c
351 static int connect_rings(struct backend_info *be)        
352 {
.................
355     unsigned int evtchn, rx_copy;
.................
361     err = xenbus_gather(XBT_NIL, dev->otherend,
362                 "tx-ring-ref", "%lu", &tx_ring_ref,
363                 "rx-ring-ref", "%lu", &rx_ring_ref,
364                 "event-channel", "%u", &evtchn, NULL);
..................
419     /* Map the shared frame, irq etc. */
420     err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
...................
427     return 0;
428 }

tx_ring_ref - - the grant table reference for the transmission ring queue
rx_ring_ref - - the grant table reference for the receiving ring queue 

/linux-2.6.18-xen.hg/drivers/xen/netback/interface.c
306 int netif_map(netif_t *netif, unsigned long tx_ring_ref,
307           unsigned long rx_ring_ref, unsigned int evtchn)
308 {
......................
328     err = bind_interdomain_evtchn_to_irqhandler(
329         netif->domid, evtchn, netif_be_int, 0,
330         netif->dev->name, netif);
.......................
361 }

/linux-2.6.18-xen.hg/drivers/xen/core/evtchn.c
563 int bind_interdomain_evtchn_to_irqhandler(
564     unsigned int remote_domain,
565     unsigned int remote_port,
566     irqreturn_t (*handler)(int, void *, struct pt_regs *),
567     unsigned long irqflags,
568     const char *devname,
569     void *dev_id)
570 {
....................
573     irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
....................
577     retval = request_irq(irq, handler, irqflags, devname, dev_id);
....................
584 }

/linux-2.6.18-xen.hg/drivers/xen/core/evtchn.c
396 static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
397                       unsigned int remote_port)
398 {
399     struct evtchn_bind_interdomain bind_interdomain;
..............
405     err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
406                       &bind_interdomain);
407
408     return err ? : bind_local_port_to_irq(bind_interdomain.local_port);   //如果成功,则把local port绑定到irq并返回该irq
409 }

/linux-2.6.18-xen.hg/include/xen/interface/event_channel.h
68 #define EVTCHNOP_bind_interdomain 0
69 struct evtchn_bind_interdomain {
70     /* IN parameters. */
71     domid_t remote_dom;
72     evtchn_port_t remote_port;
73     /* OUT parameters. */
74     evtchn_port_t local_port;
75 };

 

(2)网络设备前端代码分析

/linux-2.6.18-xen.hg/drivers/xen/netfront/netfront.c
2200 static int __init netif_init(void)
2201 {
...............
2225     err = xenbus_register_frontend(&netfront_driver);     //注册设备
...............
2232 }
2233 module_init(netif_init);

2187 static struct xenbus_driver netfront_driver = {
2188     .name = "vif",
2189     .owner = THIS_MODULE,
2190     .ids = netfront_ids,
2191     .probe = netfront_probe,                                 // 前端回调函数
2192     .remove = __devexit_p(netfront_remove),
2193     .suspend = netfront_suspend,
2194     .suspend_cancel = netfront_suspend_cancel,
2195     .resume = netfront_resume,
2196     .otherend_changed = backend_changed,         // 后端状态改变时的回调函数
2197 };

/linux-2.6.18-xen.hg/drivers/xen/netfront/netfront.c
240 /**
241  * Entry point to this code when a new device is created.  Allocate the basic
242  * structures and the ring buffers for communication with the backend, and
243  * inform the backend of the appropriate details for those.
244  */
245 static int __devinit netfront_probe(struct xenbus_device *dev,
246                     const struct xenbus_device_id *id)
247 {
.................
252     netdev = create_netdev(dev);                    // 创建网络设备
.................
262     err = register_netdev(info->netdev);          // 注册网络设备
.................
269     err = xennet_sysfs_addif(info->netdev);    // 将该设备加入sysfs文件系统
.................
283 }


/linux-2.6.18-xen.hg/drivers/xen/netfront/netfront.c
2037 static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
2038 {
................
2043     netdev = alloc_etherdev(sizeof(struct netfront_info));   //创建以太网设备
................
2093     netdev->open            = network_open;                       
2094     netdev->hard_start_xmit = network_start_xmit;  // 网络设备前端发送函数
2095     netdev->stop            = network_close;
2096     netdev->get_stats       = network_get_stats;
2097     netdev->poll            = netif_poll;  // 网络设备前端接收函数
.................
2120 }

/linux-2.6.18-xen.hg/drivers/xen/netfront/netfront.c
528 /**
529  * Callback received when the backend's state changes.
530  */
531 static void backend_changed(struct xenbus_device *dev,
532                 enum xenbus_state backend_state)
533 {
...................
539     switch (backend_state) {
...................
549     case XenbusStateInitWait:
550         if (dev->state != XenbusStateInitialising)
551             break;
552         if (network_connect(netdev) != 0)
553             break;
554         xenbus_switch_state(dev, XenbusStateConnected);
555         send_fake_arp(netdev);
556         break;
...................
561     }
562 }

/linux-2.6.18-xen.hg/drivers/xen/netfront/netfront.c
1775 static int network_connect(struct net_device *dev)
1776 {
...................
1801     err = talk_to_backend(np->xbdev, np);         // 与后端交互
...................
1867 }



361 /* Common code used when first setting up, and when resuming. */
362 static int talk_to_backend(struct xenbus_device *dev,
363                struct netfront_info *info)
364 {
...................
379     /* Create shared ring, alloc event channel. */
380     err = setup_device(dev, info);
...................
466 }

468 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
469 {
470     struct netif_tx_sring *txs; //发送环指针
471     struct netif_rx_sring *rxs; // 接收环指针
...................
// 为发送环分配页面
481     txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
...................
487     SHARED_RING_INIT(txs);   // 发送环初始化
488     FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);  //发送环前端初始化
489
490     err = xenbus_grant_ring(dev, virt_to_mfn(txs));  // 允许后端设备访问该页
...................
// 接收环分配页面
497     rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
...................
503     SHARED_RING_INIT(rxs);  // 接收环初始化
504     FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);  // 接收环前端页面初始化
505
506     err = xenbus_grant_ring(dev, virt_to_mfn(rxs));  // 允许后端设备访问接收环页面
..................
515     err = bind_listening_port_to_irqhandler(        // 绑定事件通道端口
516         dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
517         netdev);
...................
526 }