Linux kernel 帧的接收
来源:互联网 发布:武汉必佳软件 编辑:程序博客网 时间:2024/06/06 17:07
Linux kernel 帧的接收
本文以e1000驱动为例,基于3.10.0-514.10.2版本内核。
驱动注册 - e1000_init_module
[root@10-254-0-111 ~]# modprobe e1000   #插入e1000模块
或者
[root@10-254-0-111 ~]# insmod /path/to/e1000.ko   #insmod后面指定e1000.ko文件路径
上面的操作对应的实现代码如下:
/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***/ //该e1000驱动对应的pci驱动实例 static struct pci_driver e1000_driver = { .name = e1000_driver_name, //e1000驱动程序名称,默认e1000 .id_table = e1000_pci_tbl, //该驱动程序所支持的网卡设备列表 .probe = e1000_probe, //设备初始化函数,当PCI子系统检测到该驱动所支持的设备被插入到总线上时,调用该函数对设备进行初始化操作。 .remove = e1000_remove, //移除设备时(热插拔或驱动程序被移除时)调用该函数#ifdef CONFIG_PM //电源管理 /* Power Management Hooks */ .suspend = e1000_suspend, //系统休眠时调用 .resume = e1000_resume, //系统被唤醒时调用#endif .shutdown = e1000_shutdown, //系统关闭时调用 .err_handler = &e1000_err_handler //错误处理器};/** * e1000_init_module - 驱动注册函数 * * e1000_init_module is the first routine called when the driver is * loaded. All it does is register with the PCI subsystem. **/static int __init e1000_init_module(void){ int ret; pr_info("%s - version %s\n", e1000_driver_string, e1000_driver_version); pr_info("%s\n", e1000_copyright); /* 注册pci驱动 - 把e1000驱动程序以pci_driver形式注册到pci子系统中 */ ret = pci_register_driver(&e1000_driver); if (copybreak != COPYBREAK_DEFAULT) { if (copybreak == 0) pr_info("copybreak disabled\n"); else pr_info("copybreak enabled for " "packets <= %u bytes\n", copybreak); } return ret;}module_init(e1000_init_module);
pci_register_driver() 注册的是驱动程序,即把驱动程序安装到内核,准确地说是安装到内核的PCI子系统中。此时还没有设备出现,但是内核已经具备管理e1000设备的能力。
设备发现和初始化 - e1000_probe
刚刚我们通过e1000_init_module()把e1000驱动程序注册到PCI子系统,这样当有e1000设备插入到PCI总线的时候,PCI子系统就可以发现该设备(设备ID与驱动的id_table即e1000_pci_tbl匹配),并调用之前注册的函数e1000_probe()对设备进行初始化。
/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***//* 设备的操作函数 */static const struct net_device_ops e1000_netdev_ops = { .ndo_open = e1000_open, //打开设备 .ndo_stop = e1000_close, //关闭设备 .ndo_start_xmit = e1000_xmit_frame, .ndo_get_stats = e1000_get_stats, .ndo_set_rx_mode = e1000_set_rx_mode, .ndo_set_mac_address = e1000_set_mac, .ndo_tx_timeout = e1000_tx_timeout, .ndo_change_mtu = e1000_change_mtu, .ndo_do_ioctl = e1000_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid,#ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = e1000_netpoll,#endif .ndo_fix_features = e1000_fix_features, .ndo_set_features = e1000_set_features,};/** * e1000_probe - 设备初始化函数 * @pdev: PCI device information struct * @ent: entry in e1000_pci_tbl * * Returns 0 on success, negative on failure * * e1000_probe initializes an adapter identified by a pci_dev structure. * The OS initialization, configuring of the adapter private structure, * and a hardware reset occur. **/static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent){ struct net_device *netdev; struct e1000_adapter *adapter; //设备私有数据 struct e1000_hw *hw; ... err = -ENOMEM; /* 为设备分配net_device结构体 */ netdev = alloc_etherdev(sizeof(struct e1000_adapter)); if (!netdev) goto err_alloc_etherdev; ... /* 设置设备的私有数据 */ adapter = netdev_priv(netdev); adapter->netdev = netdev; adapter->pdev = pdev; adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); adapter->bars = bars; adapter->need_ioport = need_ioport; ... /* 设置设备的操作函数 */ netdev->netdev_ops = &e1000_netdev_ops; e1000_set_ethtool_ops(netdev); /** * 初始化该设备的napi,用于下半部接收数据; * 该函数初始化napi字段,并把napi注册到全局napi_hash表中。 **/ netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); /* 设置设备名称 */ strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); ... 
/* 初始化延时任务,如看门狗任务 */ INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog); INIT_DELAYED_WORK(&adapter->fifo_stall_task, e1000_82547_tx_fifo_stall_task); INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task); INIT_WORK(&adapter->reset_task, e1000_reset_task); ... /* reset the hardware with the new settings */ e1000_reset(adapter); strcpy(netdev->name, "eth%d"); /* 注册网络设备net_device */ err = register_netdev(netdev); if (err) goto err_register; ...}
开启设备 - e1000_open
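当使用用户空间工具(如ifconfig、iproute2的ip命令)开启网络设备时,内核的网络设备层(而不是PCI子系统)会通过该设备net_device_ops中的.ndo_open回调调用函数e1000_open()。这个回调是在e1000_probe()中通过 netdev->netdev_ops = &e1000_netdev_ops 设置的。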
/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***//** * e1000_open - Called when a network interface is made active * @netdev: network interface device structure * * Returns 0 on success, negative value on failure * * The open entry point is called when a network interface is made * active by the system (IFF_UP). At this point all resources needed * for transmit and receive operations are allocated, the interrupt * handler is registered with the OS, the watchdog task is started, * and the stack is notified that the interface is ready. **/static int e1000_open(struct net_device *netdev){ struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; int err; /* disallow open during test */ if (test_bit(__E1000_TESTING, &adapter->flags)) return -EBUSY; netif_carrier_off(netdev); /* allocate transmit descriptors */ err = e1000_setup_all_tx_resources(adapter); if (err) goto err_setup_tx; /* allocate receive descriptors */ err = e1000_setup_all_rx_resources(adapter); if (err) goto err_setup_rx; e1000_power_up_phy(adapter); adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; if ((hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) { e1000_update_mng_vlan(adapter); } /* before we allocate an interrupt, we must be ready to handle it. * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt * as soon as we call pci_request_irq, so we have to setup our * clean_rx handler before we do so. 
*/ e1000_configure(adapter); //注册中断请求,中断处理函数为e1000_intr err = e1000_request_irq(adapter); if (err) goto err_req_irq; /* From here on the code is the same as e1000_up() */ clear_bit(__E1000_DOWN, &adapter->flags); napi_enable(&adapter->napi); e1000_irq_enable(adapter); netif_start_queue(netdev); /** * fire a link status change interrupt to start the watchdog, * 触发链路状态变更中断,启动看门狗 **/ ew32(ICS, E1000_ICS_LSC); return E1000_SUCCESS;err_req_irq: e1000_power_down_phy(adapter); e1000_free_all_rx_resources(adapter);err_setup_rx: e1000_free_all_tx_resources(adapter);err_setup_tx: e1000_reset(adapter); return err;}
设备中断 - e1000_intr
当网卡接收到数据就会发出中断请求(IRQ),对应的中断处理函数就是e1000_intr(),该函数运行在中断上下文中,不可休眠。
/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***//** * e1000_intr - Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure **/static irqreturn_t e1000_intr(int irq, void *data){ struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 icr = er32(ICR); if (unlikely((!icr))) return IRQ_NONE; /* Not our interrupt */ /* we might have caused the interrupt, but the above * read cleared it, and just in case the driver is * down there is nothing to do so return handled */ if (unlikely(test_bit(__E1000_DOWN, &adapter->flags))) return IRQ_HANDLED; if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) { hw->get_link_status = 1; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->flags)){ //启动看门狗任务 schedule_delayed_work(&adapter->watchdog_task, 1); } } /* disable interrupts, without the synchronize_irq bit */ ew32(IMC, ~0); E1000_WRITE_FLUSH(); if (likely(napi_schedule_prep(&adapter->napi))) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; adapter->total_rx_bytes = 0; adapter->total_rx_packets = 0; /** * 调度接收 * 1、把napi加入到cpu的softnet_data.poll_list * 2、触发软中断(softirq)NET_RX_SOFTIRQ 准备接收数据 **/ __napi_schedule(&adapter->napi); } else { /* this really should not happen! if it does it is basically a * bug, but not a hard error, so enable ints and continue */ if (!test_bit(__E1000_DOWN, &adapter->flags)) e1000_irq_enable(adapter); } return IRQ_HANDLED;}
下半部处理 - 软中断
设备初始化完毕并开启后,就进入就绪状态。当有数据到达网卡时会触发中断(硬件中断),内核执行对应的中断处理程序e1000_intr()(中断处理程序是上半部,要简短、迅速)。中断处理程序本身并不接收数据,而是通过__napi_schedule()把napi加入当前CPU的softnet_data.poll_list并触发NET_RX_SOFTIRQ软中断,真正的收包工作留给软中断完成,即下半部处理。NET_RX_SOFTIRQ对应的软中断处理函数是net_rx_action(),它在系统初始化时由net_dev_init()注册:
/*** net/core/dev.c ***/static int __init net_dev_init(void){ int i, rc = -ENOMEM; BUG_ON(!dev_boot_phase); ... open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); //注册网络收包的软中断处理函数 hotcpu_notifier(dev_cpu_callback, 0); dst_subsys_init(); rc = 0; ...}
net_rx_action()函数
/*** net/core/dev.c ***/static void net_rx_action(struct softirq_action *h){ //获取当前CPU的softnet_data数据 struct softnet_data *sd = this_cpu_ptr(&softnet_data); //设置处理时限 unsigned long time_limit = jiffies + 2; //设置此次最大处理包数 int budget = netdev_budget; LIST_HEAD(list); LIST_HEAD(repoll); local_irq_disable(); //把poll_list连接到list上,然后把poll_list清空(把poll_list中的napi缓存到list中) list_splice_init(&sd->poll_list, &list); local_irq_enable(); //对list中的napi循环处理 for (;;) { struct napi_struct *n; if (list_empty(&list)) { if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) return; break; } n = list_first_entry(&list, struct napi_struct, poll_list); /** * 调用napi->poll函数从设备接收数据,poll函数是由设备驱动程序提供,并在 * 设备初始化时通过netif_napi_add()函数放到设备的private data中。 * poll函数从硬件中读取数据然后通过napi_gro_receive()函数把skb发送到 * 网络协议栈做进一步处理。 **/ budget -= napi_poll(n, &repoll); /* If softirq window is exhausted then punt. * Allow this to run for 2 jiffies since which will allow * an average latency of 1.5/HZ. */ if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) { sd->time_squeeze++; break; } } __kfree_skb_flush(); local_irq_disable(); //把未完成的napi加入到CPU全局poll_list中,待下次处理软中断时再次处理 list_splice_tail_init(&sd->poll_list, &list); list_splice_tail(&repoll, &list); list_splice(&list, &sd->poll_list); //poll_list非空,此次处理超过时限或超过最大处理包数,重新出发软中断进行处理。 if (!list_empty(&sd->poll_list)) __raise_softirq_irqoff(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd);}
看门狗
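看门狗是用来检测设备状态(主要是链路状态)的延时任务。它在e1000_probe()中通过INIT_DELAYED_WORK()初始化,但并不是在e1000_open()中被直接调度:e1000_open()最后通过ew32(ICS, E1000_ICS_LSC)触发一次链路状态变更中断,e1000_intr()检测到LSC后调用schedule_delayed_work()调度watchdog_task,从而启动看门狗;此后只要设备未处于__E1000_DOWN状态,看门狗在每次执行结束时都会以2 * HZ个jiffies(约2秒)的延时重新调度自己。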
/** * e1000_watchdog - work function * @work: work struct contained inside adapter struct **/static void e1000_watchdog(struct work_struct *work){ struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, watchdog_task.work); struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; struct e1000_tx_ring *txdr = adapter->tx_ring; u32 link, tctl; link = e1000_has_link(adapter); //链路已激活并有载波->链路状态正常,去更新统计数据 if ((netif_carrier_ok(netdev)) && link) goto link_up; if (link) { if (!netif_carrier_ok(netdev)) { //链路已激活但是没有载波,检测到设备启动 u32 ctrl; bool txb2b = true; /* update snapshot of PHY registers on LSC */ e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); ctrl = er32(CTRL); pr_info("%s NIC Link is Up %d Mbps %s, " "Flow Control: %s\n", netdev->name, adapter->link_speed, adapter->link_duplex == FULL_DUPLEX ? "Full Duplex" : "Half Duplex", ((ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl & E1000_CTRL_RFCE) ? "RX" : ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None"))); /* adjust timeout factor according to speed/duplex */ adapter->tx_timeout_factor = 1; switch (adapter->link_speed) { case SPEED_10: txb2b = false; adapter->tx_timeout_factor = 16; break; case SPEED_100: txb2b = false; /* maybe add some timeout factor ? 
*/ break; } /* enable transmits in the hardware */ tctl = er32(TCTL); tctl |= E1000_TCTL_EN; ew32(TCTL, tctl); netif_carrier_on(netdev); if (!test_bit(__E1000_DOWN, &adapter->flags)) schedule_delayed_work(&adapter->phy_info_task, 2 * HZ); adapter->smartspeed = 0; } } else { if (netif_carrier_ok(netdev)) { //链路未激活但有载波,检测到设备关闭 adapter->link_speed = 0; adapter->link_duplex = 0; pr_info("%s NIC Link is Down\n", netdev->name); netif_carrier_off(netdev); if (!test_bit(__E1000_DOWN, &adapter->flags)) schedule_delayed_work(&adapter->phy_info_task, 2 * HZ); } e1000_smartspeed(adapter); }link_up: e1000_update_stats(adapter); hw->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old; adapter->tpt_old = adapter->stats.tpt; hw->collision_delta = adapter->stats.colc - adapter->colc_old; adapter->colc_old = adapter->stats.colc; adapter->gorcl = adapter->stats.gorcl - adapter->gorcl_old; adapter->gorcl_old = adapter->stats.gorcl; adapter->gotcl = adapter->stats.gotcl - adapter->gotcl_old; adapter->gotcl_old = adapter->stats.gotcl; e1000_update_adaptive(hw); if (!netif_carrier_ok(netdev)) { if (E1000_DESC_UNUSED(txdr) + 1 < txdr->count) { /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. * (Do the reset outside of interrupt context). */ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); /* exit immediately since reset is imminent */ return; } } /* Simple mode for Interrupt Throttle Rate (ITR) */ if (hw->mac_type >= e1000_82540 && adapter->itr_setting == 4) { /* Symmetric Tx/Rx gets a reduced ITR=2000; * Total asymmetrical Tx or Rx gets ITR=8000; * everyone else is between 2000-8000. */ u32 goc = (adapter->gotcl + adapter->gorcl) / 10000; u32 dif = (adapter->gotcl > adapter->gorcl ? adapter->gotcl - adapter->gorcl : adapter->gorcl - adapter->gotcl) / 10000; u32 itr = goc > 0 ? 
(dif * 6000 / goc + 2000) : 8000; ew32(ITR, 1000000000 / (itr * 256)); } /* Cause software interrupt to ensure rx ring is cleaned */ ew32(ICS, E1000_ICS_RXDMT0); /* Force detection of hung controller every watchdog period */ adapter->detect_tx_hung = true; /* Reschedule the task 2HZ后再次调用看门狗检测设备状态*/ if (!test_bit(__E1000_DOWN, &adapter->flags)){ schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); }}
欢迎交流学习!
0 0
- Linux kernel 帧的接收
- Linux Kernel网络数据接收过程
- linux kernel的开始
- linux kernel的一些写法
- 学习 linux kernel的建议
- Linux kernel printk的分析
- 学习linux kernel的好书
- linux kernel patch的说明
- Linux Kernel 的学习笔记
- Linux Kernel Thread 的讨论
- 一個簡單的 Linux Kernel Module
- linux kernel的启动过程
- build linux kernel的错误
- Linux kernel printk的用法
- linux的数据结构---kernel链表
- Linux kernel的启动跟踪
- android linux kernel和standard linux kernel的区别
- Linux网卡数据包的接收
- mysql 添加列,修改列,删除列
- 常用正则规范
- verilog 中的case语句辨析
- 剑指offer 面试题3:二维数组中查找
- 网络常用端口号大全
- Linux kernel 帧的接收
- 第四十讲项目一 小明借书
- 1027. 打印沙漏
- 解决textarea光标初始位置没有在最左上角的问题
- hibernate的使用(关于hibernate事务的必须开启的问题)
- JS的方法substring(index1,index2)和方法substr(index1,index2)
- 新装LINUX系统如何获得ROOT权限,,图文教程
- laydate 时间日历插件
- Reverse_tcp反向连接获得shell实验