Linux kernel 帧的接收

来源:互联网 发布:武汉必佳软件 编辑:程序博客网 时间:2024/06/06 17:07

Linux kernel 帧的接收

本文以e1000驱动为例,基于3.10.0-514.10.2版本内核。


驱动注册 - e1000_init_module

[root@10-254-0-111 ~]# modprobe e1000  #插入e1000模块

或者

[root@10-254-0-111 ~]# insmod /path/to/e1000.ko   #insmod后面指定e1000.ko文件路径

上面的操作对应的实现代码如下:

/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***/ //该e1000驱动对应的pci驱动实例 static struct pci_driver e1000_driver = {        .name     = e1000_driver_name, //e1000驱动程序名称,默认e1000        .id_table = e1000_pci_tbl,     //该驱动程序所支持的网卡设备列表        .probe    = e1000_probe,       //设备初始化函数,当PCI子系统检测到该驱动所支持的设备被插入到总线上时,调用该函数对设备进行初始化操作。        .remove   = e1000_remove,      //移除设备时(热插拔或驱动程序被移除时)调用该函数#ifdef CONFIG_PM //电源管理        /* Power Management Hooks */        .suspend  = e1000_suspend,     //系统休眠时调用        .resume   = e1000_resume,      //系统被唤醒时调用#endif                          .shutdown = e1000_shutdown,    //系统关闭时调用        .err_handler = &e1000_err_handler //错误处理器};/** * e1000_init_module - 驱动注册函数 * * e1000_init_module is the first routine called when the driver is * loaded. All it does is register with the PCI subsystem. **/static int __init e1000_init_module(void){    int ret;    pr_info("%s - version %s\n", e1000_driver_string, e1000_driver_version);    pr_info("%s\n", e1000_copyright);    /* 注册pci驱动 - 把e1000驱动程序以pci_driver形式注册到pci子系统中 */    ret = pci_register_driver(&e1000_driver);    if (copybreak != COPYBREAK_DEFAULT) {        if (copybreak == 0)            pr_info("copybreak disabled\n");        else            pr_info("copybreak enabled for "                   "packets <= %u bytes\n", copybreak);    }    return ret;}module_init(e1000_init_module);

pci_register_driver() 注册的是驱动程序,是把驱动程序安装到内核,准确的说是安装到内核的PCI子系统中。此时还没有设备出现,但是内核已经具备管理e1000设备的能力。

设备发现和初始化 - e1000_probe

刚刚我们通过e1000_init_module()把e1000驱动程序注册到PCI子系统,这样当有e1000设备插入到PCI总线的时候,PCI子系统就可以发现该设备,并调用之前注册的函数e1000_probe()对设备进行初始化。

/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***/

/* Device operations table; installed on the net_device in e1000_probe() */
static const struct net_device_ops e1000_netdev_ops = {
        .ndo_open               = e1000_open,   // bring the device up
        .ndo_stop               = e1000_close,   // bring the device down
        .ndo_start_xmit         = e1000_xmit_frame,
        .ndo_get_stats          = e1000_get_stats,
        .ndo_set_rx_mode        = e1000_set_rx_mode,
        .ndo_set_mac_address    = e1000_set_mac,
        .ndo_tx_timeout         = e1000_tx_timeout,
        .ndo_change_mtu         = e1000_change_mtu,
        .ndo_do_ioctl           = e1000_ioctl,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_vlan_rx_add_vid    = e1000_vlan_rx_add_vid,
        .ndo_vlan_rx_kill_vid   = e1000_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = e1000_netpoll,
#endif
        .ndo_fix_features       = e1000_fix_features,
        .ndo_set_features       = e1000_set_features,
};

/**
 * e1000_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in e1000_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * e1000_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 *
 * NOTE: this is an abridged excerpt; "..." marks code omitted by the article.
 **/
static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
    struct net_device *netdev;
    struct e1000_adapter *adapter;   // device private data
    struct e1000_hw *hw;
    ...
    err = -ENOMEM;
    /* allocate the net_device, sized to carry struct e1000_adapter as private data */
    netdev = alloc_etherdev(sizeof(struct e1000_adapter));
    if (!netdev)
        goto err_alloc_etherdev;
    ...
    /* fill in the device private data */
    adapter = netdev_priv(netdev);
    adapter->netdev = netdev;
    adapter->pdev = pdev;
    adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
    adapter->bars = bars;
    adapter->need_ioport = need_ioport;
    ...
    /* install the device operations */
    netdev->netdev_ops = &e1000_netdev_ops;
    e1000_set_ethtool_ops(netdev);
    /**
     * Initialize this device's napi context, used for bottom-half receive,
     * with e1000_clean as the poll callback.
     * NOTE(review): the article also states that this call registers the
     * napi in the global napi_hash table; confirm against the kernel
     * version in use.
     **/
    netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
    /* set the device name to the PCI name for now; it is replaced by
     * "eth%d" right before registration below
     */
    strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
    ...
    /* initialize deferred work items, e.g. the watchdog task */
    INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog);
    INIT_DELAYED_WORK(&adapter->fifo_stall_task,
              e1000_82547_tx_fifo_stall_task);
    INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task);
    INIT_WORK(&adapter->reset_task, e1000_reset_task);
    ...
    /* reset the hardware with the new settings */
    e1000_reset(adapter);
    strcpy(netdev->name, "eth%d");
    /* register the network device (net_device) */
    err = register_netdev(netdev);
    if (err)
        goto err_register;
    ...
}

开启设备 - e1000_open

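当使用用户空间工具(如ifconfig、iproute2的ip命令)开启网络设备时,内核网络设备层会通过该设备net_device_ops中的ndo_open回调(即e1000_probe()中设置的e1000_netdev_ops.ndo_open)调用函数e1000_open()。注意这里的调用者是网络设备层,而不是PCI子系统。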

/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***/

/**
 * e1000_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog task is started,
 * and the stack is notified that the interface is ready.
 **/
static int e1000_open(struct net_device *netdev)
{
    struct e1000_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    int err;

    /* disallow open during test */
    if (test_bit(__E1000_TESTING, &adapter->flags))
        return -EBUSY;

    netif_carrier_off(netdev);

    /* allocate transmit descriptors */
    err = e1000_setup_all_tx_resources(adapter);
    if (err)
        goto err_setup_tx;

    /* allocate receive descriptors */
    err = e1000_setup_all_rx_resources(adapter);
    if (err)
        goto err_setup_rx;

    e1000_power_up_phy(adapter);

    adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
    if ((hw->mng_cookie.status &
              E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) {
        e1000_update_mng_vlan(adapter);
    }

    /* before we allocate an interrupt, we must be ready to handle it.
     * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
     * as soon as we call pci_request_irq, so we have to setup our
     * clean_rx handler before we do so.
     */
    e1000_configure(adapter);

    // request the IRQ; per the article the handler is e1000_intr
    err = e1000_request_irq(adapter);
    if (err)
        goto err_req_irq;

    /* From here on the code is the same as e1000_up() */
    clear_bit(__E1000_DOWN, &adapter->flags);

    napi_enable(&adapter->napi);

    e1000_irq_enable(adapter);

    netif_start_queue(netdev);

    /**
     * fire a link status change interrupt to start the watchdog
     * (e1000_intr schedules watchdog_task when it sees E1000_ICR_LSC)
    **/
    ew32(ICS, E1000_ICS_LSC);

    return E1000_SUCCESS;

    /* error unwind: each label releases what was set up before the failing step */
err_req_irq:
    e1000_power_down_phy(adapter);
    e1000_free_all_rx_resources(adapter);
err_setup_rx:
    e1000_free_all_tx_resources(adapter);
err_setup_tx:
    e1000_reset(adapter);

    return err;
}

设备中断 - e1000_intr

当网卡接收到数据就会发出中断请求(IRQ),对应的中断处理函数就是e1000_intr(),该函数运行在中断上下文中,不可休眠。

/*** drivers/net/ethernet/intel/e1000/e1000_main.c ***/

/**
 * e1000_intr - Interrupt Handler
 * @irq: interrupt number
 * @data: pointer to a network interface device structure
 *
 * Returns IRQ_NONE when the interrupt cause register reads zero,
 * IRQ_HANDLED otherwise.
 **/
static irqreturn_t e1000_intr(int irq, void *data)
{
    struct net_device *netdev = data;
    struct e1000_adapter *adapter = netdev_priv(netdev);
    struct e1000_hw *hw = &adapter->hw;
    u32 icr = er32(ICR);

    if (unlikely((!icr)))
        return IRQ_NONE;  /* Not our interrupt */

    /* we might have caused the interrupt, but the above
     * read cleared it, and just in case the driver is
     * down there is nothing to do so return handled
     */
    if (unlikely(test_bit(__E1000_DOWN, &adapter->flags)))
        return IRQ_HANDLED;

    if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) {
        hw->get_link_status = 1;
        /* guard against interrupt when we're going down */
        if (!test_bit(__E1000_DOWN, &adapter->flags)){
            // kick off the watchdog task (delay of 1 jiffy)
            schedule_delayed_work(&adapter->watchdog_task, 1);
        }
    }

    /* disable interrupts, without the synchronize_irq bit */
    ew32(IMC, ~0);
    E1000_WRITE_FLUSH();

    if (likely(napi_schedule_prep(&adapter->napi))) {
        /* reset the per-poll byte/packet counters before scheduling */
        adapter->total_tx_bytes = 0;
        adapter->total_tx_packets = 0;
        adapter->total_rx_bytes = 0;
        adapter->total_rx_packets = 0;
        /**
         * Schedule receive processing. Per the article, __napi_schedule:
         * 1. adds the napi to this CPU's softnet_data.poll_list
         * 2. raises the NET_RX_SOFTIRQ softirq so the data gets received
        **/
        __napi_schedule(&adapter->napi);
    } else {
        /* this really should not happen! if it does it is basically a
         * bug, but not a hard error, so enable ints and continue
         */
        if (!test_bit(__E1000_DOWN, &adapter->flags))
            e1000_irq_enable(adapter);
    }

    return IRQ_HANDLED;
}

下半部处理 - 软中断

设备初始化完毕并开启后,就进入就绪状态。当有数据到达网卡时会触发硬件中断,内核执行对应的中断处理程序(中断处理程序是上半部,要简短、迅速)。中断处理程序e1000_intr()通过__napi_schedule()把napi加入poll_list并触发NET_RX_SOFTIRQ软中断;中断处理程序返回后,内核执行该软中断,进行下半部处理。NET_RX_SOFTIRQ对应的软中断处理函数是net_rx_action(),注册的地方是在系统初始化时:

/*** net/core/dev.c ***/

/*
 * net_dev_init - network device layer initialization (abridged excerpt;
 * "..." marks code omitted by the article).
 *
 * The part shown registers the softirq handlers for network transmit and
 * receive. net_rx_action is a static function in net/core/dev.c, so this
 * registration lives in that file, not in the e1000 driver.
 */
static int __init net_dev_init(void)
{
        int i, rc = -ENOMEM;

        BUG_ON(!dev_boot_phase);
        ...
        open_softirq(NET_TX_SOFTIRQ, net_tx_action);
        open_softirq(NET_RX_SOFTIRQ, net_rx_action); // register the softirq handler for network receive

        hotcpu_notifier(dev_cpu_callback, 0);
        dst_subsys_init();
        rc = 0;
        ...
}

net_rx_action()函数

/*** net/core/dev.c ***/

/*
 * net_rx_action - handler for the NET_RX_SOFTIRQ softirq.
 *
 * Polls the napi instances queued on this CPU's poll_list until the list is
 * drained, the packet budget is used up, or the time limit is reached; any
 * remaining work is put back on poll_list and the softirq is raised again.
 */
static void net_rx_action(struct softirq_action *h)
{
        // per-CPU softnet_data of the current CPU
        struct softnet_data *sd = this_cpu_ptr(&softnet_data);
        // time limit for this run: 2 jiffies from now
        unsigned long time_limit = jiffies + 2;
        // maximum number of packets to process in this run
        int budget = netdev_budget;
        LIST_HEAD(list);
        LIST_HEAD(repoll);

        local_irq_disable();
        // move everything from poll_list onto the local list and leave poll_list empty
        list_splice_init(&sd->poll_list, &list);
        local_irq_enable();

        // process the napi instances on the local list
        for (;;) {
                struct napi_struct *n;

                if (list_empty(&list)) {
                        /* nothing left at all: return without re-raising the softirq */
                        if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
                                return;
                        break;
                }

                n = list_first_entry(&list, struct napi_struct, poll_list);
                /**
                 * Receive from the device through napi->poll. The poll function
                 * is supplied by the device driver and is registered with
                 * netif_napi_add() during device initialization (e1000 passes
                 * e1000_clean). Per the article, the poll function reads data
                 * from the hardware and hands each skb to the network stack
                 * via napi_gro_receive() for further processing.
                **/
                budget -= napi_poll(n, &repoll);

                /* If softirq window is exhausted then punt.
                 * Allow this to run for 2 jiffies since which will allow
                 * an average latency of 1.5/HZ.
                 */
                if (unlikely(budget <= 0 ||
                             time_after_eq(jiffies, time_limit))) {
                        sd->time_squeeze++;
                        break;
                }
        }

        __kfree_skb_flush();
        local_irq_disable();

        // put the unfinished napi instances back on this CPU's poll_list so
        // they are handled on the next softirq run
        list_splice_tail_init(&sd->poll_list, &list);
        list_splice_tail(&repoll, &list);
        list_splice(&list, &sd->poll_list);
        // poll_list is not empty (work is still pending): raise the softirq
        // again so it gets processed
        if (!list_empty(&sd->poll_list))
                __raise_softirq_irqoff(NET_RX_SOFTIRQ);

        net_rps_action_and_irq_enable(sd);
}

看门狗

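看门狗是用来检测设备链路状态的延时任务(watchdog_task),在e1000_probe()中通过INIT_DELAYED_WORK初始化。e1000_open()的最后会写ICS寄存器触发一次链路状态变更(LSC)中断,e1000_intr()收到该中断后调度watchdog_task,看门狗由此启动;之后看门狗每次执行结束时都会重新调度自己(间隔2 * HZ个jiffies,即2秒)。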

/**
 * e1000_watchdog - work function
 * @work: work struct contained inside adapter struct
 *
 * Periodic link monitor: detects link up/down transitions, updates the
 * statistics deltas, schedules a reset when Tx work is stuck after link
 * loss, and re-arms itself every 2 * HZ jiffies while the adapter is up.
 **/
static void e1000_watchdog(struct work_struct *work)
{
    struct e1000_adapter *adapter = container_of(work,
                             struct e1000_adapter,
                             watchdog_task.work);
    struct e1000_hw *hw = &adapter->hw;
    struct net_device *netdev = adapter->netdev;
    struct e1000_tx_ring *txdr = adapter->tx_ring;
    u32 link, tctl;

    link = e1000_has_link(adapter);
    // link is up and carrier is already on: state unchanged, just update statistics
    if ((netif_carrier_ok(netdev)) && link)
        goto link_up;

    if (link) {
        if (!netif_carrier_ok(netdev)) {
            // link is up but carrier is off: the link has just come up
            u32 ctrl;
            bool txb2b = true;
            /* update snapshot of PHY registers on LSC */
            e1000_get_speed_and_duplex(hw,
                           &adapter->link_speed,
                           &adapter->link_duplex);

            ctrl = er32(CTRL);
            pr_info("%s NIC Link is Up %d Mbps %s, "
                "Flow Control: %s\n",
                netdev->name,
                adapter->link_speed,
                adapter->link_duplex == FULL_DUPLEX ?
                "Full Duplex" : "Half Duplex",
                ((ctrl & E1000_CTRL_TFCE) && (ctrl &
                E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
                E1000_CTRL_RFCE) ? "RX" : ((ctrl &
                E1000_CTRL_TFCE) ? "TX" : "None")));

            /* adjust timeout factor according to speed/duplex */
            adapter->tx_timeout_factor = 1;
            switch (adapter->link_speed) {
            case SPEED_10:
                txb2b = false;
                adapter->tx_timeout_factor = 16;
                break;
            case SPEED_100:
                txb2b = false;
                /* maybe add some timeout factor ? */
                break;
            }

            /* enable transmits in the hardware */
            tctl = er32(TCTL);
            tctl |= E1000_TCTL_EN;
            ew32(TCTL, tctl);

            netif_carrier_on(netdev);
            if (!test_bit(__E1000_DOWN, &adapter->flags))
                schedule_delayed_work(&adapter->phy_info_task,
                              2 * HZ);
            adapter->smartspeed = 0;
        }
    } else {
        if (netif_carrier_ok(netdev)) {
            // link is down but carrier is still on: the link has just gone down
            adapter->link_speed = 0;
            adapter->link_duplex = 0;
            pr_info("%s NIC Link is Down\n",
                netdev->name);
            netif_carrier_off(netdev);

            if (!test_bit(__E1000_DOWN, &adapter->flags))
                schedule_delayed_work(&adapter->phy_info_task,
                              2 * HZ);
        }

        e1000_smartspeed(adapter);
    }

link_up:
    e1000_update_stats(adapter);

    /* compute the deltas since the previous watchdog run and remember
     * the current counters for the next run
     */
    hw->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
    adapter->tpt_old = adapter->stats.tpt;
    hw->collision_delta = adapter->stats.colc - adapter->colc_old;
    adapter->colc_old = adapter->stats.colc;

    adapter->gorcl = adapter->stats.gorcl - adapter->gorcl_old;
    adapter->gorcl_old = adapter->stats.gorcl;
    adapter->gotcl = adapter->stats.gotcl - adapter->gotcl_old;
    adapter->gotcl_old = adapter->stats.gotcl;

    e1000_update_adaptive(hw);

    if (!netif_carrier_ok(netdev)) {
        if (E1000_DESC_UNUSED(txdr) + 1 < txdr->count) {
            /* We've lost link, so the controller stops DMA,
             * but we've got queued Tx work that's never going
             * to get done, so reset controller to flush Tx.
             * (Do the reset outside of interrupt context).
             */
            adapter->tx_timeout_count++;
            schedule_work(&adapter->reset_task);
            /* exit immediately since reset is imminent */
            return;
        }
    }

    /* Simple mode for Interrupt Throttle Rate (ITR) */
    if (hw->mac_type >= e1000_82540 && adapter->itr_setting == 4) {
        /* Symmetric Tx/Rx gets a reduced ITR=2000;
         * Total asymmetrical Tx or Rx gets ITR=8000;
         * everyone else is between 2000-8000.
         */
        u32 goc = (adapter->gotcl + adapter->gorcl) / 10000;
        u32 dif = (adapter->gotcl > adapter->gorcl ?
                adapter->gotcl - adapter->gorcl :
                adapter->gorcl - adapter->gotcl) / 10000;
        u32 itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000;

        ew32(ITR, 1000000000 / (itr * 256));
    }

    /* Cause software interrupt to ensure rx ring is cleaned */
    ew32(ICS, E1000_ICS_RXDMT0);

    /* Force detection of hung controller every watchdog period */
    adapter->detect_tx_hung = true;

    /* Reschedule the task: run the watchdog again after 2 * HZ jiffies
     * to re-check the device state
     */
    if (!test_bit(__E1000_DOWN, &adapter->flags)){
        schedule_delayed_work(&adapter->watchdog_task, 2 * HZ);
    }
}

欢迎交流学习!

0 0
原创粉丝点击