ipmi watchdog

来源:互联网 发布:2年java开发工程师简历 编辑:程序博客网 时间:2024/06/07 11:49
IPMI watchdog 的基本原理是:主机定时向 BMC 发送命令(心跳)来重置 BMC 端的 watchdog timer;如果主机没有在规定时间内再次发送心跳,BMC 那边的 timer 就会超时,并执行预先设定的动作(reset/power_cycle/power_off 等)
从makefile中可以看到要使用IPMI watchdog
obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
必须enable CONFIG_IPMI_WATCHDOG
/*
 * Module init for the IPMI watchdog driver.
 *
 * Validates the action/preaction/preop module parameters (falling back to a
 * default when a value is not recognised), registers the reboot and panic
 * notifiers, and registers the SMI watcher.
 *
 * Returns 0 on success. If ipmi_smi_watcher_register() fails, everything
 * registered here is undone and its error code is returned.
 */
static int __init ipmi_wdog_init(void)
{
    int rv;

    /* action must be one of reset/none/power_cycle/power_off. */
    if (action_op(action, NULL)) {
        action_op("reset", NULL);
        printk(KERN_INFO PFX "Unknown action '%s', defaulting to"
               " reset\n", action);
    }

    /* preaction must be one of pre_none/pre_smi/pre_nmi/pre_int. */
    if (preaction_op(preaction, NULL)) {
        preaction_op("pre_none", NULL);
        printk(KERN_INFO PFX "Unknown preaction '%s', defaulting to"
               " none\n", preaction);
    }

    /* preop must be one of preop_none/preop_panic/preop_give_data. */
    if (preop_op(preop, NULL)) {
        preop_op("preop_none", NULL);
        printk(KERN_INFO PFX "Unknown preop '%s', defaulting to"
               " none\n", preop);
    }

    /* Per the original note: check_parms() is an empty function on non-x86. */
    check_parms();

    /* Hook into the reboot and panic notifier chains. */
    register_reboot_notifier(&wdog_reboot_notifier);
    atomic_notifier_chain_register(&panic_notifier_list,
            &wdog_panic_notifier);

    /* Register our watcher so new_smi is called for the IPMI interfaces. */
    rv = ipmi_smi_watcher_register(&smi_watcher);

    /*
     * A non-zero rv means the registration failed (the only failure path
     * visible in ipmi_smi_watcher_register() is a failed kmalloc), so undo
     * everything registered above.
     */
    if (rv) {
#ifdef HAVE_DIE_NMI
        if (nmi_handler_registered)
            unregister_nmi_handler(NMI_UNKNOWN, "ipmi");
#endif
        atomic_notifier_chain_unregister(&panic_notifier_list,
                         &wdog_panic_notifier);
        unregister_reboot_notifier(&wdog_reboot_notifier);
        printk(KERN_WARNING PFX "can't register smi watcher\n");
        return rv;
    }

    /* Look for this line in the boot log to confirm ipmi_wdog_init() ran. */
    printk(KERN_INFO PFX "driver initialized\n");

    return 0;
}

action_op/preaction_op/preop_op 的流程类似,都是处理模块的参数,这里仅以action_op 为例

/*
 * Get and/or set the "action" module parameter.
 *
 * @inval:  new action name, or NULL to leave the current action untouched.
 * @outval: if non-NULL, receives a copy of the current action string, taken
 *          before any update is applied. The caller must supply a buffer
 *          large enough to hold it.
 *
 * On a recognised name, action_val is set to the matching WDOG_TIMEOUT_*
 * value and the name is stored in action.
 *
 * Returns 0 on success, or -EINVAL if @inval is not one of "reset", "none",
 * "power_cycle" or "power_off" (action and action_val are then unchanged).
 */
static int action_op(const char *inval, char *outval)
{
    if (outval)
        strcpy(outval, action);

    if (!inval)
        return 0;

    if (strcmp(inval, "reset") == 0)
        action_val = WDOG_TIMEOUT_RESET;
    else if (strcmp(inval, "none") == 0)
        action_val = WDOG_TIMEOUT_NONE;
    else if (strcmp(inval, "power_cycle") == 0)
        action_val = WDOG_TIMEOUT_POWER_CYCLE;
    else if (strcmp(inval, "power_off") == 0)
        action_val = WDOG_TIMEOUT_POWER_DOWN;
    else
        return -EINVAL;

    /*
     * ipmi_wdog_init() calls action_op(action, NULL), i.e. inval is the
     * action buffer itself. strcpy() on overlapping objects is undefined
     * behaviour, and the copy would be a no-op anyway, so skip it.
     */
    if (inval != action)
        strcpy(action, inval);
    return 0;
}
ipmi_wdog_init 函数中最重要的就是ipmi_smi_watcher_register
/*
 * Register a watcher and tell it about the IPMI interfaces that already exist.
 *
 * @watcher: watcher to add to smi_watchers; its new_smi callback is invoked
 *           once for every interface collected from ipmi_interfaces.
 *
 * Returns 0 on success, or -ENOMEM if a delivery entry cannot be allocated
 * (in which case the watcher is not added and no callback is made).
 */
int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher)
{
    ipmi_smi_t intf;
    LIST_HEAD(to_deliver);
    struct watcher_entry *e, *e2;

    mutex_lock(&smi_watchers_mutex);

    mutex_lock(&ipmi_interfaces_mutex);

    /*
     * Build a list of things to deliver: one entry, holding a reference,
     * for every interface in ipmi_interfaces.
     */
    list_for_each_entry(intf, &ipmi_interfaces, link) {
        /* NOTE(review): -1 presumably marks an interface not ready yet. */
        if (intf->intf_num == -1)
            continue;
        e = kmalloc(sizeof(*e), GFP_KERNEL);
        if (!e)
            goto out_err;
        kref_get(&intf->refcount);
        e->intf = intf;
        e->intf_num = intf->intf_num;
        list_add_tail(&e->link, &to_deliver);
    }

    /* We will succeed, so add this watcher to the smi_watchers list. */
    list_add(&watcher->link, &smi_watchers);

    mutex_unlock(&ipmi_interfaces_mutex);

    /*
     * Call the watcher's new_smi callback for each collected interface
     * (for the watchdog's smi_watcher that callback is ipmi_new_smi),
     * then drop the reference and free the entry.
     */
    list_for_each_entry_safe(e, e2, &to_deliver, link) {
        list_del(&e->link);
        watcher->new_smi(e->intf_num, e->intf->si_dev);
        kref_put(&e->intf->refcount, intf_free);
        kfree(e);
    }

    mutex_unlock(&smi_watchers_mutex);

    return 0;

 out_err:
    /* Allocation failed: release both locks and undo the partial list. */
    mutex_unlock(&ipmi_interfaces_mutex);
    mutex_unlock(&smi_watchers_mutex);
    list_for_each_entry_safe(e, e2, &to_deliver, link) {
        list_del(&e->link);
        kref_put(&e->intf->refcount, intf_free);
        kfree(e);
    }
    return -ENOMEM;
}
可以看到ipmi_smi_watcher_register 就做了两件事情:一件是将smi_watcher 添加到smi_watchers 这个list中,这样就可以通过smi_watchers得到所有的smi_watcher;另一件是为每个ipmi_interfaces调用smi_watcher->new_smi
/*
 * Watcher passed to ipmi_smi_watcher_register(). new_smi is the callback
 * invoked for each IPMI interface delivered to the watcher; smi_gone is
 * presumably its counterpart for an interface going away (not shown here).
 */
static struct ipmi_smi_watcher smi_watcher = {
    .owner    = THIS_MODULE,
    .new_smi  = ipmi_new_smi,
    .smi_gone = ipmi_smi_gone
};
/*
 * new_smi callback of smi_watcher: try to attach the watchdog to interface
 * if_num. The device argument is not used.
 */
static void ipmi_new_smi(int if_num, struct device *device)
{
    ipmi_register_watchdog(if_num);
}
static void ipmi_register_watchdog(int ipmi_intf)
{
    int rv = -EBUSY;

    if (watchdog_user)
        goto out;

    if ((ifnum_to_use >= 0) && (ifnum_to_use != ipmi_intf))
        goto out;

    watchdog_ifnum = ipmi_intf;

    rv = ipmi_create_user(ipmi_intf, &ipmi_hndlrs, NULL, &watchdog_user);
    if (rv < 0) {
        printk(KERN_CRIT PFX "Unable to register with ipmi\n");
        goto out;
    }
//为watchdog_user 赋值ipmi_version_major和ipmi_version_minor
    ipmi_get_version(watchdog_user,
             &ipmi_version_major,
             &ipmi_version_minor);
//注册一个字符设备,这样就可以在user space控制IPMI watchdog
    rv = misc_register(&ipmi_wdog_miscdev);
    if (rv < 0) {
        ipmi_destroy_user(watchdog_user);
        watchdog_user = NULL;
        printk(KERN_CRIT PFX "Unable to register misc device\n");
    }

 out:
    if ((start_now) && (rv == 0)) {
        /* Run from startup, so start the timer now. */
        start_now = 0; /* Disable this function after first startup. */
        ipmi_watchdog_state = action_val;
如果前面都成功了,则开始IPMI watchdog
        ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
        printk(KERN_INFO PFX "Starting now!\n");
    } else {
        /* Stop the timer now. */
        ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
        ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
    }
}
整个IPMI watchdog的工作流程如下,正常情况下就一直这么循环下去
ipmi_set_timeout->i_ipmi_set_timeout->ipmi_heartbeat->ipmi_set_timeout
static int ipmi_set_timeout(int do_heartbeat)
{
    int send_heartbeat_now;
    int rv;


    /* We can only send one of these at a time. */
    mutex_lock(&set_timeout_lock);

    atomic_set(&set_timeout_tofree, 2);
// 给BMC 发送命令,即使BMC 不回复也会timeout
    rv = i_ipmi_set_timeout(&set_timeout_smi_msg,
                &set_timeout_recv_msg,
                &send_heartbeat_now);
    if (rv) {
        mutex_unlock(&set_timeout_lock);
        goto out;
    }
//等待BMC 返回
    wait_for_completion(&set_timeout_wait);

    mutex_unlock(&set_timeout_lock);

    if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
        || ((send_heartbeat_now)
        && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
设置下一次timeout的时间
        rv = ipmi_heartbeat();

out:
    return rv;
}
我们继续看看如何给BMC 发送消息
static int i_ipmi_set_timeout(struct ipmi_smi_msg  *smi_msg,
                  struct ipmi_recv_msg *recv_msg,
                  int                  *send_heartbeat_now)
{
    struct kernel_ipmi_msg            msg;
    unsigned char                     data[6];
    int                               rv;
    struct ipmi_system_interface_addr addr;
    int                               hbnow = 0;


    /* These can be cleared as we are setting the timeout. */
    pretimeout_since_last_heartbeat = 0;

    data[0] = 0;
    WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);

    if ((ipmi_version_major > 1)
        || ((ipmi_version_major == 1) && (ipmi_version_minor >= 5))) {
        /* This is an IPMI 1.5-only feature. */
        data[0] |= WDOG_DONT_STOP_ON_SET;
    } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
        /*
         * In ipmi 1.0, setting the timer stops the watchdog, we
         * need to start it back up again.
         */
        hbnow = 1;
    }

    data[1] = 0;
    WDOG_SET_TIMEOUT_ACT(data[1], ipmi_watchdog_state);
    if ((pretimeout > 0) && (ipmi_watchdog_state != WDOG_TIMEOUT_NONE)) {
        WDOG_SET_PRETIMEOUT_ACT(data[1], preaction_val);
        data[2] = pretimeout;
    } else {
        WDOG_SET_PRETIMEOUT_ACT(data[1], WDOG_PRETIMEOUT_NONE);
        data[2] = 0; /* No pretimeout. */
    }
    data[3] = 0;
    WDOG_SET_TIMEOUT(data[4], data[5], timeout);
// 给ipmi_system_interface_addr addr赋值
    addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
    addr.channel = IPMI_BMC_CHANNEL;
    addr.lun = 0;
//给kernel_ipmi_msg            msg 赋值
    msg.netfn = 0x06;
    msg.cmd = IPMI_WDOG_SET_TIMER;
    msg.data = data;
    msg.data_len = sizeof(data);
//在ipmi_request_supply_msgs 中最终是调用smi_send来给BMC 发送命令
    rv = ipmi_request_supply_msgs(watchdog_user,
                      (struct ipmi_addr *) &addr,
                      0,
                      &msg,
                      NULL,
                      smi_msg,
                      recv_msg,
                      1);
    if (rv) {
        printk(KERN_WARNING PFX "set timeout error: %d\n",
               rv);
    }

    if (send_heartbeat_now)
        *send_heartbeat_now = hbnow;

    return rv;
}
从smi_send 最终调用handlers->sender来发送
/*
 * Queue smi_msg on the interface via smi_add_send_msg() and, if that call
 * hands a message back, pass it to the low-level sender.
 *
 * The transmit queue lock is taken around the queueing step unless the
 * interface is in run-to-completion mode.
 */
static void smi_send(ipmi_smi_t intf, const struct ipmi_smi_handlers *handlers,
             struct ipmi_smi_msg *smi_msg, int priority)
{
    const int lockless = intf->run_to_completion;
    unsigned long irq_flags = 0;

    if (!lockless)
        spin_lock_irqsave(&intf->xmit_msgs_lock, irq_flags);

    smi_msg = smi_add_send_msg(intf, smi_msg, priority);

    if (!lockless)
        spin_unlock_irqrestore(&intf->xmit_msgs_lock, irq_flags);

    /* Nothing was handed back, so there is nothing to send right now. */
    if (!smi_msg)
        return;

    handlers->sender(intf->send_info, smi_msg);
}

static const struct ipmi_smi_handlers handlers = {
    .owner                  = THIS_MODULE,
    .start_processing       = smi_start_processing,
    .get_smi_info        = get_smi_info,
    .sender            = sender,
}
sender->check_start_timer_thread
/*
 * If the interface is in the SI_NORMAL state with no message in flight,
 * kick the timer and the kernel thread (when one exists), start the next
 * queued message and run the event handler once.
 */
static void check_start_timer_thread(struct smi_info *smi_info)
{
    if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) {
        /* Trigger the timer to drive sending and receiving. */
        smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);

        /* Trigger the thread, if there is one, to do the same. */
        if (smi_info->thread)
            wake_up_process(smi_info->thread);

        start_next_msg(smi_info);
        /*
         * Per the original note, the timer and the thread both end up
         * calling smi_event_handler() to process messages.
         */
        smi_event_handler(smi_info, 0);
    }
}



原创粉丝点击