Linux存储IO栈(1)-- 内核对象与对象集

来源:互联网 发布:剑灵龙男捏脸数据大全 编辑:程序博客网 时间:2024/06/10 09:11

Linux内核对象和对象集

内核对象是Linux设备驱动模型的基础,主要用于抽象和封装总线、设备、驱动、类和接口之间关系的具体实现代码,并在sysfs中呈现。内核对象主要抽象为kobject和kset两个结构:

/*
 * struct kobject - generic kernel object.
 *
 * Carries the sysfs name, the links into the object hierarchy (parent and
 * kset), the type descriptor, the sysfs node, a reference count and a set
 * of one-bit state flags.
 */
struct kobject {
	const char		*name;   /* name shown in sysfs */
	struct list_head	entry;   /* links this kobject into its kset's list */
	struct kobject		*parent; /* parent kobject; expresses the tree structure */
	struct kset		*kset;   /* kset this kobject is linked into */
	struct kobj_type	*ktype;  /* common methods and attributes of this kobject */
	struct kernfs_node	*sd;     /* sysfs directory entry */
	struct kref		kref;    /* reference count */
#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
	struct delayed_work	release; /* work item used for the delayed release */
#endif
	unsigned int state_initialized:1;        /* has been initialized */
	unsigned int state_in_sysfs:1;           /* has been added to sysfs */
	unsigned int state_add_uevent_sent:1;    /* ADD event was sent to user space */
	unsigned int state_remove_uevent_sent:1; /* REMOVE event was sent to user space */
	unsigned int uevent_suppress:1;          /* uevents are suppressed for this kobject */
};

在kobject结构中ktype域是对kobject一些通用方法和属性进行封装:

/*
 * struct kobj_type - methods and attributes shared by kobjects of one type.
 */
struct kobj_type {
	void (*release)(struct kobject *kobj);   /* called back when the kobject is released */
	const struct sysfs_ops *sysfs_ops;       /* sysfs operation functions */
	struct attribute **default_attrs;        /* default attributes */
	/* namespace-related operations */
	const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
	const void *(*namespace)(struct kobject *kobj);
};

kset是一组kobject的集合,通过kset可以遍历这组kobject,如SCSI子系统中,设备是一种kobject,通过设备集kset,可以遍历所有的设备。

/**
 * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem.
 *
 * A kset defines a group of kobjects.  They can be individually
 * different "types" but overall these kobjects all want to be grouped
 * together and operated on in the same manner.  ksets are used to
 * define the attribute callbacks and other common events that happen to
 * a kobject.
 *
 * @list: the list of all kobjects for this kset
 * @list_lock: a lock for iterating over the kobjects
 * @kobj: the embedded kobject for this kset (recursion, isn't it fun...)
 * @uevent_ops: the set of uevent operations for this kset.  These are
 * called whenever a kobject has something happen to it so that the kset
 * can add new environment variables, or filter out the uevents if so
 * desired.
 */
struct kset {
	struct list_head list;   /* list of the kobjects linked into this kset */
	spinlock_t list_lock;    /* spinlock taken while iterating over the list */
	struct kobject kobj;     /* a kset can itself be treated as a kobject */
	const struct kset_uevent_ops *uevent_ops; /* callbacks used when sending uevents */
};

在发送事件到用户空间时,可以回调kset_uevent_ops中的3个回调函数

struct kset_uevent_ops {    int (* const filter)(struct kset *kset, struct kobject *kobj);    const char *(* const name)(struct kset *kset, struct kobject *kobj);    int (* const uevent)(struct kset *kset, struct kobject *kobj,              struct kobj_uevent_env *env);};
  • filter:在发送事件之前过滤某些事件。
  • name: 获取名称。
  • uevent:设置uevent需要的环境变量。

内核对象关系

内核对象相关操作

/* Initialization, creation and registration in sysfs. */
void kobject_init(struct kobject *kobj, struct kobj_type *ktype);
int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...);
int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...);
void kobject_del(struct kobject *kobj);
struct kobject *  kobject_create(void);
struct kobject * kobject_create_and_add(const char *name, struct kobject *parent);

/* Renaming and moving. */
int kobject_rename(struct kobject *, const char *new_name);
int kobject_move(struct kobject *, struct kobject *);

/* Reference counting. */
struct kobject *kobject_get(struct kobject *kobj);
void kobject_put(struct kobject *kobj);

/* Namespace and path queries. */
const void *kobject_namespace(struct kobject *kobj);
char *kobject_get_path(struct kobject *kobj, gfp_t flag);

内核对象创建及初始化

初始化流程主要在kobject_init:

/** * kobject_init - initialize a kobject structure * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * * This function will properly initialize a kobject such that it can then * be passed to the kobject_add() call. * * After this function is called, the kobject MUST be cleaned up by a call * to kobject_put(), not by a call to kfree directly to ensure that all of * the memory is cleaned up properly. */void kobject_init(struct kobject *kobj, struct kobj_type *ktype){    char *err_str;    if (!kobj) {        err_str = "invalid kobject pointer!";        goto error;    }    if (!ktype) {        err_str = "must have a ktype to be initialized properly!\n";        goto error;    }    if (kobj->state_initialized) {  //避免重复初始化        /* do not error out as sometimes we can recover */        printk(KERN_ERR "kobject (%p): tried to init an initialized "               "object, something is seriously wrong.\n", kobj);        dump_stack();    }    kobject_init_internal(kobj); //完成初始化的主要函数    kobj->ktype = ktype;    return;error:    printk(KERN_ERR "kobject (%p): %s\n", kobj, err_str);    dump_stack();}EXPORT_SYMBOL(kobject_init);

由上面函数可以看出由kobject_init_internal完成初始化:

/*
 * kobject_init_internal - bring a kobject into its initial state.
 * @kobj: kobject to set up; a NULL pointer is silently ignored.
 *
 * Initializes the reference count and the kset list entry, clears the
 * sysfs/uevent state bits and finally marks the object as initialized.
 */
static void kobject_init_internal(struct kobject *kobj)
{
	if (kobj) {
		kref_init(&kobj->kref);
		INIT_LIST_HEAD(&kobj->entry);

		kobj->state_in_sysfs = 0;
		kobj->state_add_uevent_sent = 0;
		kobj->state_remove_uevent_sent = 0;
		kobj->state_initialized = 1;
	}
}

kobject_create函数仅仅是在调用kobject_init之前,先分配kobject空间。在kobject初始化之后,需要调用kobject_add将kobject添加到sysfs中。

/** * kobject_add - the main kobject add function * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. * * The kobject name is set and added to the kobject hierarchy in this * function. * * If @parent is set, then the parent of the @kobj will be set to it. * If @parent is NULL, then the parent of the @kobj will be set to the * kobject associated with the kset assigned to this kobject.  If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * * If this function returns an error, kobject_put() must be called to * properly clean up the memory associated with the object. * Under no instance should the kobject that is passed to this function * be directly freed with a call to kfree(), that can leak memory. * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. */int kobject_add(struct kobject *kobj, struct kobject *parent,        const char *fmt, ...){    va_list args;    int retval;    if (!kobj)        return -EINVAL;    if (!kobj->state_initialized) { //add之前需要初始化        printk(KERN_ERR "kobject '%s' (%p): tried to add an "               "uninitialized object, something is seriously wrong.\n",               kobject_name(kobj), kobj);        dump_stack();        return -EINVAL;    }    va_start(args, fmt);    retval = kobject_add_varg(kobj, parent, fmt, args); //主要完成add操作    va_end(args);    return retval;}EXPORT_SYMBOL(kobject_add);

kobject_add_varg/kobject_add_internal主要完成将kobject添加到sysfs的操作:

/*
 * kobject_add_varg - name the kobject, record its parent and add it.
 * @kobj: kobject to add
 * @parent: parent kobject to record (may be NULL)
 * @fmt: printf-style format for the name
 * @vargs: arguments for @fmt
 *
 * Returns the error from kobject_set_name_vargs() if naming fails,
 * otherwise the result of kobject_add_internal().
 */
static __printf(3, 0) int kobject_add_varg(struct kobject *kobj,
					   struct kobject *parent,
					   const char *fmt, va_list vargs)
{
	int retval;

	/* set the name under which the kobject is shown in sysfs */
	retval = kobject_set_name_vargs(kobj, fmt, vargs);
	if (retval) {
		printk(KERN_ERR "kobject: can not set name properly!\n");
		return retval;
	}
	kobj->parent = parent;
	return kobject_add_internal(kobj); /* does the real work */
}

/*
 * kobject_add_internal - link a named kobject into the hierarchy and sysfs.
 * @kobj: kobject to add; must already have a non-empty name
 *
 * Takes a reference on the parent, joins the kset if one is set, and
 * creates the sysfs directory via create_dir().  If create_dir() fails,
 * the kset membership and the parent reference are rolled back.
 *
 * Returns 0 on success, -ENOENT for a NULL @kobj, -EINVAL for an empty
 * name, or the error returned by create_dir().
 */
static int kobject_add_internal(struct kobject *kobj)
{
	int error = 0;
	struct kobject *parent;

	if (!kobj)
		return -ENOENT;

	if (!kobj->name || !kobj->name[0]) {
		WARN(1, "kobject: (%p): attempted to be registered with empty "
			 "name!\n", kobj);
		return -EINVAL;
	}

	parent = kobject_get(kobj->parent); /* take a reference on the parent */

	/* join kset if set, use it as parent if we do not already have one */
	if (kobj->kset) {
		/* a kset is set but no parent: the kset's kobject becomes the parent */
		if (!parent)
			parent = kobject_get(&kobj->kset->kobj);
		kobj_kset_join(kobj);
		kobj->parent = parent;
	}

	pr_debug("kobject: '%s' (%p): %s: parent: '%s', set: '%s'\n",
		 kobject_name(kobj), kobj, __func__,
		 parent ? kobject_name(parent) : "<NULL>",
		 kobj->kset ? kobject_name(&kobj->kset->kobj) : "<NULL>");

	error = create_dir(kobj); /* create the sysfs directory and attribute files */
	if (error) {
		/* roll back: leave the kset and drop the parent reference */
		kobj_kset_leave(kobj);
		kobject_put(parent);
		kobj->parent = NULL;

		/* be noisy on error issues */
		if (error == -EEXIST)
			WARN(1, "%s failed for %s with "
			     "-EEXIST, don't try to register things with "
			     "the same name in the same directory.\n",
			     __func__, kobject_name(kobj));
		else
			WARN(1, "%s failed for %s (error: %d parent: %s)\n",
			     __func__, kobject_name(kobj), error,
			     parent ? kobject_name(parent) : "'none'");
	} else
		kobj->state_in_sysfs = 1; /* record that the kobject is now in sysfs */

	return error;
}

由create_dir在sysfs创建真实的目录和文件,这点由下一篇sysfs详细描述。理解了kobject_init和kobject_add之后,由名字就可以知道kobject_init_and_add和kobject_create_and_add这两个函数的作用。

内核对象释放

调用kobject_del将kobject从层次结构和sysfs中移除,再由kobject_put递减引用计数,计数为0时最终释放kobject:

/**
 * kobject_del - unlink kobject from hierarchy.
 * @kobj: object.
 *
 * Removes the sysfs directory, clears state_in_sysfs, leaves the kset and
 * drops the reference held on the parent.  A NULL @kobj is ignored.
 */
void kobject_del(struct kobject *kobj)
{
	struct kernfs_node *sd;

	if (!kobj)
		return;

	/*
	 * sd is read before sysfs_remove_dir() runs
	 * (presumably because that call clears kobj->sd -- TODO confirm).
	 */
	sd = kobj->sd;
	sysfs_remove_dir(kobj);    /* remove the kobject's directory from sysfs */
	sysfs_put(sd);

	kobj->state_in_sysfs = 0;  /* update the state flag */
	kobj_kset_leave(kobj);     /* unlink the kobject from its kset list */
	kobject_put(kobj->parent); /* drop the reference on the parent, not on kobj itself */
	kobj->parent = NULL;
}
EXPORT_SYMBOL(kobject_del);

/**
 * kobject_put - decrement refcount for object.
 * @kobj: object.
 *
 * Decrement the refcount, and if 0, call kobject_cleanup().
 */
void kobject_put(struct kobject *kobj)
{
	if (kobj) {
		if (!kobj->state_initialized)
			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
			       "initialized, yet kobject_put() is being "
			       "called.\n", kobject_name(kobj), kobj);
		/* kobject_release() is the callback passed for the final put */
		kref_put(&kobj->kref, kobject_release);
	}
}
EXPORT_SYMBOL(kobject_put);

/*
 * kref_put - drop one reference; thin wrapper around kref_sub() with a
 * count of 1.  Returns whatever kref_sub() returns.
 */
static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
{
	return kref_sub(kref, 1, release);
}

/*
 * kref_sub - subtract @count references from @kref.
 * @kref: reference counter
 * @count: number of references to drop
 * @release: callback invoked when atomic_sub_and_test() reports true
 *
 * Warns if @release is NULL.  Returns 1 if @release was called, 0 otherwise.
 */
static inline int kref_sub(struct kref *kref, unsigned int count,
	     void (*release)(struct kref *kref))
{
	WARN_ON(release == NULL);

	if (atomic_sub_and_test((int) count, &kref->refcount)) {
		release(kref); /* for kobjects this is kobject_release() */
		return 1;
	}
	return 0;
}

根据上面的代码追踪,得知kobject_release才是释放kobject的主角:

/*
 * kobject_release - kref release callback for kobjects.
 * @kref: the kref embedded in the kobject being released
 *
 * Recovers the enclosing kobject from @kref.  With
 * CONFIG_DEBUG_KOBJECT_RELEASE the cleanup is deferred through delayed
 * work; otherwise kobject_cleanup() is called directly.
 */
static void kobject_release(struct kref *kref)
{
	struct kobject *kobj = container_of(kref, struct kobject, kref);
#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
	/* delay is HZ plus 0..3 additional multiples of HZ, chosen at random */
	unsigned long delay = HZ + HZ * (get_random_int() & 0x3);
	pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n",
		 kobject_name(kobj), kobj, __func__, kobj->parent, delay);
	INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup);

	/* schedule kobject_delayed_cleanup() to do the cleanup later */
	schedule_delayed_work(&kobj->release, delay);
#else
	kobject_cleanup(kobj); /* clean up immediately */
#endif
}

如果在内核编译时指定CONFIG_DEBUG_KOBJECT_RELEASE,则使用延迟release方式调用kobject_delayed_cleanup,否则直接调用kobject_cleanup。

#ifdef CONFIG_DEBUG_KOBJECT_RELEASE
/*
 * kobject_delayed_cleanup - delayed-work handler used by kobject_release().
 * @work: the work_struct inside the kobject's delayed "release" work
 *
 * Maps @work back to its kobject and ends up calling kobject_cleanup().
 */
static void kobject_delayed_cleanup(struct work_struct *work)
{
	kobject_cleanup(container_of(to_delayed_work(work),
				     struct kobject, release));
}
#endif

/*
 * kobject_cleanup - free kobject resources.
 * @kobj: object to cleanup
 *
 * Sends a missing "remove" uevent, removes the kobject from sysfs if the
 * caller has not done so, calls the ktype's release() and frees the name.
 */
static void kobject_cleanup(struct kobject *kobj)
{
	struct kobj_type *t = get_ktype(kobj);
	/* name is captured up front; it is freed after t->release() has run */
	const char *name = kobj->name;

	pr_debug("kobject: '%s' (%p): %s, parent %p\n",
		 kobject_name(kobj), kobj, __func__, kobj->parent);

	if (t && !t->release)
		pr_debug("kobject: '%s' (%p): does not have a release() "
			 "function, it is broken and must be fixed.\n",
			 kobject_name(kobj), kobj);

	/* send "remove" if the caller did not do it but sent "add" */
	if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) {
		pr_debug("kobject: '%s' (%p): auto cleanup 'remove' event\n",
			 kobject_name(kobj), kobj);
		kobject_uevent(kobj, KOBJ_REMOVE); /* REMOVE is sent only once */
	}

	/* remove from sysfs if the caller did not do it */
	if (kobj->state_in_sysfs) {
		pr_debug("kobject: '%s' (%p): auto cleanup kobject_del\n",
			 kobject_name(kobj), kobj);
		kobject_del(kobj); /* the caller did not clean up sysfs, do it here */
	}

	if (t && t->release) {
		pr_debug("kobject: '%s' (%p): calling ktype release\n",
			 kobject_name(kobj), kobj);
		t->release(kobj); /* invoke the kobj_type release callback */
	}

	/* free name if we allocated it */
	if (name) {
		pr_debug("kobject: '%s': free name\n", name);
		kfree_const(name);
	}
}

内核对象集相关操作

void kset_init(struct kset *kset);struct kset *kset_create(const char *name, const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj);int kset_register(struct kset *kset);void kset_unregister(struct kset *kset);struct kset * kset_create_and_add(const char *name, const struct kset_uevent_ops *u, struct kobject *parent_kobj);

内核对象集创建及初始化

内核对象集由kset_create创建

/** * kset_create - create a struct kset dynamically * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset * @parent_kobj: the parent kobject of this kset, if any. * * This function creates a kset structure dynamically.  This structure can * then be registered with the system and show up in sysfs with a call to * kset_register().  When you are finished with this structure, if * kset_register() has been called, call kset_unregister() and the * structure will be dynamically freed when it is no longer being used. * * If the kset was not able to be created, NULL will be returned. */static struct kset *kset_create(const char *name,                const struct kset_uevent_ops *uevent_ops,                struct kobject *parent_kobj){    struct kset *kset;    int retval;    kset = kzalloc(sizeof(*kset), GFP_KERNEL);  //分配空间    if (!kset)        return NULL;    retval = kobject_set_name(&kset->kobj, "%s", name); //设置kset在sysfs中的名字    if (retval) {        kfree(kset);        return NULL;    }    kset->uevent_ops = uevent_ops;   //设置uevent_ops    kset->kobj.parent = parent_kobj; //设置kset的父对象    /*     * The kobject of this kset will have a type of kset_ktype and belong to     * no kset itself.  That way we can properly free it when it is     * finished being used.     */    kset->kobj.ktype = &kset_ktype;  //设置kobj_type    kset->kobj.kset = NULL;    return kset;}

内核对象集由kset_init执行初始化:

/** * kset_init - initialize a kset for use * @k: kset */void kset_init(struct kset *k){    kobject_init_internal(&k->kobj);  //这里初始化    INIT_LIST_HEAD(&k->list);    spin_lock_init(&k->list_lock);}static void kobject_init_internal(struct kobject *kobj){    if (!kobj)        return;    kref_init(&kobj->kref);    INIT_LIST_HEAD(&kobj->entry);    kobj->state_in_sysfs = 0;        //设置对应标志位    kobj->state_add_uevent_sent = 0;    kobj->state_remove_uevent_sent = 0;    kobj->state_initialized = 1;}

初始化kset之后,调用kset_register,将kset添加到sysfs:

/** * kset_register - initialize and add a kset. * @k: kset. */int kset_register(struct kset *k){    int err;    if (!k)        return -EINVAL;    kset_init(k);    err = kobject_add_internal(&k->kobj); //完成register动作,前面已说明    if (err)        return err;    kobject_uevent(&k->kobj, KOBJ_ADD); //发送ADD事件到用户空间    return 0;}EXPORT_SYMBOL(kset_register);

经过kset_create, kset_init和kset_register之后,kset已初始化并添加完成。当然kset_create_and_add包含了这三个函数。

内核对象集释放

内核对象集的释放过程与kobject的释放过程类似,由kset_unregister完成:

/**
 * kset_unregister - remove a kset.
 * @k: kset.
 *
 * Unlinks the kset's embedded kobject with kobject_del() and then drops a
 * reference on it with kobject_put(), the same sequence used to release a
 * plain kobject.  A NULL @k is ignored.
 */
void kset_unregister(struct kset *k)
{
	if (k) {
		struct kobject *embedded = &k->kobj;

		kobject_del(embedded);
		kobject_put(embedded);
	}
}
EXPORT_SYMBOL(kset_unregister);

发送事件到用户空间

由前面的代码可以看到无论kobject或是kset,都会向用户空间发送事件,由kobject_uevent函数通过设置环境变量的方式完成:

/*
 * struct kobj_uevent_env - argument vector and environment built for one uevent.
 */
struct kobj_uevent_env {
	char *argv[3];                /* command used by the user-mode helper */
	char *envp[UEVENT_NUM_ENVP];  /* array of environment variables */
	int envp_idx;                 /* current environment variable index */
	char buf[UEVENT_BUFFER_SIZE]; /* buffer holding the environment variable data */
	int buflen;                   /* presumably the number of bytes used in buf -- TODO confirm */
};

/**
 * kobject_uevent - notify userspace by sending an uevent
 *
 * @action: action that is happening
 * @kobj: struct kobject that the action is happening to
 *
 * Returns 0 if kobject_uevent() is completed with success or the
 * corresponding error when it fails.
 */
int kobject_uevent(struct kobject *kobj, enum kobject_action action)
{
	/* kobject_uevent_env() does the actual sending; no extra variables */
	return kobject_uevent_env(kobj, action, NULL);
}
EXPORT_SYMBOL_GPL(kobject_uevent);

/**
 * kobject_uevent_env - send an uevent with environmental data
 *
 * @action: action that is happening
 * @kobj: struct kobject that the action is happening to
 * @envp_ext: pointer to environmental data
 *
 * Returns 0 if kobject_uevent_env() is completed with success or the
 * corresponding error when it fails.
 */
int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
		       char *envp_ext[])
{
	struct kobj_uevent_env *env;
	const char *action_string = kobject_actions[action];
	const char *devpath = NULL;
	const char *subsystem;
	struct kobject *top_kobj;
	struct kset *kset;
	const struct kset_uevent_ops *uevent_ops;
	int i = 0;
	int retval = 0;
#ifdef CONFIG_NET
	struct uevent_sock *ue_sk;
#endif

	pr_debug("kobject: '%s' (%p): %s\n",
		 kobject_name(kobj), kobj, __func__);

	/* search the kset we belong to */
	top_kobj = kobj;
	/* walk up the parents to the nearest kset; the kset carries the uevent_ops */
	while (!top_kobj->kset && top_kobj->parent)
		top_kobj = top_kobj->parent;

	if (!top_kobj->kset) {
		pr_debug("kobject: '%s' (%p): %s: attempted to send uevent "
			 "without kset!\n", kobject_name(kobj), kobj,
			 __func__);
		return -EINVAL;
	}

	kset = top_kobj->kset;
	uevent_ops = kset->uevent_ops; /* the kset's uevent_ops drive the send */

	/* skip the event, if uevent_suppress is set*/
	if (kobj->uevent_suppress) {
		pr_debug("kobject: '%s' (%p): %s: uevent_suppress "
				 "caused the event to drop!\n",
				 kobject_name(kobj), kobj, __func__);
		return 0;
	}
	/* skip the event, if the filter returns zero. */
	if (uevent_ops && uevent_ops->filter)
		if (!uevent_ops->filter(kset, kobj)) {
			pr_debug("kobject: '%s' (%p): %s: filter function "
				 "caused the event to drop!\n",
				 kobject_name(kobj), kobj, __func__);
			return 0;
		}

	/* originating subsystem */
	/* use the kset's name() callback if present, else the kset's own name */
	if (uevent_ops && uevent_ops->name)
		subsystem = uevent_ops->name(kset, kobj);
	else
		subsystem = kobject_name(&kset->kobj);
	if (!subsystem) {
		pr_debug("kobject: '%s' (%p): %s: unset subsystem caused the "
			 "event to drop!\n", kobject_name(kobj), kobj,
			 __func__);
		return 0;
	}

	/* environment buffer */
	env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
	if (!env)
		return -ENOMEM;

	/* complete object path */
	devpath = kobject_get_path(kobj, GFP_KERNEL);
	if (!devpath) {
		retval = -ENOENT;
		goto exit;
	}

	/* default keys: add the standard environment variables */
	retval = add_uevent_var(env, "ACTION=%s", action_string);
	if (retval)
		goto exit;
	retval = add_uevent_var(env, "DEVPATH=%s", devpath);
	if (retval)
		goto exit;
	retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem);
	if (retval)
		goto exit;

	/* keys passed in from the caller */
	if (envp_ext) {
		for (i = 0; envp_ext[i]; i++) {
			retval = add_uevent_var(env, "%s", envp_ext[i]);
			if (retval)
				goto exit;
		}
	}

	/* let the kset specific function add its stuff */
	if (uevent_ops && uevent_ops->uevent) {
		/* the uevent() callback adds subsystem-specific variables */
		retval = uevent_ops->uevent(kset, kobj, env);
		if (retval) {
			pr_debug("kobject: '%s' (%p): %s: uevent() returned "
				 "%d\n", kobject_name(kobj), kobj,
				 __func__, retval);
			goto exit;
		}
	}

	/*
	 * Mark "add" and "remove" events in the object to ensure proper
	 * events to userspace during automatic cleanup. If the object did
	 * send an "add" event, "remove" will automatically generated by
	 * the core, if not already done by the caller.
	 */
	if (action == KOBJ_ADD)
		kobj->state_add_uevent_sent = 1;
	else if (action == KOBJ_REMOVE)
		kobj->state_remove_uevent_sent = 1;

	mutex_lock(&uevent_sock_mutex);
	/* we will send an event, so request a new sequence number */
	retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)++uevent_seqnum);
	if (retval) {
		mutex_unlock(&uevent_sock_mutex);
		goto exit;
	}

#if defined(CONFIG_NET)
	/* built with CONFIG_NET: deliver the event over netlink */
	/* send netlink message */
	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
		struct sock *uevent_sock = ue_sk->sk;
		struct sk_buff *skb;
		size_t len;

		if (!netlink_has_listeners(uevent_sock, 1))
			continue;

		/* allocate message with the maximum possible size */
		len = strlen(action_string) + strlen(devpath) + 2;
		skb = alloc_skb(len + env->buflen, GFP_KERNEL);
		if (skb) {
			char *scratch;

			/* add header */
			scratch = skb_put(skb, len);
			sprintf(scratch, "%s@%s", action_string, devpath);

			/* copy keys to our continuous event payload buffer */
			for (i = 0; i < env->envp_idx; i++) {
				len = strlen(env->envp[i]) + 1;
				scratch = skb_put(skb, len);
				strcpy(scratch, env->envp[i]);
			}

			NETLINK_CB(skb).dst_group = 1;
			/* broadcast over netlink to group 1 */
			retval = netlink_broadcast_filtered(uevent_sock, skb,
							    0, 1, GFP_KERNEL,
							    kobj_bcast_filter,
							    kobj);
			/* ENOBUFS should be handled in userspace */
			if (retval == -ENOBUFS || retval == -ESRCH)
				retval = 0;
		} else
			retval = -ENOMEM;
	}
#endif
	mutex_unlock(&uevent_sock_mutex);

#ifdef CONFIG_UEVENT_HELPER
	/* built with CONFIG_UEVENT_HELPER: additionally run the user-mode helper */
	/* call uevent_helper, usually only enabled during early boot */
	if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
		struct subprocess_info *info;

		retval = add_uevent_var(env, "HOME=/");
		if (retval)
			goto exit;
		retval = add_uevent_var(env,
					"PATH=/sbin:/bin:/usr/sbin:/usr/bin");
		if (retval)
			goto exit;
		/* assemble the user-space command and its arguments */
		retval = init_uevent_argv(env, subsystem);
		if (retval)
			goto exit;

		/* -ENOMEM is returned if the helper setup below yields NULL */
		retval = -ENOMEM;
		/* run the user-space program named by env->argv[0] */
		info = call_usermodehelper_setup(env->argv[0], env->argv,
						 env->envp, GFP_KERNEL,
						 NULL, cleanup_uevent_env, env);
		if (info) {
			retval = call_usermodehelper_exec(info, UMH_NO_WAIT);
			env = NULL; /* freed by cleanup_uevent_env */
		}
	}
#endif

exit:
	kfree(devpath);
	kfree(env);
	return retval;
}
EXPORT_SYMBOL_GPL(kobject_uevent_env);
1 0