Linux Kernel Notes -- Locks


In a system with multiple cores or multiple threads of execution, data shared between execution contexts must be protected, otherwise concurrent updates applied to memory in an indeterminate order can corrupt it. On such a machine, protecting data means ruling out simultaneous access, and that requires the atomic operations provided by the hardware, which read, modify and write back a memory location as a single indivisible instruction. On top of this primitive one can build the spinlock, which is only slightly coarser than a single instruction, and then coarser-grained mechanisms such as semaphores and mutexes.
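
As a concrete illustration of how one atomic instruction turns into a lock, here is a minimal test-and-set spinlock sketch written with C11 atomics; it is not kernel code, and the toy_* names are invented for this example:

#include <stdatomic.h>

/* Toy spinlock: one flag flipped by an atomic read-modify-write. */
typedef struct {
    atomic_flag locked;     /* clear == unlocked */
} toy_spinlock;

static inline void toy_spin_lock(toy_spinlock *l)
{
    /* atomic_flag_test_and_set() sets the flag and returns its previous
     * value in a single atomic step; spin until that previous value was
     * "unlocked". */
    while (atomic_flag_test_and_set_explicit(&l->locked, memory_order_acquire))
        ;   /* busy-wait: this is the "spin" */
}

static inline void toy_spin_unlock(toy_spinlock *l)
{
    /* Release ordering publishes the critical section to the next owner. */
    atomic_flag_clear_explicit(&l->locked, memory_order_release);
}

Initialization would use ATOMIC_FLAG_INIT; everything the kernel adds on top (preemption and IRQ control, fairness, debugging) is about making this basic idea safe and efficient in practice.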

Spinlocks

When taking a spinlock, the kernel saves and disables local interrupts and disables preemption so that the locked region cannot be interrupted. Acquiring and releasing the lock are themselves very fast operations, but if the holder were diverted to service an IRQ and then preempted by a higher-priority context, the lock would be held far longer than intended, and worse, out-of-sync data or deadlock could result. The lock guarantees that, between acquire and release, the holder's manipulation of the data cannot be disturbed by any other context; the other contexts wait by spinning on their processor (essentially a busy-wait loop of no-op/relax instructions). Besides protecting data, a spinlock can also synchronize two contexts: one context holds the lock so that the other blocks when requesting it, and once the lock is released the two execution points are synchronized at that granularity.

#define BUILD_LOCK_OPS(op, locktype)                                    \
void __lockfunc __raw_##op##_lock(locktype##_t *lock)                   \
{                                                                       \
    for (;;) {                                                          \
        preempt_disable();                                              \
        if (likely(do_raw_##op##_trylock(lock)))                        \
            break;                                                      \
        preempt_enable();                                               \
                                                                        \
        if (!(lock)->break_lock)                                        \
            (lock)->break_lock = 1;                                     \
        while (!raw_##op##_can_lock(lock) && (lock)->break_lock)        \
            arch_##op##_relax(&lock->raw_lock);                         \
    }                                                                   \
    (lock)->break_lock = 0;                                             \
}                                                                       \
                                                                        \
unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock)  \
{                                                                       \
    unsigned long flags;                                                \
                                                                        \
    for (;;) {                                                          \
        preempt_disable();                                              \
        local_irq_save(flags);                                          \
        if (likely(do_raw_##op##_trylock(lock)))                        \
            break;                                                      \
        local_irq_restore(flags);                                       \
        preempt_enable();                                               \
                                                                        \
        if (!(lock)->break_lock)                                        \
            (lock)->break_lock = 1;                                     \
        while (!raw_##op##_can_lock(lock) && (lock)->break_lock)        \
            arch_##op##_relax(&lock->raw_lock);                         \
    }                                                                   \
    (lock)->break_lock = 0;                                             \
    return flags;                                                       \
}                                                                       \
                                                                        \
void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock)               \
{                                                                       \
    _raw_##op##_lock_irqsave(lock);                                     \
}                                                                       \
                                                                        \
void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock)                \
{                                                                       \
    unsigned long flags;                                                \
                                                                        \
    /*                                                      */          \
    /* Careful: we must exclude softirqs too, hence the     */          \
    /* irq-disabling. We use the generic preemption-aware   */          \
    /* function:                                            */          \
    /**/                                                                \
    flags = _raw_##op##_lock_irqsave(lock);                             \
    local_bh_disable();                                                 \
    local_irq_restore(flags);                                           \
}                                                                       \

[source: /linux4.13.12/kernel/locking/spinlock.c]
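
From the caller's point of view the details above collapse into a pair of calls. The following is a hedged usage sketch; struct my_dev and its fields are invented for illustration:

#include <linux/spinlock.h>

struct my_dev {                          /* hypothetical driver state */
    spinlock_t lock;
    unsigned long events;
};

static void my_dev_record_event(struct my_dev *dev)
{
    unsigned long flags;

    /* Save and disable local IRQs, disable preemption, then spin until
     * the lock is free. The critical section must stay short because
     * other CPUs burn cycles while they wait. */
    spin_lock_irqsave(&dev->lock, flags);
    dev->events++;
    spin_unlock_irqrestore(&dev->lock, flags);
}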

Mutex and Semaphore

Building on spinlocks and the atomic operations supplied by the hardware, more flexible locking mechanisms can be implemented to suit different needs. Once one thread holds a lock, the other threads do not have to burn machine time busy-waiting for it: they can be suspended and released again once they can take the lock, which frees the processor for other threads. A semaphore limits how many contexts may access the same data at once: acquiring it decrements the count by one, releasing it increments the count by one, and once the count reaches zero later requesters are put to sleep. Drawing water is an intuitive example (one well, M buckets and one water jar): because the number of buckets is limited, a sudden burst of requests cannot overwhelm or corrupt the resource, it simply has to wait for a free bucket.

static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
{
    unsigned long curr = (unsigned long)current;

    if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
        return true;

    return false;
}

static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
{
    unsigned long curr = (unsigned long)current;

    if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr)
        return true;

    return false;
}

/*
 * Lock a mutex (possibly interruptible), slowpath:
 */
static __always_inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
            struct lockdep_map *nest_lock, unsigned long ip,
            struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
    struct mutex_waiter waiter;
    bool first = false;
    struct ww_mutex *ww;
    int ret;

    might_sleep();

    ww = container_of(lock, struct ww_mutex, base);
    if (use_ww_ctx && ww_ctx) {
        if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
            return -EALREADY;
    }

    preempt_disable();
    mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);

    if (__mutex_trylock(lock) ||
        mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) {
        /* got the lock, yay! */
        lock_acquired(&lock->dep_map, ip);
        if (use_ww_ctx && ww_ctx)
            ww_mutex_set_context_fastpath(ww, ww_ctx);
        preempt_enable();
        return 0;
    }

    spin_lock(&lock->wait_lock);
    /*
     * After waiting to acquire the wait_lock, try again.
     */
    if (__mutex_trylock(lock)) {
        if (use_ww_ctx && ww_ctx)
            __ww_mutex_wakeup_for_backoff(lock, ww_ctx);

        goto skip_wait;
    }

    debug_mutex_lock_common(lock, &waiter);
    debug_mutex_add_waiter(lock, &waiter, current);

    lock_contended(&lock->dep_map, ip);

    if (!use_ww_ctx) {
        /* add waiting tasks to the end of the waitqueue (FIFO): */
        list_add_tail(&waiter.list, &lock->wait_list);

#ifdef CONFIG_DEBUG_MUTEXES
        waiter.ww_ctx = MUTEX_POISON_WW_CTX;
#endif
    } else {
        /* Add in stamp order, waking up waiters that must back off. */
        ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
        if (ret)
            goto err_early_backoff;

        waiter.ww_ctx = ww_ctx;
    }

    waiter.task = current;

    if (__mutex_waiter_is_first(lock, &waiter))
        __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);

    set_current_state(state);
    for (;;) {
        /*
         * Once we hold wait_lock, we're serialized against
         * mutex_unlock() handing the lock off to us, do a trylock
         * before testing the error conditions to make sure we pick up
         * the handoff.
         */
        if (__mutex_trylock(lock))
            goto acquired;

        /*
         * Check for signals and wound conditions while holding
         * wait_lock. This ensures the lock cancellation is ordered
         * against mutex_unlock() and wake-ups do not go missing.
         */
        if (unlikely(signal_pending_state(state, current))) {
            ret = -EINTR;
            goto err;
        }

        if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) {
            ret = __ww_mutex_lock_check_stamp(lock, &waiter, ww_ctx);
            if (ret)
                goto err;
        }

        spin_unlock(&lock->wait_lock);
        schedule_preempt_disabled();

        /*
         * ww_mutex needs to always recheck its position since its waiter
         * list is not FIFO ordered.
         */
        if ((use_ww_ctx && ww_ctx) || !first) {
            first = __mutex_waiter_is_first(lock, &waiter);
            if (first)
                __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
        }

        set_current_state(state);
        /*
         * Here we order against unlock; we must either see it change
         * state back to RUNNING and fall through the next schedule(),
         * or we must see its unlock and acquire.
         */
        if (__mutex_trylock(lock) ||
            (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter)))
            break;

        spin_lock(&lock->wait_lock);
    }
    spin_lock(&lock->wait_lock);
acquired:
    __set_current_state(TASK_RUNNING);

    mutex_remove_waiter(lock, &waiter, current);
    if (likely(list_empty(&lock->wait_list)))
        __mutex_clear_flag(lock, MUTEX_FLAGS);

    debug_mutex_free_waiter(&waiter);

skip_wait:
    /* got the lock - cleanup and rejoice! */
    lock_acquired(&lock->dep_map, ip);

    if (use_ww_ctx && ww_ctx)
        ww_mutex_set_context_slowpath(ww, ww_ctx);

    spin_unlock(&lock->wait_lock);
    preempt_enable();
    return 0;

err:
    __set_current_state(TASK_RUNNING);
    mutex_remove_waiter(lock, &waiter, current);
err_early_backoff:
    spin_unlock(&lock->wait_lock);
    debug_mutex_free_waiter(&waiter);
    mutex_release(&lock->dep_map, 1, ip);
    preempt_enable();
    return ret;
}

static int __sched
__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
         struct lockdep_map *nest_lock, unsigned long ip)
{
    return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
}

void __sched mutex_lock(struct mutex *lock)
{
    might_sleep();

    if (!__mutex_trylock_fast(lock))
        __mutex_lock_slowpath(lock);
}

/*
 * Release the lock, slowpath:
 */
static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
{
    struct task_struct *next = NULL;
    DEFINE_WAKE_Q(wake_q);
    unsigned long owner;

    mutex_release(&lock->dep_map, 1, ip);

    /*
     * Release the lock before (potentially) taking the spinlock such that
     * other contenders can get on with things ASAP.
     *
     * Except when HANDOFF, in that case we must not clear the owner field,
     * but instead set it to the top waiter.
     */
    owner = atomic_long_read(&lock->owner);
    for (;;) {
        unsigned long old;

#ifdef CONFIG_DEBUG_MUTEXES
        DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
        DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif

        if (owner & MUTEX_FLAG_HANDOFF)
            break;

        old = atomic_long_cmpxchg_release(&lock->owner, owner,
                          __owner_flags(owner));
        if (old == owner) {
            if (owner & MUTEX_FLAG_WAITERS)
                break;

            return;
        }

        owner = old;
    }

    spin_lock(&lock->wait_lock);
    debug_mutex_unlock(lock);
    if (!list_empty(&lock->wait_list)) {
        /* get the first entry from the wait-list: */
        struct mutex_waiter *waiter =
            list_first_entry(&lock->wait_list,
                     struct mutex_waiter, list);

        next = waiter->task;

        debug_mutex_wake_waiter(lock, waiter);
        wake_q_add(&wake_q, next);
    }

    if (owner & MUTEX_FLAG_HANDOFF)
        __mutex_handoff(lock, next);

    spin_unlock(&lock->wait_lock);

    wake_up_q(&wake_q);
}

void __sched mutex_unlock(struct mutex *lock)
{
#ifndef CONFIG_DEBUG_LOCK_ALLOC
    if (__mutex_unlock_fast(lock))
        return;
#endif
    __mutex_unlock_slowpath(lock, _RET_IP_);
}

[source: /linux4.13.12/kernel/locking/mutex.c]
The code above is the core of the mutex implementation. The main flow is to check whether the mutex is already held: if not, the fast path succeeds with a single atomic compare-and-exchange on the owner field; if it is held, the normal (slow) path is taken and the caller hangs itself on the lock's wait list and sleeps until it is woken. Unlocking mirrors this: a fast path clears the owner, and the slow path wakes the first waiter.
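
A caller only ever sees the outer API. The following hedged sketch shows typical usage; struct my_cache and its fields are invented:

#include <linux/mutex.h>

struct my_cache {                        /* hypothetical shared state */
    struct mutex lock;
    int cached_value;
};

static void my_cache_update(struct my_cache *c, int v)
{
    /* mutex_lock() may sleep on the slow path, so this must not be
     * called from interrupt or other atomic context. */
    mutex_lock(&c->lock);
    c->cached_value = v;
    mutex_unlock(&c->lock);
}

The semaphore code below follows the same pattern, but with a counter and a wait list protected by a raw spinlock: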

/**
 * down - acquire the semaphore
 * @sem: the semaphore to be acquired
 *
 * Acquires the semaphore.  If no more tasks are allowed to acquire the
 * semaphore, calling this function will put the task to sleep until the
 * semaphore is released.
 *
 * Use of this function is deprecated, please use down_interruptible() or
 * down_killable() instead.
 */
void down(struct semaphore *sem)
{
    unsigned long flags;

    raw_spin_lock_irqsave(&sem->lock, flags);
    if (likely(sem->count > 0))
        sem->count--;
    else
        __down(sem);
    raw_spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(down);

/**
 * up - release the semaphore
 * @sem: the semaphore to release
 *
 * Release the semaphore.  Unlike mutexes, up() may be called from any
 * context and even by tasks which have never called down().
 */
void up(struct semaphore *sem)
{
    unsigned long flags;

    raw_spin_lock_irqsave(&sem->lock, flags);
    if (likely(list_empty(&sem->wait_list)))
        sem->count++;
    else
        __up(sem);
    raw_spin_unlock_irqrestore(&sem->lock, flags);
}
EXPORT_SYMBOL(up);

static inline int __sched __down_common(struct semaphore *sem, long state,
                                long timeout)
{
    struct semaphore_waiter waiter;

    list_add_tail(&waiter.list, &sem->wait_list);
    waiter.task = current;
    waiter.up = false;

    for (;;) {
        if (signal_pending_state(state, current))
            goto interrupted;
        if (unlikely(timeout <= 0))
            goto timed_out;
        __set_current_state(state);
        raw_spin_unlock_irq(&sem->lock);
        timeout = schedule_timeout(timeout);
        raw_spin_lock_irq(&sem->lock);
        if (waiter.up)
            return 0;
    }

 timed_out:
    list_del(&waiter.list);
    return -ETIME;

 interrupted:
    list_del(&waiter.list);
    return -EINTR;
}

static noinline void __sched __down(struct semaphore *sem)
{
    __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}

static noinline void __sched __up(struct semaphore *sem)
{
    struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
                        struct semaphore_waiter, list);
    list_del(&waiter->list);
    waiter->up = true;
    wake_up_process(waiter->task);
}

[source: /linux4.13.12/kernel/locking/semaphore.c]
The code above is the basic semaphore flow: while the count is positive, down() decrements it and the caller may use the resource; once the count reaches zero the caller is added to the wait list and suspended, and it is put back on the run queue only when the resource becomes usable again. Depending on the scenario, the sleeping task can be made wakeable by signals or killable (down_interruptible(), down_killable()); see the implementation in /linux4.13.12/kernel/locking/semaphore.c for details.
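
Mapping the well-and-buckets analogy onto the API gives the following hedged sketch; the count of four "buckets" and the surrounding function names are invented:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/semaphore.h>

static struct semaphore bucket_sem;

static int __init bucket_pool_init(void)
{
    /* Four "buckets": at most four tasks may draw water at a time. */
    sema_init(&bucket_sem, 4);
    return 0;
}

static int draw_water(void)
{
    /* Sleeps until a bucket is free; bails out if a signal arrives. */
    if (down_interruptible(&bucket_sem))
        return -EINTR;

    /* ... use the bucket (the protected resource) ... */

    up(&bucket_sem);        /* return the bucket */
    return 0;
}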

Extended Lock Designs

As the code above shows, both the mutex and the semaphore are implemented on top of the spinlock. In practice, a lock has to satisfy more than its functional requirements and performance targets; fairness and other factors matter as well, which is what motivates designs such as the ticket-style and MCS locks shown below.
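
As one illustration of a fairness-oriented design (not code from the kernel sources quoted here), below is a minimal ticket-lock sketch in C11 atomics: waiters are served strictly in the order in which they drew a ticket, so no CPU can starve:

#include <stdatomic.h>

/* Toy ticket lock: FIFO fairness via a "now serving" counter. */
typedef struct {
    atomic_uint next_ticket;    /* ticket dispenser */
    atomic_uint now_serving;    /* ticket currently allowed in */
} ticket_lock;

static void ticket_lock_acquire(ticket_lock *l)
{
    /* Atomically draw a ticket, then spin until it is called. */
    unsigned int me = atomic_fetch_add_explicit(&l->next_ticket, 1,
                                                memory_order_relaxed);
    while (atomic_load_explicit(&l->now_serving, memory_order_acquire) != me)
        ;   /* busy-wait */
}

static void ticket_lock_release(ticket_lock *l)
{
    /* Hand the lock to the next ticket holder in order. */
    atomic_fetch_add_explicit(&l->now_serving, 1, memory_order_release);
}

The weakness of both the test-and-set and the ticket scheme is that every waiter spins on the same cache line; the MCS lock below removes that by giving each waiter its own queue node to spin on.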

MCS Locks

An MCS lock queues its waiters so that each CPU spins on its own node rather than on a shared lock word, which avoids cache-line bouncing under contention (see the article "MCS locks and qspinlocks"). Below is the kernel's MCS lock implementation:

/*
 * In order to acquire the lock, the caller should declare a local node and
 * pass a reference of the node to this function in addition to the lock.
 * If the lock has already been acquired, then this will proceed to spin
 * on this node->locked until the previous lock holder sets the node->locked
 * in mcs_spin_unlock().
 */
static inline
void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
    struct mcs_spinlock *prev;

    /* Init node */
    node->locked = 0;
    node->next   = NULL;

    /*
     * We rely on the full barrier with global transitivity implied by the
     * below xchg() to order the initialization stores above against any
     * observation of @node. And to provide the ACQUIRE ordering associated
     * with a LOCK primitive.
     */
    prev = xchg(lock, node);
    if (likely(prev == NULL)) {
        /*
         * Lock acquired, don't need to set node->locked to 1. Threads
         * only spin on its own node->locked value for lock acquisition.
         * However, since this thread can immediately acquire the lock
         * and does not proceed to spin on its own node->locked, this
         * value won't be used. If a debug mode is needed to
         * audit lock status, then set node->locked value here.
         */
        return;
    }
    WRITE_ONCE(prev->next, node);

    /* Wait until the lock holder passes the lock down. */
    arch_mcs_spin_lock_contended(&node->locked);
}

/*
 * Releases the lock. The caller should pass in the corresponding node that
 * was used to acquire the lock.
 */
static inline
void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
{
    struct mcs_spinlock *next = READ_ONCE(node->next);

    if (likely(!next)) {
        /*
         * Release the lock by setting it to NULL
         */
        if (likely(cmpxchg_release(lock, node, NULL) == node))
            return;
        /* Wait until the next pointer is set */
        while (!(next = READ_ONCE(node->next)))
            cpu_relax();
    }

    /* Pass lock to next waiter. */
    arch_mcs_spin_unlock_contended(&next->locked);
}
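
Each acquirer supplies its own queue node and spins only on that node's locked field, which is what keeps the cache traffic local. A hedged usage sketch follows (the global lock pointer and the critical-section placeholder are invented; the helpers come from the kernel's private mcs_spinlock.h header):

/* Assumed context: kernel code that can see struct mcs_spinlock and the
 * mcs_spin_lock()/mcs_spin_unlock() helpers. */

static struct mcs_spinlock *my_mcs_lock;    /* NULL == unlocked (hypothetical) */

static void my_critical_section(void)
{
    struct mcs_spinlock node;   /* per-acquirer queue node, on the stack */

    mcs_spin_lock(&my_mcs_lock, &node);     /* spins only on node.locked */
    /* ... critical section ... */
    mcs_spin_unlock(&my_mcs_lock, &node);   /* hands off to node.next, if any */
}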

Extended lock implementations

  • qspinlock — queued spinlock, the MCS-style lock behind the kernel's regular spinlocks
  • qrwlock — queued reader-writer lock
  • rtmutex — real-time mutex with priority inheritance
  • rwsem — reader-writer semaphore (a usage sketch follows this list)
  • osq_lock — optimistic spin queue, used by mutex and rwsem optimistic spinning
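
Of these, rwsem is the one callers meet most directly. A hedged usage sketch (struct my_table and its fields are invented):

#include <linux/rwsem.h>

struct my_table {                       /* hypothetical shared table */
    struct rw_semaphore sem;
    int entries[16];
};

static int my_table_read(struct my_table *t, int idx)
{
    int v;

    down_read(&t->sem);     /* many readers may hold this concurrently */
    v = t->entries[idx];
    up_read(&t->sem);
    return v;
}

static void my_table_write(struct my_table *t, int idx, int v)
{
    down_write(&t->sem);    /* writers get exclusive access */
    t->entries[idx] = v;
    up_write(&t->sem);
}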

lockdep

lockdep is the kernel's runtime locking-correctness validator (deadlock detection module): it records the order in which classes of locks are taken and warns about patterns that could deadlock, such as inconsistent lock ordering between code paths, before an actual deadlock ever happens.
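
For example, the classic ABBA ordering below is the kind of pattern lockdep (with CONFIG_PROVE_LOCKING enabled) reports as a possible circular dependency; the locks and functions are invented for this hedged sketch:

#include <linux/mutex.h>

static DEFINE_MUTEX(lock_a);    /* hypothetical locks */
static DEFINE_MUTEX(lock_b);

static void path_one(void)
{
    mutex_lock(&lock_a);
    mutex_lock(&lock_b);    /* lockdep records the ordering A -> B */
    mutex_unlock(&lock_b);
    mutex_unlock(&lock_a);
}

static void path_two(void)
{
    mutex_lock(&lock_b);
    mutex_lock(&lock_a);    /* B -> A: reported as a possible deadlock
                             * even if the two paths never actually race */
    mutex_unlock(&lock_a);
    mutex_unlock(&lock_b);
}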

futex

A futex is a user-space lock facility: the mutex-style fast path (acquire and release without contention) is performed entirely in user space with atomic operations, which cuts down the number of times the kernel has to be entered for lock operations; only when the fast path fails does the thread trap into the kernel (the futex() system call) to sleep or to wake other waiters.
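
A hedged sketch of that split, simplified from the scheme in Ulrich Drepper's "Futexes Are Tricky" (glibc provides no futex() wrapper, so the raw syscall is used; states are 0 = free, 1 = locked, 2 = locked with possible waiters; error handling is omitted):

#include <stdatomic.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static atomic_int futex_word;   /* the shared lock word */

static void futex_lock(void)
{
    int expected = 0;

    /* Fast path: 0 -> 1 with one CAS, no kernel entry at all. */
    if (atomic_compare_exchange_strong(&futex_word, &expected, 1))
        return;

    /* Slow path: mark the word "contended" and ask the kernel to put us
     * to sleep for as long as it still reads 2. */
    do {
        if (atomic_exchange(&futex_word, 2) == 0)
            return;         /* lock became free while marking it */
        syscall(SYS_futex, &futex_word, FUTEX_WAIT, 2, NULL, NULL, 0);
    } while (1);
}

static void futex_unlock(void)
{
    /* If the old value was 2, someone may be asleep in the kernel. */
    if (atomic_exchange(&futex_word, 0) == 2)
        syscall(SYS_futex, &futex_word, FUTEX_WAKE, 1, NULL, NULL, 0);
}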
