Linux中POSIX文件锁的实现

来源：互联网发布：mac苹果商店打不开编辑：程序博客网时间：2024/05/21 09:15

我试图通过个人的理解方式讲解Linux文件锁的实现，使用的内核版本是3.13.0。

POSIX文件锁简介

先简单说下什么是文件锁。

Linux文件锁有两种：协同锁（有些称为建议锁）和强制锁。Linux读写文件时不会对协同锁做校验，只会对强制锁做验证。我只想看Linux内核如何同步多个进程的并发读写，因此不考虑协同锁。

对于强制锁，按读写属性分，有读锁和写锁，或者解释为共享锁和排斥锁。很明显，因为多个进程可以同时读取同一区域的文件，而只有一个进程对文件某一区域写才是安全的。

Linux对文件锁的处理

先不看加锁相关的代码，先看看Linux读取文件时对锁的处理。提一下，本人在阅读代码时发现，跟踪某个调用时总是容易发散，或者因为逻辑过于复杂导致阅读很难进行，因此这里说代码逻辑的时候，紧跟目标，简化逻辑，以帮助理解为主。

Linux锁冲突检测

对锁的数据结构有个大致了解，现在开始看相关的系统处理。

读文件对应的系统调用为read，在内核中是sys_read，在fs/read_write.c中定义：

SYSCALL_DEFINE3(read,unsigned int, fd, char __user *, buf, size_t,count)

该调用做一些基本的参数校验动作后调用vfs_read。在这里，vfs_read首先对参数做校验，然后校验用户空间内存，接下来就是需要讨论的，对文件锁的验证：rw_verify_area。

rw_verify_area有四个参数：

int read_write：读（READ），写（WRITE）；

struct file *file：系统记录文件读写操作的信息，与文件描述符关联；

const loff_t *ppos：读写的文件当前位置；

size_t count：读写的字节数。

下面是rw_verify_area的函数实现。

int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count){    struct inode *inode;    loff_t pos;    int retval = -EINVAL;     inode = file_inode(file);   // 获取文件的inode数据。file是打开文件后才会产生的记录文件读写                                // 信息等的数据结构。而inode是记录文件本身的信息，比如文件创建                                // 时间、文件大小等信息，详细可以参考                                // http://blog.csdn.net/panda19881/article/details/7799499                                // (讲解struct inode)和                                // http://www.cnblogs.com/QJohnson/archive/2011/06/24/2089414.html                                // （讲解inode 与file的关系）。    if (unlikely((ssize_t) count < 0))    // 校验参数：读取的字节数不能是负数        return retval;    pos = *ppos;    if (unlikely(pos < 0)) {              // 读取的位置        if (!unsigned_offsets(file))            return retval;        if (count >= -pos) /* both values are in 0..LLONG_MAX */ <==> count >= pos + INT_MAX            return -EOVERFLOW;    } else if (unlikely((loff_t) (pos + count) < 0)) {        if (!unsigned_offsets(file))            return retval;    }     if (unlikely(inode->i_flock && mandatory_lock(inode))) { // 这里判断是否需要校验强制锁，                                                             // 不管怎么处理的，直接跳过去。        retval = <strong><span style="color:#ff0000;">locks_mandatory_area</span></strong>(                       // 看名字，就是这里了            read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,            inode, file, pos, count);        if (retval < 0)            return retval;    }    retval = security_file_permission(file,        // 这个是做安全相关验证的，不管它                read_write == READ ? MAY_READ : MAY_WRITE);    if (retval)        return retval;    return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;}

这里面与文件锁相关的关键点在这里：locks_mandatory_area，可惜它也是对核心的一个封装，看下实现。

int locks_mandatory_area(int read_write, struct inode *inode,             struct file *filp, loff_t offset,             size_t count){    struct file_lock fl;  // file_lock是内核中用来记录文件锁相关信息的，它的主要成员会在下面做初始化，                          // 因此看看下面的几行就可以对它有个基本了解了。    int error;     locks_init_lock(&fl);          // 最基本的数据结构初始化，忽略它先。    fl.fl_owner = current->files;  // 文件锁的owner，这里是指当前task的文件files列表                                   // (fl_owner: struct files_struct *)。    fl.fl_pid = current->tgid;     // tgid是线程组ID（thread group id），也就是用户态看到的进程号    fl.fl_file = filp;             // 文件（struct file *）    fl.fl_flags = FL_POSIX | FL_ACCESS;          // 文件锁标志（POSIX锁，FL_ACCESS表示仅仅锁校验）    if (filp && !(filp->f_flags & O_NONBLOCK))   // 判断是否允许阻塞操作        fl.fl_flags |= FL_SLEEP;                 // 如果可以阻塞，加上FL_SLEEP参数    fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;  // 锁类型，读还是写    fl.fl_start = offset;   // 锁的起始位置    fl.fl_end = offset + count - 1;              // 锁的结束位置，与fl_start一起表示一个文件区域     for (;;) {                                   // 这里做一个循环，一直等到锁冲突解除或者出现异常        error = <strong><span style="color:#ff0000;">__posix_lock_file</span></strong>(inode, &fl, NULL);                // 这里对文件锁做校验        if (error != FILE_LOCK_DEFERRED)            break;        error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);  // 等待其他进程解锁        if (!error) {            /*             * If we've been sleeping someone might have             * changed the permissions behind our back.             */            if (__mandatory_lock(inode))        // Linux的注释很清楚，就是这个循环过程中，可能有人                                                // 修改文件属性，导致不需要加锁                continue;        }         locks_delete_block(&fl);                // 把锁从阻塞队列中移除        break;    }     return error;}

锁冲突检测核心逻辑

紧跟步伐，看看这个函数怎么做的：__posix_lock_file，函数太长，先看一部分（代码都是一点点看的^_^）。

static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock){    struct file_lock *fl;    struct file_lock *new_fl = NULL;    struct file_lock *new_fl2 = NULL;    struct file_lock *left = NULL;    struct file_lock *right = NULL;    struct file_lock **before;    int error;    bool added = false;     /*     * We may need two file_lock structures for this operation,     * so we get them in advance to avoid races.     *     * In some cases we can be sure, that no new locks will be needed     */    if (!(request->fl_flags & FL_ACCESS) &&        (request->fl_type != F_UNLCK ||         request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {        new_fl = locks_alloc_lock();        new_fl2 = locks_alloc_lock();    }    // 这里弄了两个新的锁，先不管他，待会儿哪里用到了再看（让流程简化）     spin_lock(&inode->i_lock);    /*     * New lock request. Walk all POSIX locks and look for conflicts. If     * there are any, either return error or put the request on the     * blocker's list of waiters and the global blocked_hash.     
*/    if (request->fl_type != F_UNLCK) {  // 从读函数过来的，fl_type是F_RDLCK。        for_each_lock(inode, before) {  // 这里是对inode相关的锁做一个遍历                                        // （inode是什么？它记录了一个文件的信                                        // 息，不是文件打开时创建的，文件在，                                        // 它就在。打开一次文件，就会有一个                                        // struct file 与它关联，当然加一次锁，                                        // 也会记录到这个结构上来）            fl = *before;            if (!IS_POSIX(fl))          // 我们看的都是POSIX锁。                continue;            if (!<strong><span style="color:#ff0000;">posix_locks_conflict</span></strong>(request, fl))  // 检测冲突，待会儿重点看下它                continue;            if (conflock)                            // 这个参数是传过来的，是NULL，不管它                __locks_copy_lock(conflock, fl);            error = -EAGAIN;            if (!(request->fl_flags & FL_SLEEP))     // 冲突了，也不等，直接退出                goto out;            /*             * Deadlock detection and insertion into the blocked             * locks list must be done while holding the same lock!             */            error = -EDEADLK;            spin_lock(&blocked_lock_lock);                     // 这里检测死锁            if (likely(!<strong><span style="color:#ff0000;">posix_locks_deadlock</span></strong>(request, fl))) {  // 这是核心，检测死锁的。                error = FILE_LOCK_DEFERRED;                __locks_insert_block(fl, request);            }            spin_unlock(&blocked_lock_lock);            goto out;          }      }     /* If we're just looking for a conflict, we're done. */    error = 0;    if (request->fl_flags & FL_ACCESS)   // 到这里，说明inode中的锁全部校验完了，                                         // 没有跟我们的锁有冲突的，而且我们也就是                                         // 检测是否有锁冲突，不加锁也不解锁                                         // (FL_ACCESS)。read读取校验锁的部分结                                         // 束了，所以这段代码暂时看到这里！        goto out;    // ……..    // 到此为止，简化逻辑

这段代码看下来，对检测锁冲突的流程有了了解，它是拿读的区域跟当前inode（即文件）的所有锁做检测，看看哪个有冲突，所有都没有冲突，那就可以返回成功。否则，如果不是阻塞操作，也退出。允许阻塞时，做一个死锁检测（死锁检测是干啥的？）。

现在看里面两个重点：posix_locks_conflict和posix_locks_deadlock。

检测两个锁之间是否存在冲突

锁冲突检测：这个函数看起来简单，就几行代码，其实也真的好简单。

/* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl){    /* POSIX locks owned by the same process do not conflict with     * each other.     */    if (!IS_POSIX(sys_fl) || <strong><span style="color:#ff0000;">posix_same_owner</span></strong>(caller_fl, sys_fl)) // 1. 只检测POSIX锁，这个逻辑跟上面的重复了                                                                  // 2. 看看两个锁的owner是否相同，是同一个                                                                  // owner也不算冲突        return (0);     /* Check whether they overlap */    if (!<strong><span style="color:#ff0000;">locks_overlap</span></strong>(caller_fl, sys_fl))       // 判断两个锁的区域是否有重叠        return 0;     return (<strong><span style="color:#ff0000;">locks_conflict</span></strong>(caller_fl, sys_fl));  // 两个都不能是写锁，写锁就是冲突}

抛开锁的类型（只考虑POSIX锁），判断两个锁有冲突的条件是：

锁的owner不同；

锁的区域有重叠；

其中一个是写锁（排斥锁）。

file_lock有个成员变量是fl_owner，但是检测两个锁的owner是否相同不是仅仅判断下这两个变量相同：

static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2){    if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner)        return fl2->fl_lmops == fl1->fl_lmops &&            fl1->fl_lmops->lm_compare_owner(fl1, fl2);  // 他还支持自定义比较两把锁                                                        // owner的函数，好吧，我也脱离主干分岔了    return fl1->fl_owner == fl2->fl_owner;              // 在这里，其实就是检测下owner变量是否相等。}

再看看怎么检测区域重叠，就像检测一条直线上的两个线段是否有重叠：

/* Check if two locks overlap each other. */static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2){    return ((fl1->fl_end >= fl2->fl_start) &&        (fl2->fl_end >= fl1->fl_start));}

简单的啥也不用说。

死锁检测

锁冲突检测完毕，看看死锁检测posix_locks_deadlock。

/* Must be called with the blocked_lock_lock held! */static int posix_locks_deadlock(struct file_lock *caller_fl,                struct file_lock *block_fl)// 两个参数：caller_fl，我们检测的那个锁；block_fl，有冲突的那个锁// 如果这个函数返回1，就认为有死锁{    int i = 0;     while ((block_fl = <strong><span style="color:#ff0000;">what_owner_is_waiting_for</span></strong>(block_fl))) { // 这个函数从阻塞列表中找跟block_fl同                                                               // 一个owner的锁，待会儿看它的实现        if (i++ > MAX_DEADLK_ITERATIONS)      // 如果沿着等待链查找的次数超过了上限，就放弃检测，按没有死锁处理                                              //（MAX_DEADLK_ITERATIONS，我的代码中是10）            return 0;        if (posix_same_owner(caller_fl, block_fl)) // 如果有个锁跟我们要检测的锁属于同一个owner，认为                                                   // 是死锁            return 1;    }    return 0;  // 检测完了，没有相同owner的，没有死锁}

再回过头来看__posix_lock_file函数，它的作用是检测文件关联的所有锁，看看是否有冲突，冲突的条件就是owner不同、区域有重叠且其中一个是写锁。

Linux对文件读写锁/解锁的实现

如果仅仅是读取或者写入操作，那锁的校验也就到此结束了。不过现在还不知道锁是怎么添加的，怎么解锁的。

Linux提供了一个fcntl函数来操作POSIX文件锁。

fcntl的定义如下：

int fcntl(int fildes, int cmd, ...);

fildes是文件描述符；

cmd表示本次fcntl操作的命令；

后面是命令的参数，如果是文件锁，对应的cmd是F_SETLK(加读写锁、解锁等)，后面的参数是struct flock：

short l_type 文件锁的类型： F_RDLCK, F_WRLCK, F_UNLCK

short l_whence 从哪里开始（文件起始位置、当前位置还是末尾）

off_t l_start 锁的起始相对偏移量

off_t l_len 大小，如果是0表示到文件末尾

pid_t l_pid 当前拥有这把锁的进程ID，在F_GETLK时返回。

fcntl对应的系统调用函数是(fs/fcntl.c)：

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg){       struct fd f = fdget_raw(fd);            // 根据文件描述符获取文件相关信息    long err = -EBADF;     if (!f.file)        goto out;     if (unlikely(f.file->f_mode & FMODE_PATH)) {           // 验证参数有效性        if (!check_fcntl_cmd(cmd))            goto out1;    }     err = security_file_fcntl(f.file, cmd, arg);            // 安全相关    if (!err)        err = do_fcntl(fd, cmd, arg, f.file);               // 真正的处理系统调用 out1:     fdput(f);out:    return err;}do_fcntl的定义如下：static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,        struct file *filp){    long err = -EINVAL;     switch (cmd) {    ….. // 此处省略N个字节    case F_GETLK:        // 获取锁信息        err = fcntl_getlk(filp, (struct flock __user *) arg);           break;    case F_SETLK:    case F_SETLKW:       // 加锁        err = <strong><span style="color:#ff0000;">fcntl_setlk</span></strong>(fd, filp, cmd, (struct flock __user *) arg);        break;    ……. // 再省略N个字节    }    return err;}

看看加锁的fcntl_setlk函数：

int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,        struct flock __user *l){    struct file_lock *file_lock = locks_alloc_lock();    // 申请一个file_lock    struct flock flock;    struct inode *inode;    struct file *f;    int error;     if (file_lock == NULL)        return -ENOLCK;     /*     * This might block, so we do it before checking the inode.     */    error = -EFAULT;    if (copy_from_user(&flock, l, sizeof(flock)))    // 把数据从用户空间复制到内核空间        goto out;     inode = file_inode(filp);                        // 获取文件的inode结构     /* Don't allow mandatory locks on files that may be memory mapped     * and shared.     */    if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {    // 校验加锁的条件        error = -EAGAIN;        goto out;    } again:                                                       // 这里有一个循环    error = <strong><span style="color:#ff0000;">flock_to_posix_lock</span></strong>(filp, file_lock, &flock);    // 根据传入的flock信息转换为内核数据结构                                                             // file_lock（按照flock提供的文件区域信                                                             // 息，填充到file_lock中）    if (error)        goto out;    if (cmd == F_SETLKW) {                   // 最后一个W是Wait的意思，就是允许等待        file_lock->fl_flags |= FL_SLEEP;    }       error = -EBADF;    switch (flock.l_type) {                  // 验证锁类型与当前打开文件的模式是否匹配    case F_RDLCK:        if (!(filp->f_mode & FMODE_READ))    // 加了读锁，但是文件打开模式不包含读取            goto out;        break;    case F_WRLCK:                            // 写锁，校验与读锁类似        if (!(filp->f_mode & FMODE_WRITE))            goto out;        break;    case F_UNLCK:                            // 解锁啥都不用干了，就是解锁        break;    default:        error = -EINVAL;        goto out;    }     error = <strong><span style="color:#ff0000;">do_lock_file_wait</span></strong>(filp, cmd, file_lock);   // 看名字猜，这里就是去加锁的函数     // 如果在这个过程中，这个文件被关闭了，文件描述符重新分配    // 给了其他文件，就得回退，加了的锁再解锁    /*     * Attempt to 
detect a close/fcntl race and recover by     * releasing the lock that was just acquired.     */    /*     * we need that spin_lock here - it prevents reordering between     * update of inode->i_flock and check for it done in close().     * rcu_read_lock() wouldn't do.     */    spin_lock(&current->files->file_lock);    f = fcheck(fd);    // 获取描述符fd对应的struct file 结构（文件打开时会创建对应的struct file）    spin_unlock(&current->files->file_lock);    if (!error && f != filp && flock.l_type != F_UNLCK) {  // !error: 加锁函数执行成                                  // 功，表示加锁或者解锁操作成功；f != filp：描述符对                                  // 应的打开文件已经变了，需要做回退操作；                                  // flock.l_type != F_UNLCK：如果原来就是解锁了，那                                  // 就不用回退，解锁无需回退        flock.l_type = F_UNLCK;   // 加锁变成解锁        goto again;               // 执行回退操作    } out:    locks_free_lock(file_lock);    return error;}

flock_to_posix_lock这个函数比较简单，是把用户层的参数flock转换为内核层数据结构file_lock，既然简单，就简单看下吧。

static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,                   struct flock *l){    off_t start, end;     switch (l->l_whence) {   // 文件偏移量从哪里计算    case SEEK_SET:           // 从文件起始处        start = 0;        break;    case SEEK_CUR:           // 文件当前位置        start = filp->f_pos; // struct file记录了当前操作的文件位置        break;    case SEEK_END:           // 文件结尾        start = i_size_read(file_inode(filp)); // 这个就是文件大小        break;    default:        return -EINVAL;    }     /* POSIX-1996 leaves the case l->l_len < 0 undefined;       POSIX-2001 defines it. */    start += l->l_start;    // 计算加锁的文件起始位置：文件便宜位置+相对位置    if (start < 0)        return -EINVAL;    fl->fl_end = OFFSET_MAX;     // 下面几行计算加锁区域的结束位置    if (l->l_len > 0) {          // 加锁的长度大于0，没什么说的        end = start + l->l_len - 1;        fl->fl_end = end;    } else if (l->l_len < 0) {   // 如过给的是负数，区域结尾就是当前计算的                                 // 起始位置start-1，起始位置改成起始位置                                 // 加上这个负的长度        end = start - 1;        fl->fl_end = end;        start += l->l_len;        if (start < 0)            return -EINVAL;    }                            // 这里没有说长度是0的处理，其实就用的默                                 // 认值OFFSET_MAX，可以一直到文件末尾    fl->fl_start = start;    /* we record the absolute position */    if (fl->fl_end < fl->fl_start)        return -EOVERFLOW;       fl->fl_owner = current->files; // 这几个参数跟前面说的计算方法是一样的    fl->fl_pid = current->tgid;    fl->fl_file = filp;    fl->fl_flags = FL_POSIX;    fl->fl_ops = NULL;    fl->fl_lmops = NULL;     return assign_type(fl, l->l_type);  // 校验l_type是否合法：读锁、写锁和解锁之一}

回到重点do_lock_file_wait函数，其实这个函数也没啥重点的，只是它调用了核心函数。

static int do_lock_file_wait(struct file *filp, unsigned int cmd,                 struct file_lock *fl){    int error;     error = security_file_lock(filp, fl->fl_type);  // 安全检测，略过    if (error)        return error;     for (;;) {   // 循环尝试加锁或解锁        error = <strong><span style="color:#ff0000;">vfs_lock_file</span></strong>(filp, cmd, fl, NULL); // 加锁核心函数        if (error != FILE_LOCK_DEFERRED)            break;        error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);  // 需要等待就等一下        if (!error)            continue;         locks_delete_block(fl);        break;    }     return error;}

vfs_lock_file相当简单，也只是一个函数的封装：

int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf){    if (filp->f_op->lock)    // 文件系统如果提供了自己的锁，就用自己的（ext2文件系统没有提供这个文件锁函数，所以不看它）        return filp->f_op->lock(filp, cmd, fl);    else        return <strong><span style="color:#ff0000;">posix_lock_file</span></strong>(filp, fl, conf); // 系统提供的标准POSIX文件锁}

posix_lock_file真的也不想说什么，它就一句话：

return __posix_lock_file(file_inode(filp),fl, conflock);

加锁解锁的核心逻辑

__posix_lock_file，前面也提到了。它分为两部分，第一部分的功能是检测文件锁是否有冲突，在上面已经介绍过了；第二部分是真正的加锁或者解锁，主要还是处理新锁与已有锁之间的区域冲突，比如合并、分解，或者删除。

只看第二部分，看代码前提前说一点，文件锁都是按区域来的，对一个区域，只能有一种类型的锁，多个文件锁，是按照区域从小到大来排序的，而且不会有交叉重叠。好，来看代码吧，只有下半部分。

/*     * Find the first old lock with the same owner as the new lock.     */       before = &inode->i_flock;    // struct file_lock *，与这个文件关联的文件锁列表    // 下面的合并或者拆解操作都是针对同一个owner的锁操作的    /* First skip locks owned by other processes.  */    while ((fl = *before) && (!IS_POSIX(fl) ||       // 找到第一个同一个owner的锁                  !posix_same_owner(request, fl))) {        before = &fl->fl_next;    }     /* Process locks with this owner. */    while ((fl = *before) && posix_same_owner(request, fl)) {        /* Detect adjacent or overlapping regions (if same lock type)         */        if (request->fl_type == fl->fl_type) {       // 锁的类型是相同的，都是读锁，或都是写锁，不会是解锁            // 这里先找到有区域重叠的锁            // 不过要注意的是，不能用end+1,只能用start -1            // 来比较，因为end可能是OFFSET_MAX，这时候            // end+1就溢出了            if (fl->fl_end < request->fl_start - 1)         // 新锁在fl的右边                goto next_lock;            /* If the next lock in the list has entirely bigger             * addresses than the new one, insert the lock here.             */            if (fl->fl_start - 1 > request->fl_end)         // 新锁在左边                break;             /* If we come here, the new and old lock are of the             * same type and adjacent or overlapping. Make one             * lock yielding from the lower start address of both             * locks to the higher end address.             
*/            // 有重叠或者相同，把它们合并起来            if (fl->fl_start > request->fl_start)           // 找左边界                fl->fl_start = request->fl_start;            else                request->fl_start = fl->fl_start;            if (fl->fl_end < request->fl_end)               // 找右边界                fl->fl_end = request->fl_end;            else                request->fl_end = fl->fl_end;            if (added) {                        // 这个added是说是不是第一次合并，如果不是的话，得删掉一把                                                // 锁，第一次不用删                locks_delete_lock(before);      // 删除后fl自动移动到了下一个锁                continue;            }            request = fl;            added = true;        }        else {             // 处理不同类型的锁，如果有重叠的话，新锁的区域应该覆盖旧锁            /* Processing for different lock types is a bit             * more complex.             */            if (fl->fl_end < request->fl_start)  // 没有重叠，不过这里比较的不是start – 1，而是start，                                                 // 为什么？因为相同类型的锁，边界相邻，需要做合并                goto next_lock;            if (fl->fl_start > request->fl_end)  // 没有重叠                break;            if (request->fl_type == F_UNLCK)                     added = true;                    // added = true将锁删掉            if (fl->fl_start < request->fl_start)// 当前锁的左边界在request左边界的左边                left = fl;                       // 记录下left            /* If the next lock in the list has a higher end             * address than the new one, insert the new one here.             
*/            if (fl->fl_end > request->fl_end) {  // 当前的锁右边界在request右边界的右边                right = fl;                      // 记录下right                break;                           // 找到了比request还要靠右的，那就不用找了，后面的肯定跟                                                 // request没有重叠的            }            if (fl->fl_start >= request->fl_start) {    // 这把锁的左边界在request左边界的右边，结合上面                                                      // 那个分析判断，这时候fl肯定完全被request覆盖掉了                /* The new lock completely replaces an old                 * one (This may happen several times).                 */                if (added) {                           // 如果已经添加过一把新锁或者是解锁操作                    locks_delete_lock(before);         // 直接删除旧锁，否则à                    continue;                }                /* Replace the old lock with the new one.                 * Wake up anybody waiting for the old one,                 * as the change in lock type might satisfy                 * their needs.                 */                locks_wake_up_blocks(fl);                fl->fl_start = request->fl_start;      // 用新的锁覆盖旧的锁                fl->fl_end = request->fl_end;                fl->fl_type = request->fl_type;                locks_release_private(fl);                locks_copy_private(fl, request);                request = fl;                added = true;            }        }        /* Go on to next lock.         */    next_lock:        before = &fl->fl_next;    }     // 这个循环做完了，可能会得到这样的结果：<span style="font-family: Arial, Helvetica, sans-serif;">    </span>    // 1. 与相同类型的锁合并了，或者合并了一部分；    // 2. 替换掉了不同类型的锁；    // 3. 找到了一个这个锁左边的一把锁（指左边界）；    // 4. 找到了一个这个锁右边的一把锁（指右边界）    /*     * The above code only modifies existing locks in case of merging or     * replacing. If new lock(s) need to be inserted all modifications are     * done below this, so it's safe yet to bail out.     
*/    error = -ENOLCK; /* "no luck" */    if (right && left == right && !new_fl2)  // 如果找到了一个可以覆盖request的锁，但是没有创建一个新锁                    // new_fl2的话，直接退出。什么时候会创建new_fl2?在这个函数最前面有一个判断，new_fl2与                    // new_fl同时创建，条件是：1. 本次不是仅仅检测锁状态，2. 不是解锁，起始位置不是0，结束                    // 为止不是OFFSET_MAX（综合起来看第二个条件，就是说如果解锁时，解的是从0到最大值，就是                    // 整个文件上加的锁肯定会解开，那就不可能会创建新的锁），那就会创建这两个锁        goto out;     error = 0;    if (!added) {                              // 这个标志打上了，说明request没有被因为锁区域冲突处理过        if (request->fl_type == F_UNLCK) {     // 解锁操作            if (request->fl_flags & FL_EXISTS) // FL_EXISTS这个标志位是说，如果执行解锁操作时，仅仅查看锁是否存在                error = -ENOENT;               // 返回不存在（没有冲突的锁）            goto out;        }         // request锁在所有锁的缝隙中，与其他锁没有任何区域重叠，或者就是第一把锁，就把这把锁加进去        if (!new_fl) {            error = -ENOLCK;            goto out;        }        locks_copy_lock(new_fl, request);        locks_insert_lock(before, new_fl);        new_fl = NULL;    }     // 下面处理类型不同，但是有重叠区域的锁    // right是指右边界在request右边的锁    // left是指左边界在request左边的锁    if (right) {                   // 如果找到了request右边的那把锁        if (left == right) {       // 且左边的锁跟右边的是同一把，就是这把旧锁的区域把request的区域覆盖了，                                   // 那就把这把锁拆开，左边一个，中间一个request，右边一个            /* The new lock breaks the old one in two pieces,             * so we have to use the second new lock.             
*/            left = new_fl2;       // 左边保留            new_fl2 = NULL;            locks_copy_lock(left, right);            locks_insert_lock(before, left);        }        right->fl_start = request->fl_end + 1;  // 调整右边锁的区域，左边锁的区域下面会调整        locks_wake_up_blocks(right);    }    if (left) {                    // 找到了request左边的那把锁，但是没有覆盖掉request（否则right也等于                                   // left，在上面的逻辑就处理掉了）；或者是找到了覆盖掉request的那把锁，                                   // 现在处理左半边那把锁        left->fl_end = request->fl_start - 1;        locks_wake_up_blocks(left);    } out:    spin_unlock(&inode->i_lock);    /*     * Free any unused locks.     */    if (new_fl)        locks_free_lock(new_fl);    if (new_fl2)        locks_free_lock(new_fl2);    return error;}

加锁的核心部分已经完成了，代码函数虽然有点长，但是逻辑还是很清晰，目的就是检测锁冲突，新锁覆盖旧锁。

总结

POSIX文件锁是为了保证多个程序同时访问同一个文件时数据的完整性。Linux在每次文件读写和加锁时都会检测是否有锁冲突，每次加锁或解锁，都会更新相应区域的锁为新锁类型，当然，解锁是直接把该区域的锁信息删除。另外，Linux将文件锁的区域按照从左到右排序，提高了锁的访问效率。

0 0