xv6源码分析(七):文件系统

来源:互联网 发布:照片说话软件 编辑:程序博客网 时间:2024/05/01 20:22

xv6文件系统采用了分层的实现,下面的每一层都向上提供接口供上层调用,这里并不阐述xv6文件系统的系统细节,仅仅针对每一层需要注意的问题和各种接口的联系做解释,具体的文件系统细节可参考xv6中文文档。

xv6文件系统

这里写图片描述

块缓冲层

xv6将磁盘划分为一个个带编号的块,每块512Byte,磁盘读写总是以块为单位。xv6使用结构buf来表示磁盘块数据在内核中的副本:

// In-kernel representation of one disk block held in the buffer cache.
struct buf {
  int flags;              // status bits; B_VALID and B_DIRTY are tested by the cache code
  uint dev;               // device number of the cached block
  uint blockno;           // block number on that device
  struct sleeplock lock;  // sleep-lock acquired in bget(), released in brelse()
  uint refcnt;            // reference count; bget() only recycles buffers with refcnt == 0
  struct buf *prev;       // LRU cache list
  struct buf *next;
  struct buf *qnext;      // disk queue
  uchar data[BSIZE];      // block contents
};

xv6设置有内核缓冲区来缓存一定量的块,并用LRU来实现缓存替换。

// The buffer cache: a fixed pool of NBUF buffers and the list ordering them.
struct {
  struct spinlock lock;   // held while bget()/brelse() touch the list and refcnt
  struct buf buf[NBUF];   // backing storage for all buffers

  // Linked list of all buffers, through prev/next.
  // head.next is most recently used.
  struct buf head;
} bcache;

xv6在内核中分配了一个静态数组,并通过head buf将其中的缓冲块串成双向链表。链表按最近使用的先后顺序排列(head.next是最近使用的块),按这种顺序组织结构能让块的查找和替换更加高效。
块缓冲层提供有binit,bget,bread,bwrite,brelse接口。

binit初始化bcache结构并设置块缓冲区需要使用的锁。

voidbinit(void){  struct buf *b;  initlock(&bcache.lock, "bcache");//PAGEBREAK!  // Create linked list of buffers  bcache.head.prev = &bcache.head;  bcache.head.next = &bcache.head;  for(b = bcache.buf; b < bcache.buf+NBUF; b++){    b->next = bcache.head.next;    b->prev = &bcache.head;    initsleeplock(&b->lock, "buffer");    bcache.head.next->prev = b;    bcache.head.next = b;  }}

bread根据参数给定的设备号和块编号调用bget得到块缓冲结构。bget先在块缓冲区中查找该块,找到后增加其引用计数并获取缓冲块的睡眠锁;如果此缓冲块正被其他进程占用,则当前进程睡眠等待唤醒。如果bget没有找到相应的缓冲块,则从链表尾部开始寻找一个引用计数为0且不脏的缓冲块重新利用并返回,此时该缓冲块没有B_VALID标志,由bread调用iderw将数据从磁盘读入内核。如果找不到可回收的缓冲块,bget简单地panic。

// Look through buffer cache for block on device dev.// If not found, allocate a buffer.// In either case, return locked buffer.static struct buf*bget(uint dev, uint blockno){  struct buf *b;  acquire(&bcache.lock);  // Is the block already cached?  for(b = bcache.head.next; b != &bcache.head; b = b->next){    if(b->dev == dev && b->blockno == blockno){      b->refcnt++;      release(&bcache.lock);      acquiresleep(&b->lock);      return b;    }  }  // Not cached; recycle some unused buffer and clean buffer  // "clean" because B_DIRTY and not locked means log.c  // hasn't yet committed the changes to the buffer.  for(b = bcache.head.prev; b != &bcache.head; b = b->prev){    if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) {      b->dev = dev;      b->blockno = blockno;      b->flags = 0;      b->refcnt = 1;      release(&bcache.lock);      acquiresleep(&b->lock);      return b;    }  }  panic("bget: no buffers");}// Return a locked buf with the contents of the indicated block.struct buf*bread(uint dev, uint blockno){  struct buf *b;  b = bget(dev, blockno);  if(!(b->flags & B_VALID)) {    iderw(b);  }  return b;}

bwrite将块缓冲结构写入磁盘

// Mark the buffer dirty and hand it to iderw().
// Panics unless the caller holds the buffer's sleep-lock.
void
bwrite(struct buf *b)
{
  if(holdingsleep(&b->lock) == 0)
    panic("bwrite");

  b->flags = b->flags | B_DIRTY;
  iderw(b);
}

brelse则减少块的引用次数,并移动块的位置实现LRU

// Release a locked buffer.// Move to the head of the MRU list.voidbrelse(struct buf *b){  if(!holdingsleep(&b->lock))    panic("brelse");  releasesleep(&b->lock);  acquire(&bcache.lock);  b->refcnt--;  if (b->refcnt == 0) {    // no one is waiting for it.    b->next->prev = b->prev;    b->prev->next = b->next;    b->next = bcache.head.next;    b->prev = &bcache.head;    bcache.head.next->prev = b;    bcache.head.next = b;  }  release(&bcache.lock);}//PAGEBREAK!// Blank page.

日志层

xv6使用了日志式文件系统来确保写操作不会导致文件系统的破坏,使进程的写操作像一种“原子”操作。如果没有日志,写操作过程中一旦断电或崩溃,就很可能损坏文件系统,例如,断电后目录中留下一个指向空闲i节点的项,就可能导致严重的问题。

xv6使用了非常严格的日志读写来使写操作要么完全完成,要么完全未完成。所有的写操作首先都会写入磁盘中存放日志的区域,只有当真正的写操作完成后才会使日志失效,这样,就算任何过程中断电或者其他原因导致系统崩溃,文件系统的组织结构都不会损坏,结果是要么操作完全完成,要么都未完成。代价是每个写操作都要进行两次,降低了读写效率。

xv6在硬盘中的日志由一个初始块和若干数据块组成。初始块包括一个数组,数组中的每个值记录对应数据块的内容应该写入文件系统中的哪一块;初始块还保存当前有效数据块的计数。在内存中同样需要一个相同的结构来存储这些数据。

// Log header: how many blocks are logged and which block number each
// logged block belongs to.
struct logheader {
  int n;               // number of valid entries in block[]
  int block[LOGSIZE];  // block[i]: block number recorded by log_write()
};

// In-memory state of the log.
struct log {
  struct spinlock lock;  // guards the fields below (taken in begin_op/end_op/log_write)
  int start;             // NOTE(review): presumably first block of the on-disk log — confirm
  int size;              // log_write() panics once lh.n reaches size - 1
  int outstanding; // how many FS sys calls are executing.
  int committing;  // in commit(), please wait.
  int dev;               // NOTE(review): presumably the device holding the log — confirm
  struct logheader lh;   // header contents: count plus target block numbers
};

通过这种方式,bwrite可以使用log_write替代。当修改了内存中的块缓冲区后,log_write在block数组中记录这个块需要写到磁盘中的哪一块,但不会立即写入。当调用commit的时候,先调用write_log把修改过的块写入日志区域,并调用write_head更新初始块,然后调用install_trans真正地更新文件系统。此时如果发生崩溃,日志中的计数仍然非零,重启后可以据此再次进行写操作。最后将计数变量置零并再次更新日志初始块,使日志失效。

通过log_write写入磁盘时,数据并不会立即写入磁盘,只有当调用commit来提交日志时,磁盘操作才会正式开始。

static voidcommit(){  if (log.lh.n > 0) {    write_log();     // Write modified blocks from cache to log    write_head();    // Write header to disk -- the real commit    install_trans(); // Now install writes to home locations    log.lh.n = 0;    write_head();    // Erase the transaction from the log  }}voidlog_write(struct buf *b){  int i;  if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)    panic("too big a transaction");  if (log.outstanding < 1)    panic("log_write outside of trans");  acquire(&log.lock);  for (i = 0; i < log.lh.n; i++) {    if (log.lh.block[i] == b->blockno)   // log absorbtion      break;  }  log.lh.block[i] = b->blockno;  if (i == log.lh.n)    log.lh.n++;  b->flags |= B_DIRTY; // prevent eviction  release(&log.lock);}

xv6日志读写支持并发操作:写操作开始时调用begin_op,结束时调用end_op。begin_op检查日志是否正在提交,或者本次操作是否可能耗尽日志空间,若是则睡眠当前进程;否则增加正在进行的操作计数。end_op减少操作计数,当没有任何进程正在操作log时,调用commit提交日志。

// called at the start of each FS system call.voidbegin_op(void){  acquire(&log.lock);  while(1){    if(log.committing){      sleep(&log, &log.lock);    } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){      // this op might exhaust log space; wait for commit.      sleep(&log, &log.lock);    } else {      log.outstanding += 1;      release(&log.lock);      break;    }  }}// called at the end of each FS system call.// commits if this was the last outstanding operation.voidend_op(void){  int do_commit = 0;  acquire(&log.lock);  log.outstanding -= 1;  if(log.committing)    panic("log.committing");  if(log.outstanding == 0){    do_commit = 1;    log.committing = 1;  } else {    // begin_op() may be waiting for log space.    wakeup(&log);  }  release(&log.lock);  if(do_commit){    // call commit w/o holding locks, since not allowed    // to sleep with locks.    commit();    acquire(&log.lock);    log.committing = 0;    wakeup(&log);    release(&log.lock);  }}

块分配器

// Allocate a zeroed disk block.static uintballoc(uint dev){  int b, bi, m;  struct buf *bp;  bp = 0;  for(b = 0; b < sb.size; b += BPB){    bp = bread(dev, BBLOCK(b, sb));    for(bi = 0; bi < BPB && b + bi < sb.size; bi++){      m = 1 << (bi % 8);      if((bp->data[bi/8] & m) == 0){  // Is block free?        bp->data[bi/8] |= m;  // Mark block in use.        log_write(bp);        brelse(bp);        bzero(dev, b + bi);        return b + bi;      }    }    brelse(bp);  }  panic("balloc: out of blocks");}// Free a disk block.static voidbfree(int dev, uint b){  struct buf *bp;  int bi, m;  readsb(dev, &sb);  bp = bread(dev, BBLOCK(b, sb));  bi = b % BPB;  m = 1 << (bi % 8);  if((bp->data[bi/8] & m) == 0)    panic("freeing free block");  bp->data[bi/8] &= ~m;  log_write(bp);  brelse(bp);}

i节点和i节点内容

i节点分为内核i节点(inode)和磁盘上的i节点(dinode),xv6使用i节点表(icache)来缓存i节点。

// On-disk inode structure.
struct dinode {
  short type;           // File type
  short major;          // Major device number (T_DEV only)
  short minor;          // Minor device number (T_DEV only)
  short nlink;          // Number of links to inode in file system
  uint size;            // Size of file (bytes)
  uint addrs[NDIRECT+1];   // Data block addresses
};

// in-memory copy of an inode
struct inode {
  uint dev;           // Device number
  uint inum;          // Inode number
  int ref;            // Reference count
  struct sleeplock lock;  // sleep-lock taken by ilock(), dropped by iunlock()
  int flags;          // I_VALID

  short type;         // copy of disk inode
  short major;
  short minor;
  short nlink;
  uint size;
  uint addrs[NDIRECT+1];
};

// Inode cache: NINODE in-memory inodes; the spinlock is held while
// iget()/idup() examine or change the ref counts.
struct {
  struct spinlock lock;
  struct inode inode[NINODE];
} icache;

iinit负责初始化i节点相关内容

// Initialise the inode cache: set up the cache spinlock and one sleep-lock
// per cached inode, then read the superblock of `dev` into sb and print
// its layout fields.
void
iinit(int dev)
{
  int i = 0;

  initlock(&icache.lock, "icache");
  for(i = 0; i < NINODE; i++) {
    initsleeplock(&icache.inode[i].lock, "inode");
  }

  readsb(dev, &sb);
  // The format string is split with adjacent-literal concatenation; a
  // backslash line continuation inside the literal turns into an invalid
  // "\ " escape as soon as the two source lines are joined.
  cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d"
          " inodestart %d bmap start %d\n", sb.size, sb.nblocks,
          sb.ninodes, sb.nlog, sb.logstart, sb.inodestart,
          sb.bmapstart);
}

ialloc在磁盘中找到空闲i节点并返回内核i节点

// Allocate an inode on device dev with the given type.
// Scans the on-disk inodes starting at number 1 for one whose type is 0,
// zeroes it, stores the type, logs the change and returns the matching
// in-memory inode from iget().  Panics when no free inode exists.
struct inode*
ialloc(uint dev, short type)
{
  int n;
  struct buf *bp;
  struct dinode *dip;

  for(n = 1; n < sb.ninodes; n++){
    bp = bread(dev, IBLOCK(n, sb));
    dip = (struct dinode*)bp->data + n%IPB;
    if(dip->type != 0){
      // Inode is in use; try the next one.
      brelse(bp);
      continue;
    }

    // A free inode: claim it.
    memset(dip, 0, sizeof(*dip));
    dip->type = type;
    log_write(bp);   // mark it allocated on the disk
    brelse(bp);
    return iget(dev, n);
  }
  panic("ialloc: no inodes");
}

iupdate将内核i节点相关内容写入磁盘i节点

// Copy the in-memory inode's fields into its on-disk inode and log the
// modified block.
void
iupdate(struct inode *ip)
{
  struct buf *bp;
  struct dinode *disk;

  bp = bread(ip->dev, IBLOCK(ip->inum, sb));
  disk = (struct dinode*)bp->data + ip->inum%IPB;

  disk->type  = ip->type;
  disk->major = ip->major;
  disk->minor = ip->minor;
  disk->nlink = ip->nlink;
  disk->size  = ip->size;
  memmove(disk->addrs, ip->addrs, sizeof(ip->addrs));

  log_write(bp);
  brelse(bp);
}

iget返回一个内核i节点

// Return the in-memory inode for (dev, inum).
// If the inode is already cached its reference count is bumped; otherwise
// the first unused cache entry is claimed for it with flags cleared.
// The inode is not locked and nothing is read from disk here.
// Panics when the cache has no unused entry.
static struct inode*
iget(uint dev, uint inum)
{
  int i;
  struct inode *ip;
  struct inode *free_slot = 0;

  acquire(&icache.lock);

  // Look for a cached copy, remembering the first unused entry on the way.
  for(i = 0; i < NINODE; i++){
    ip = &icache.inode[i];
    if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
      ip->ref++;
      release(&icache.lock);
      return ip;
    }
    if(free_slot == 0 && ip->ref == 0)
      free_slot = ip;
  }

  // Not cached: recycle the remembered entry.
  if(free_slot == 0)
    panic("iget: no inodes");

  ip = free_slot;
  ip->dev = dev;
  ip->inum = inum;
  ip->ref = 1;
  ip->flags = 0;
  release(&icache.lock);

  return ip;
}

idup复制一个i节点

// Take one more reference to ip (under the inode cache lock) and return
// the same pointer.
struct inode*
idup(struct inode *ip)
{
  acquire(&icache.lock);
  ip->ref += 1;
  release(&icache.lock);

  return ip;
}

ilock锁住i节点并在必要的时候读取i节点元数据

voidilock(struct inode *ip){  struct buf *bp;  struct dinode *dip;  if(ip == 0 || ip->ref < 1)    panic("ilock");  acquiresleep(&ip->lock);  if(!(ip->flags & I_VALID)){    bp = bread(ip->dev, IBLOCK(ip->inum, sb));    dip = (struct dinode*)bp->data + ip->inum%IPB;    ip->type = dip->type;    ip->major = dip->major;    ip->minor = dip->minor;    ip->nlink = dip->nlink;    ip->size = dip->size;    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));    brelse(bp);    ip->flags |= I_VALID;    if(ip->type == 0)      panic("ilock: no type");  }}

iunlock解锁i节点

// Unlock the given inode.voidiunlock(struct inode *ip){  if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1)    panic("iunlock");  releasesleep(&ip->lock);}
0 0
原创粉丝点击