xv6源码分析(七):文件系统
来源:互联网 发布:照片说话软件 编辑:程序博客网 时间:2024/05/01 20:22
xv6文件系统采用了分层的实现,下面的每一层都向上提供接口供上层调用,这里并不阐述xv6文件系统的系统细节,仅仅针对每一层需要注意的问题和各种接口的联系做解释,具体的文件系统细节可参考xv6中文文档。
xv6文件系统
块缓冲层
xv6将硬盘中的每个分区编号为各种块,每块512Byte,磁盘读写总是以块为单位,xv6使用结构buf来代表磁盘块数据在内核中的表示:
struct buf { int flags; uint dev; uint blockno; struct sleeplock lock; uint refcnt; struct buf *prev; // LRU cache list struct buf *next; struct buf *qnext; // disk queue uchar data[BSIZE];};
xv6设置有内核缓冲区来缓存一定量的块,并用LRU来实现缓存替换。
struct { struct spinlock lock; struct buf buf[NBUF]; // Linked list of all buffers, through prev/next. // head.next is most recently used. struct buf head;} bcache;
xv6在内核中分配了静态数组,然后通过head buf将其构成双向链表。双向链表按最近使用的先后顺序组织各缓冲块(head.next是最近使用的块),这样的组织方式能让块读取更加高效。
块缓冲层提供有binit,bget,bread,bwrite,brelse接口。
binit初始化bcache结构并设置块缓冲区需要使用的锁。
voidbinit(void){ struct buf *b; initlock(&bcache.lock, "bcache");//PAGEBREAK! // Create linked list of buffers bcache.head.prev = &bcache.head; bcache.head.next = &bcache.head; for(b = bcache.buf; b < bcache.buf+NBUF; b++){ b->next = bcache.head.next; b->prev = &bcache.head; initsleeplock(&b->lock, "buffer"); bcache.head.next->prev = b; bcache.head.next = b; }}
bread根据参数确定设备号和块编号并调用bget得到块缓冲结构。bget在块缓冲区中查找对应的缓冲块,如果此缓冲块已经被其他进程占用,则当前进程睡眠等待唤醒。如果bget没有找到相应的块缓冲结构,则从缓冲区中回收一个引用计数为0且未标记B_DIRTY的缓冲块并返回,由bread调用iderw将数据读入内核。如果没有可回收的缓冲块,bget简单地panic。
// Look through buffer cache for block on device dev.// If not found, allocate a buffer.// In either case, return locked buffer.static struct buf*bget(uint dev, uint blockno){ struct buf *b; acquire(&bcache.lock); // Is the block already cached? for(b = bcache.head.next; b != &bcache.head; b = b->next){ if(b->dev == dev && b->blockno == blockno){ b->refcnt++; release(&bcache.lock); acquiresleep(&b->lock); return b; } } // Not cached; recycle some unused buffer and clean buffer // "clean" because B_DIRTY and not locked means log.c // hasn't yet committed the changes to the buffer. for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ if(b->refcnt == 0 && (b->flags & B_DIRTY) == 0) { b->dev = dev; b->blockno = blockno; b->flags = 0; b->refcnt = 1; release(&bcache.lock); acquiresleep(&b->lock); return b; } } panic("bget: no buffers");}// Return a locked buf with the contents of the indicated block.struct buf*bread(uint dev, uint blockno){ struct buf *b; b = bget(dev, blockno); if(!(b->flags & B_VALID)) { iderw(b); } return b;}
bwrite将块缓冲结构写入磁盘
// Mark b dirty and pass it to iderw().
// The caller must hold b->lock; panics with "bwrite" otherwise.
void
bwrite(struct buf *b)
{
  if(!holdingsleep(&b->lock))
    panic("bwrite");
  b->flags |= B_DIRTY;
  iderw(b);
}
brelse则减少块的引用次数,并移动块的位置实现LRU
// Release a locked buffer.// Move to the head of the MRU list.voidbrelse(struct buf *b){ if(!holdingsleep(&b->lock)) panic("brelse"); releasesleep(&b->lock); acquire(&bcache.lock); b->refcnt--; if (b->refcnt == 0) { // no one is waiting for it. b->next->prev = b->prev; b->prev->next = b->next; b->next = bcache.head.next; b->prev = &bcache.head; bcache.head.next->prev = b; bcache.head.next = b; } release(&bcache.lock);}//PAGEBREAK!// Blank page.
日志层
xv6使用了日志式文件系统来确保写操作不会导致文件系统的破坏,进程的写操作像一种“原子”操作,如果写操作过程中断电崩溃,将很大可能损坏文件系统,例如,在断电后目录有一个指向空闲i节点的项将可能导致严重的问题。
xv6使用了非常严格的日志读写来使读写操作要么完全完成,要么完全未完成。所有的读写操作首先都会写入磁盘中存放日志的区域,只有当真正的读写操作完成后才会使日志失效,这样,就算任何过程中断电或者其他原因导致系统崩溃,文件系统的组织结构都不会损坏,结果是要么操作完全完成,要么都未完成。尽管这样使得每个操作进行了两次,降低了读写效率。
xv6在硬盘中的日志由一个初始块和若干数据块组成。初始块包括一个数组,数组的每个值表示对应数据块的内容应该写入文件系统中的哪一块;初始块还保存当前有效数据块的计数。在内存中同样需要一个相同的结构来存储这些数据。
// Log header, kept in memory as log.lh.
struct logheader {
  int n;                // number of valid entries in block[]
  int block[LOGSIZE];   // block number recorded for each logged block
};

// In-memory state of the log.
struct log {
  struct spinlock lock;
  int start;
  int size;
  int outstanding; // how many FS sys calls are executing.
  int committing;  // in commit(), please wait.
  int dev;
  struct logheader lh;
};
通过这种方式,bwrite可以使用log_write替代。当修改了内存中的块缓冲区后,log_write同时在block数组中记录这个块需要写到磁盘中的哪一块,但是没有立即写入。当调用commit的时候,先调用write_log把数据写入日志区域,并调用write_head更新初始块,然后调用install_trans真正地更新文件系统。在此期间如果发生崩溃,日志中的计数仍为非零,重启后可以据此再次进行写操作。最后将计数变量置零并再次更新日志初始块,使日志失效。
通过log_write写入磁盘时,数据并不会立即写入磁盘,只有当调用commit来提交日志时,磁盘写入才会正式开始。
static voidcommit(){ if (log.lh.n > 0) { write_log(); // Write modified blocks from cache to log write_head(); // Write header to disk -- the real commit install_trans(); // Now install writes to home locations log.lh.n = 0; write_head(); // Erase the transaction from the log }}voidlog_write(struct buf *b){ int i; if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) panic("too big a transaction"); if (log.outstanding < 1) panic("log_write outside of trans"); acquire(&log.lock); for (i = 0; i < log.lh.n; i++) { if (log.lh.block[i] == b->blockno) // log absorbtion break; } log.lh.block[i] = b->blockno; if (i == log.lh.n) log.lh.n++; b->flags |= B_DIRTY; // prevent eviction release(&log.lock);}
xv6日志读写支持并发操作,当要写操作时,调用begin_op,结束时调用end_op,begin_op检查日志是否正在提交,如果正在提交则睡眠当前进程,如果不在提交则增加操作次数,end_op减少操作次数,当没有任何进程正在操作log时,调用commit提交日志。
// called at the start of each FS system call.voidbegin_op(void){ acquire(&log.lock); while(1){ if(log.committing){ sleep(&log, &log.lock); } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){ // this op might exhaust log space; wait for commit. sleep(&log, &log.lock); } else { log.outstanding += 1; release(&log.lock); break; } }}// called at the end of each FS system call.// commits if this was the last outstanding operation.voidend_op(void){ int do_commit = 0; acquire(&log.lock); log.outstanding -= 1; if(log.committing) panic("log.committing"); if(log.outstanding == 0){ do_commit = 1; log.committing = 1; } else { // begin_op() may be waiting for log space. wakeup(&log); } release(&log.lock); if(do_commit){ // call commit w/o holding locks, since not allowed // to sleep with locks. commit(); acquire(&log.lock); log.committing = 0; wakeup(&log); release(&log.lock); }}
块分配器
// Allocate a zeroed disk block.static uintballoc(uint dev){ int b, bi, m; struct buf *bp; bp = 0; for(b = 0; b < sb.size; b += BPB){ bp = bread(dev, BBLOCK(b, sb)); for(bi = 0; bi < BPB && b + bi < sb.size; bi++){ m = 1 << (bi % 8); if((bp->data[bi/8] & m) == 0){ // Is block free? bp->data[bi/8] |= m; // Mark block in use. log_write(bp); brelse(bp); bzero(dev, b + bi); return b + bi; } } brelse(bp); } panic("balloc: out of blocks");}// Free a disk block.static voidbfree(int dev, uint b){ struct buf *bp; int bi, m; readsb(dev, &sb); bp = bread(dev, BBLOCK(b, sb)); bi = b % BPB; m = 1 << (bi % 8); if((bp->data[bi/8] & m) == 0) panic("freeing free block"); bp->data[bi/8] &= ~m; log_write(bp); brelse(bp);}
i节点和i节点内容
i节点分为内核i节点(inode)和磁盘上的i节点(dinode),xv6使用i节点表来缓存i节点
struct dinode { short type; // File type short major; // Major device number (T_DEV only) short minor; // Minor device number (T_DEV only) short nlink; // Number of links to inode in file system uint size; // Size of file (bytes) uint addrs[NDIRECT+1]; // Data block addresses};// in-memory copy of an inodestruct inode { uint dev; // Device number uint inum; // Inode number int ref; // Reference count struct sleeplock lock; int flags; // I_VALID short type; // copy of disk inode short major; short minor; short nlink; uint size; uint addrs[NDIRECT+1];};struct { struct spinlock lock; struct inode inode[NINODE];} icache;
iinit负责初始化i节点相关内容
// Initialize the inode cache locks, read the superblock of dev into sb,
// and print the superblock fields.
void
iinit(int dev)
{
  int i = 0;

  initlock(&icache.lock, "icache");
  for(i = 0; i < NINODE; i++) {
    initsleeplock(&icache.inode[i].lock, "inode");
  }

  readsb(dev, &sb);
  // Adjacent string literals are concatenated by the compiler, so the
  // format string needs no backslash continuation inside the literal.
  cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d"
          " inodestart %d bmap start %d\n", sb.size, sb.nblocks,
          sb.ninodes, sb.nlog, sb.logstart, sb.inodestart,
          sb.bmapstart);
}
ialloc在磁盘中找到空闲i节点并返回内核i节点
// Allocate a new inode with the given type on device dev.
// A free inode has a type of zero.
// Returns the in-memory inode obtained from iget();
// panics with "ialloc: no inodes" if no free on-disk inode exists.
struct inode*
ialloc(uint dev, short type)
{
  int inum;
  struct buf *bp;
  struct dinode *dip;

  // The scan starts at inode number 1.
  for(inum = 1; inum < sb.ninodes; inum++){
    bp = bread(dev, IBLOCK(inum, sb));
    dip = (struct dinode*)bp->data + inum%IPB;
    if(dip->type == 0){  // a free inode
      memset(dip, 0, sizeof(*dip));
      dip->type = type;
      log_write(bp);   // mark it allocated on the disk
      brelse(bp);
      return iget(dev, inum);
    }
    brelse(bp);
  }
  panic("ialloc: no inodes");
}
iupdate将内核i节点相关内容写入磁盘i节点
// Copy a modified in-memory inode to its on-disk inode slot
// and record the containing block with log_write().
void
iupdate(struct inode *ip)
{
  struct buf *bp;
  struct dinode *dip;

  bp = bread(ip->dev, IBLOCK(ip->inum, sb));
  // Locate this inode's slot within the block.
  dip = (struct dinode*)bp->data + ip->inum%IPB;
  dip->type = ip->type;
  dip->major = ip->major;
  dip->minor = ip->minor;
  dip->nlink = ip->nlink;
  dip->size = ip->size;
  memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
  log_write(bp);
  brelse(bp);
}
iget返回一个内核i节点
// Find the inode with number inum on device dev
// and return the in-memory copy. Does not lock
// the inode and does not read it from disk.
// Panics with "iget: no inodes" when the table has no free slot.
static struct inode*
iget(uint dev, uint inum)
{
  struct inode *ip, *empty;

  acquire(&icache.lock);

  // Is the inode already cached?
  empty = 0;
  for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
    if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
      ip->ref++;
      release(&icache.lock);
      return ip;
    }
    if(empty == 0 && ip->ref == 0)    // Remember empty slot.
      empty = ip;
  }

  // Recycle an inode cache entry.
  if(empty == 0)
    panic("iget: no inodes");

  ip = empty;
  ip->dev = dev;
  ip->inum = inum;
  ip->ref = 1;
  ip->flags = 0;   // clears I_VALID, so ilock() will read the inode from disk
  release(&icache.lock);

  return ip;
}
idup复制一个i节点
// Increment the reference count for ip while holding icache.lock.
// Returns ip to enable the ip = idup(ip1) idiom.
struct inode*
idup(struct inode *ip)
{
  acquire(&icache.lock);
  ip->ref += 1;
  release(&icache.lock);

  return ip;
}
ilock锁住i节点并在必要的时候读取i节点元数据
voidilock(struct inode *ip){ struct buf *bp; struct dinode *dip; if(ip == 0 || ip->ref < 1) panic("ilock"); acquiresleep(&ip->lock); if(!(ip->flags & I_VALID)){ bp = bread(ip->dev, IBLOCK(ip->inum, sb)); dip = (struct dinode*)bp->data + ip->inum%IPB; ip->type = dip->type; ip->major = dip->major; ip->minor = dip->minor; ip->nlink = dip->nlink; ip->size = dip->size; memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); brelse(bp); ip->flags |= I_VALID; if(ip->type == 0) panic("ilock: no type"); }}
iunlock解锁i节点
// Unlock the given inode.voidiunlock(struct inode *ip){ if(ip == 0 || !holdingsleep(&ip->lock) || ip->ref < 1) panic("iunlock"); releasesleep(&ip->lock);}
- xv6源码分析(七):文件系统
- xv6源码分析(一):BootLoader
- xv6源码分析(三):锁
- xv6源码分析(四):内存管理
- xv6源码分析(六):进程调度
- Linux内核源码分析--文件系统(七、Namei.c)
- xv6源码分析(二):内核初始化和多核启动
- xv6源码分析(五):异常、中断、系统调用机制
- xv6文件系统详解
- openMPM源码分析(七)
- DispatcherServlet 源码分析(七)
- xv6启动源码阅读
- Ext4文件系统架构分析(七) ——ioctl源码分析之扩展EXT4文件系统最后一个块组大小
- XV6 进程调度分析
- xv6的中断分析
- xv6 haedware 硬件分析
- UBIFS文件系统源码分析
- mina源码分析七(转)
- 在LCD上实现简易电子钟
- 用c++对一个5位数的任意整数,求出其降序数。例如,整数是82319,则其降序数是98321。试建立一个类DescendNUM,用于完成该功能。
- 关于成为一名嵌入式程序员的日常总结(7)
- 【PAT】1092. To Buy or Not to Buy
- bzoj2588:Count on a tree(可持久化线段树+Lca)
- xv6源码分析(七):文件系统
- 微信开发二之关键字回复
- JavaScript动态向表格添加数据
- 如何使用CSS
- 素数;最大公约数和最小公倍数;“冒泡”问题
- HDU 3085 Nightmare 双向bfs
- ubuntu16.04下安装cuda8.0
- ToolStripMenuItem
- POJ - 2387Til the Cows Come Home