ObjectStore获取文件系统的fsid
来源:互联网 发布:360企业云盘mac版 编辑:程序博客网 时间:2024/06/03 21:24
ceph version: Kraken
ObjectStore获取文件系统的fsid。OSD在用户态又构造了一层自己的文件系统来管理数据,并为其分配了唯一标识UUID(即fsid)。该UUID是这层文件系统元信息的一部分,底层使用的存储后端不同,其保存的位置也不同:BlueStore将其保存在块设备的第一个块中;FileStore将其保存在日志设备的第一个块中。
获取fsid方法:
/**
 * Work out which object store backend owns the device at @p path and
 * fetch its fsid.
 *
 * @param cct   Ceph context, used for logging and passed to the backends.
 * @param path  path of the device to probe.
 * @param fsid  out: receives the fsid reported by the first backend that
 *              succeeds.
 * @return 0 when either backend recognised the device, -EINVAL otherwise.
 */
int ObjectStore::probe_block_device_fsid(
  CephContext *cct,
  const string& path,
  uuid_d *fsid)
{
#if defined(HAVE_LIBAIO)
  // BlueStore is preferred and probed first -- it has a crc on its header
  // and will fail reliably.
  if (BlueStore::get_block_device_fsid(cct, path, fsid) == 0) {
    lgeneric_dout(cct, 0) << __func__ << " " << path << " is bluestore, "
                          << *fsid << dendl;
    return 0;
  }
#endif

  // Fall back to FileStore, which reads the fsid from the journal.
  if (FileStore::get_block_device_fsid(cct, path, fsid) == 0) {
    lgeneric_dout(cct, 0) << __func__ << " " << path << " is filestore, "
                          << *fsid << dendl;
    return 0;
  }

  // Neither backend could read an fsid from this device.
  return -EINVAL;
}
BlueStore 获取OSD文件系统的uuid:该uuid是结构体bluestore_bdev_label_t的osd_uuid字段,该结构体序列化后保存在磁盘的第一个块中。
int BlueStore::get_block_device_fsid(CephContext* cct, const string& path, ¦ ¦ ¦uuid_d *fsid) { bluestore_bdev_label_t label; int r = _read_bdev_label(cct, path, &label); if (r < 0) ¦ return r; *fsid = label.osd_uuid; return 0; }
读取第一个block,反序列化得到label
int BlueStore::_read_bdev_label(CephContext* cct, string path, bluestore_bdev_label_t *label){ dout(10) << __func__ << dendl; //打开设备 int fd = ::open(path.c_str(), O_RDONLY); if (fd < 0) { ¦ fd = -errno; ¦ derr << __func__ << " failed to open " << path << ": " << cpp_strerror(fd) << dendl; ¦ return fd; } bufferlist bl; //从设备中读取指定大小的数据 int r = bl.read_fd(fd, BDEV_LABEL_BLOCK_SIZE); //BDEV_LABEL_BLOCK_SIZE = 4096第一个数据块 VOID_TEMP_FAILURE_RETRY(::close(fd)); if (r < 0) { ¦ derr << __func__ << " failed to read from " << path << ": " << cpp_strerror(r) << dendl; ¦ return r; }//校验数据的完整性,并将其反序列化 uint32_t crc, expected_crc; bufferlist::iterator p = bl.begin(); try { ¦ ::decode(*label, p); ¦ bufferlist t; ¦ t.substr_of(bl, 0, p.get_off()); ¦ crc = t.crc32c(-1); ¦ ::decode(expected_crc, p); } catch (buffer::error& e) { ¦ derr << __func__ << " unable to decode label at offset " << p.get_off() << ": " << e.what() << dendl; ¦ return -EINVAL; } if (crc != expected_crc) { ¦ derr << __func__ << " bad crc on label, expected " << expected_crc << " != actual " << crc << dendl; ¦ return -EIO; } dout(10) << __func__ << " got " << *label << dendl; return 0;}
FileStore 获取osd文件系统的OSD uuid
int FileStore::get_block_device_fsid(CephContext* cct, const string& path, uuid_d *fsid){ // make sure we don't try to use aio or direct_io (and get annoying // error messages from failing to do so); performance implications // should be irrelevant for this use FileJournal j(cct, *fsid, 0, 0, path.c_str(), false, false); return j.peek_fsid(*fsid); }// This can not be used on an active journalint FileJournal::peek_fsid(uuid_d& fsid){ assert(fd == -1); int r = _open(false, false); if (r) ¦ return r; r = read_header(&header); if (r < 0) ¦ goto out; fsid = header.fsid;out: close(); return r;} int FileJournal::_open(bool forwrite, bool create){ int flags, ret; if (forwrite) { ¦ flags = O_RDWR; ¦ if (directio) ¦ ¦ flags |= O_DIRECT | O_DSYNC; } else { ¦ flags = O_RDONLY; } if (create) ¦ flags |= O_CREAT; if (fd >= 0) { ¦ if (TEMP_FAILURE_RETRY(::close(fd))) { ¦ ¦ int err = errno; ¦ ¦ derr << "FileJournal::_open: error closing old fd: " ¦ ¦<< cpp_strerror(err) << dendl; ¦ } } //打开日志设备 fd = TEMP_FAILURE_RETRY(::open(fn.c_str(), flags, 0644)); if (fd < 0) { ¦ int err = errno; ¦ dout(2) << "FileJournal::_open unable to open journal " ¦ ¦ << fn << ": " << cpp_strerror(err) << dendl; ¦ return -err; }//获取指定文件的元信息,读取初始化日志文件(或设备)的相关数据(大小,块大小) struct stat st; ret = ::fstat(fd, &st); if (ret) { ¦ ret = errno; ¦ derr << "FileJournal::_open: unable to fstat journal: " << cpp_strerror(ret) << dendl; ¦ ret = -ret; ¦ goto out_fd; } //判断是常规文件还是裸块设备 if (S_ISBLK(st.st_mode)) { ¦ ret = _open_block_device(); } else if (S_ISREG(st.st_mode)) { ¦ if (aio && !force_aio) { ¦ ¦ derr << "FileJournal::_open: disabling aio for non-block journal. 
Use " ¦ ¦<< "journal_force_aio to force use of aio anyway" << dendl; ¦ ¦ aio = false; ¦ } ¦ ret = _open_file(st.st_size, st.st_blksize, create); } else { ¦ derr << "FileJournal::_open: wrong journal file type: " << st.st_mode ¦<< dendl; ¦ ret = -EINVAL; } if (ret) ¦ goto out_fd;//初始化libaio#ifdef HAVE_LIBAIO if (aio) { ¦ aio_ctx = 0; ¦ ret = io_setup(128, &aio_ctx); ¦ if (ret < 0) { ¦ ¦ switch (ret) { // Contrary to naive expectations -EAGIAN means ... case -EAGAIN: ¦ derr << "FileJournal::_open: user's limit of aio events exceeded. " ¦ ¦ ¦ ¦<< "Try increasing /proc/sys/fs/aio-max-nr" << dendl; ¦ break; default: ¦ derr << "FileJournal::_open: unable to setup io_context " << cpp_strerror(-ret) << dendl; ¦ break; ¦ ¦ } ¦ ¦ goto out_fd; ¦ } }#endif /* We really want max_size to be a multiple of block_size. */ max_size -= max_size % block_size; dout(1) << "_open " << fn << " fd " << fd ¦ << ": " << max_size ¦ << " bytes, block size " << block_size ¦ << " bytes, directio = " << directio ¦ << ", aio = " << aio ¦ << dendl; return 0; out_fd: VOID_TEMP_FAILURE_RETRY(::close(fd)); fd = -1; return ret;}
获取块设备的大小
获取块设备大小,检查是否大于最小日志大小要求。
int FileJournal::_open_block_device(){ int64_t bdev_sz = 0; int ret = get_block_device_size(fd, &bdev_sz); if (ret) { ¦ dout(0) << __func__ << ": failed to read block device size." << dendl; ¦ return -EIO; } /* Check for bdev_sz too small */ if (bdev_sz < ONE_MEG) { ¦ dout(0) << __func__ << ": your block device must be at least " ¦ ¦ << ONE_MEG << " bytes to be used for a Ceph journal." << dendl; ¦ return -EINVAL; } dout(10) << __func__ << ": ignoring osd journal size. " ¦ ¦<< "We'll use the entire block device (size: " << bdev_sz << ")" ¦ ¦<< dendl; max_size = bdev_sz; block_size = cct->_conf->journal_block_size; if (cct->_conf->journal_discard) { //获取磁盘对discard的支持(/sys/block/sdb/queue/discard_granularity) ¦ discard = block_device_support_discard(fn.c_str()); ¦ dout(10) << fn << " support discard: " << (int)discard << dendl; } return 0;}//获取块设备的大小int get_block_device_size(int fd, int64_t *psize){ #ifdef BLKGETSIZE64 int ret = ::ioctl(fd, BLKGETSIZE64, psize);#elif defined(BLKGETSIZE) unsigned long sectors = 0; int ret = ::ioctl(fd, BLKGETSIZE, §ors); *psize = sectors * 512ULL;#else// cppcheck-suppress preprocessorErrorDirective# error "Linux configuration error (get_block_device_size)"#endif if (ret < 0) ret = -errno; return ret; }
当OSD日志位于普通文件(而非块设备)时,会使用该方法来打开该日志文件。
int FileJournal::_open_file(int64_t oldsize, blksize_t blksize, bool create){ int ret; //配置日志文件的大小 int64_t conf_journal_sz(cct->_conf->osd_journal_size); conf_journal_sz <<= 20; if ((cct->_conf->osd_journal_size == 0) && (oldsize < ONE_MEG)) { ¦ derr << "I'm sorry, I don't know how large of a journal to create." ¦<< "Please specify a block device to use as the journal OR " ¦<< "set osd_journal_size in your ceph.conf" << dendl; ¦ return -EINVAL; } if (create && (oldsize < conf_journal_sz)) { ¦ uint64_t newsize(conf_journal_sz); ¦ dout(10) << __func__ << " _open extending to " << newsize << " bytes" << dendl; //扩展日志文件大小,但是该方法只分配了虚拟的空间,即没有实际的数据块 ¦ ret = ::ftruncate(fd, newsize); ¦ if (ret < 0) { ¦ ¦ int err = errno; ¦ ¦ derr << "FileJournal::_open_file : unable to extend journal to " ¦ ¦<< newsize << " bytes: " << cpp_strerror(err) << dendl; ¦ ¦ return -err; ¦ }#ifdef HAVE_POSIX_FALLOCATE//为文件分配实际的磁盘空间,以防止磁盘空间不足导致写入失败。 ¦ ret = ::posix_fallocate(fd, 0, newsize); ¦ if (ret) { ¦ ¦ derr << "FileJournal::_open_file : unable to preallocation journal to " ¦ ¦<< newsize << " bytes: " << cpp_strerror(ret) << dendl; ¦ ¦ return -ret; ¦ } ¦ max_size = newsize;#elif defined(__APPLE__) ¦ fstore_t store; ¦ store.fst_flags = F_ALLOCATECONTIG; ¦ store.fst_posmode = F_PEOFPOSMODE; ¦ store.fst_offset = 0; ¦ store.fst_length = newsize;//同上 ¦ ret = ::fcntl(fd, F_PREALLOCATE, &store); ¦ if (ret == -1) { ¦ ¦ ret = -errno; ¦ ¦ derr << "FileJournal::_open_file : unable to preallocation journal to " ¦ ¦<< newsize << " bytes: " << cpp_strerror(ret) << dendl; ¦ ¦ return ret; ¦ } ¦ max_size = newsize;#else# error "Journal pre-allocation not supported on platform."#endif } else { ¦ max_size = oldsize; } block_size = cct->_conf->journal_block_size;//初始化日志空间,通过填充‘0’ if (create && cct->_conf->journal_zero_on_create) { ¦ derr << "FileJournal::_open_file : zeroing journal" << dendl; ¦ uint64_t write_size = 1 << 20; ¦ char *buf; //申请一块block_size内存对其的write_size大小的内存空间。 ¦ ret = ::posix_memalign((void 
**)&buf, block_size, write_size); ¦ if (ret != 0) { ¦ ¦ return -ret; ¦ } ¦ memset(static_cast<void*>(buf), 0, write_size); ¦ uint64_t i = 0; ¦ for (; (i + write_size) <= (uint64_t)max_size; i += write_size) { ¦ ¦ ret = ::pwrite(fd, static_cast<void*>(buf), write_size, i); ¦ ¦ if (ret < 0) { free(buf); return -errno; ¦ ¦ } ¦ } ¦ if (i < (uint64_t)max_size) { ¦ ¦ ret = ::pwrite(fd, static_cast<void*>(buf), max_size - i, i); ¦ ¦ if (ret < 0) { free(buf); return -errno; ¦ ¦ } ¦ } ¦ free(buf); } dout(10) << "_open journal is not a block device, NOT checking disk " ¦ ¦ ¦ ¦ ¦<< "write cache on '" << fn << "'" << dendl; return 0;}
读取日志的头,该头在日志的第一个块中
int FileJournal::read_header(header_t *hdr) const{ dout(10) << "read_header" << dendl; bufferlist bl; buffer::ptr bp = buffer::create_page_aligned(block_size); char* bpdata = bp.c_str(); int r = ::pread(fd, bpdata, bp.length(), 0); if (r < 0) { ¦ int err = errno; ¦ dout(0) << "read_header got " << cpp_strerror(err) << dendl; ¦ return -err; } // don't use bp.zero() here, because it also invalidates // crc cache (which is not yet populated anyway) if (bp.length() != (size_t)r) { ¦ ¦ // r will be always less or equal than bp.length ¦ ¦ bpdata += r; ¦ ¦ memset(bpdata, 0, bp.length() - r); } bl.push_back(std::move(bp)); try { ¦ bufferlist::iterator p = bl.begin(); ¦ ::decode(*hdr, p); } catch (buffer::error& e) { ¦ derr << "read_header error decoding journal header" << dendl; ¦ return -EINVAL; } /* ¦* Unfortunately we weren't initializing the flags field for new ¦* journals! Aie. This is safe(ish) now that we have only one ¦* flag. Probably around when we add the next flag we need to ¦* remove this or else this (eventually old) code will clobber newer ¦* code's flags. ¦*/ if (hdr->flags > 3) { ¦ derr << "read_header appears to have gibberish flags; assuming 0" << dendl; ¦ hdr->flags = 0; } print_header(*hdr); return 0;}void FileJournal::print_header(const header_t &header) const { dout(10) << "header: block_size " << header.block_size ¦ ¦<< " alignment " << header.alignment ¦ ¦<< " max_size " << header.max_size ¦ ¦<< dendl; dout(10) << "header: start " << header.start << dendl; dout(10) << " write_pos " << write_pos << dendl;}
阅读全文
0 0
- ObjectStore获取文件系统的fsid
- FileNet ce创建ObjectStore的错误解决
- nfs fsid
- Ceph高级工具介绍之ceph-objectstore-tool的使用
- ObjectStore::Transaction
- 获取某个硬盘的文件系统
- dojo小例子(26)ObjectStore onFetch方法获取记录总数
- Ubuntu12.04 安装配置NFSv4,注意点以及NFSv4的使用,fsid=0(草稿)
- CEPH OBJECTSTORE API介绍
- 获取文件属性的文件系统相关函数
- 获取系统路径所在的文件系统
- 获取网站的根目录的物理文件系统路径
- 获取linux的文件系统相关信息的知识点
- 获取文件系统信息
- 如何使用API获取文件系统的文件目录
- 如何使用API获取文件系统的文件目录
- 文件系统---文件系统的基本概念
- 文件系统--文件系统的架构
- Codeforces Round #218 (Div. 2)这场相对比较水啊
- 完美驱动 AppleHDA
- sensor介绍
- Edraw Max(亿图图示)新手福音:教你思维导图的简单画法!
- vnc与windows之间的复制粘贴
- ObjectStore获取文件系统的fsid
- .net下通过委托调用c++ 的dll文件中的回调函数
- plsql developer中如何设置sql window显示行号
- js的dataTime操作方法
- echart 工作记录
- 嵌入式C中volatile和const的用法
- 算法学习-中篇(持续更新)
- [初中oj]2157. 【2017.7.10普及】第四题(树的重心)
- 用Kotlin写android点击事件