sheepdog 源码学习（1）

来源：互联网　发布：c语言四则运算程序问题　编辑：程序博客网　时间：2024/04/29 17:46

最近比较忙，做毕设（sheepdog），要在 sheepdog 的基础上做点东西，也就是说要改 sheepdog 源码。我只有一个月时间，所以最近一直都在读 sheepdog 源码。C 语言太菜，里面好多用法都不会，基本都是边看边查。。。

sheepdog 可以干什么？首先，你需要启动 sheep，启动的命令可以用下面这个。当然也可以用 test 目录下的测试脚本来启动一个虚拟的 sheepdog 集群。何谓虚拟呢？就是这个脚本可以在你的主机上启动 5 个 sheep 进程，每个 sheep 进程占用不同的端口，这样这 5 个 sheep 进程就组成了一个 sheepdog 集群了。这样，你也就拥有一个 sheepdog 集群了。之后，你就可以用 dog 命令来看一些东西了，比如 dog node list、dog vdi list 之类的，具体是什么意思就需要你去 sheepdog github 去读点文档啦。。。和系统相关的先说这些吧，这只是个开始，但并不是我的重点。再啰嗦一点，这样默认启动后，sheepdog 的日志目录是在 /tmp/sheepdog/ 目录下。这个目录下面有这样几个子目录。首先可能是 0/ 1/ 2/ 3/ ... 以数字命名的目录，这表示这是 node ID 的 log 目录。因为你是在主机上虚拟出的几个 sheep，每一个 sheep 都要有一个 log 目录，这是自然的啦。。。进入任意一个目录，就拿 0/ 作比方吧，进去可以看到有这样几个文件：obj/ sheep.log epoch sock config 等文件。其中 obj 就是用来存放数据块的目录哦，如果你给你创建的 vdi 写入了比较多的东西，那么这个目录下应该有很多文件，并且每一个都是 4M。哈哈，不信你可以试一下。当然，要读系统 debug 日志的话，打开 sheep.log 读就可以了。

sheep /tmp/sheepdog/4 -z 4 -p 7004 -c local -n -y 127.0.0.1 -d  # 当然，这是我从 test 目录下的脚本里偷出来的哈。。。

dog 是 sheepdog 中很重要的一个组成部分，它提供了一系列的系统命令。要分析整个系统的功能，我们当然可以从这里入手，顺藤摸瓜。。。好吧。开始吧。首先我们找到这个文件，会是谁呢，当然是 dog.c 啦，还能有谁啊，打开它啊，从哪里读呢，当然是从 main 开始啊。开始看吧。

下面是 dog.c 的 main 函数部分。当然，有些内容我省略掉了。首先需要注意的是 init_commands(&commands) 这个函数，我们跟进去，看看它具体在干什么。

/*
 * Entry point of the dog command-line tool (abridged listing).
 *
 * Looks up the command named by argv[1]/argv[2], parses the remaining
 * options, performs the initialisation steps requested by the command's
 * flags, and finally dispatches to command_fn().
 *
 * Returns the value produced by command_fn(); several failure paths call
 * exit(EXIT_SYSFAIL) instead of returning.
 */
int main(int argc, char **argv){
	int ch, longindex, ret;
	unsigned long flags;
	struct option *long_options;
	const struct command *commands;
	const char *short_options;
	/* p, sdhost and sdport are not referenced in the visible part of this
	 * listing; presumably they are used by the elided option loop. */
	char *p;
	const struct sd_option *sd_opts;
	uint8_t sdhost[16];
	int sdport;

	install_crash_handler(crash_handler);

	/* Obtain the table of top-level commands. */
	init_commands(&commands);

	/* Without at least a command word there is nothing to look up. */
	if (argc < 2)
		usage(commands, 0);

	/* Resolve argv[1] / argv[2] against the table; the returned flags
	 * drive the conditional initialisation below. */
	flags = setup_commands(commands, argv[1], argv[2]);

	/* Option parsing starts after the two words handed to
	 * setup_commands(). */
	optind = 3;

	sd_opts = build_sd_options(command_opts);
	long_options = build_long_options(sd_opts);
	short_options = build_short_options(sd_opts);

	/* NOTE(review): mytest_func() is not defined in this listing; it looks
	 * like a debugging hook added by the article's author — confirm. */
	mytest_func();

	while ((ch = getopt_long(argc, argv, short_options, long_options,&longindex)) >= 0) {
		/* Option handling was elided in the original listing. */
		......
	}

	/* Disable highlighting when is_stdout_console() is false or
	 * raw_output is set. */
	if (!is_stdout_console() || raw_output)
		highlight = false;

	/* Only commands flagged CMD_NEED_NODELIST fetch the node list. */
	if (flags & CMD_NEED_NODELIST) {
		ret = update_node_list(SD_MAX_NODES);
		if (ret < 0) {
			sd_err("Failed to get node list");
			exit(EXIT_SYSFAIL);
		}
	}

	/* CMD_NEED_ARG commands need at least one argument left after the
	 * options; argc == optind means none remain. */
	if (flags & CMD_NEED_ARG && argc == optind)
		subcommand_usage(argv[1], argv[2], EXIT_USAGE);

	if (init_event(EPOLL_SIZE) < 0)
		exit(EXIT_SYSFAIL);

	if (init_work_queue(get_nr_nodes) != 0) {
		sd_err("Failed to init work queue");
		exit(EXIT_SYSFAIL);
	}

	if (sockfd_init()) {
		sd_err("sockfd_init() failed");
		exit(EXIT_SYSFAIL);
	}

	/* command_fn is not assigned in this listing; presumably it is set
	 * by setup_commands() — confirm. */
	ret = command_fn(argc, argv);

	/* A handler signalling EXIT_USAGE gets the subcommand help printed. */
	if (ret == EXIT_USAGE)
		subcommand_usage(argv[1], argv[2], EXIT_USAGE);

	return ret;}

init_commands(const struct command **commands) 命令初始化函数。

static void init_commands(const struct command **commands){// This is static and be assignment and returned as a pointer.static struct command *cmds;struct command command_list[] = {vdi_command, // in vdi.cnode_command, // in node.ccluster_command, // in cluster.ctrace_command,// in dog.h{NULL,}};if (!cmds) {cmds = (struct command *)xmalloc(sizeof(command_list));memcpy(cmds, command_list, sizeof(command_list));}*commands = cmds;return;}

vdi_command,

/*
 * Top-level "vdi" command entry: the command word "vdi", the vdi_cmd
 * subcommand table, and vdi_parser.
 * NOTE(review): struct command's field names are not visible in this
 * listing; the roles above are read off the initializer order — confirm
 * against the struct definition.
 */
struct command vdi_command = {"vdi",vdi_cmd,vdi_parser};

vdi_cmd。好啦，到这里就差不多了，这里你应该有点感觉了：你在命令行中敲出来的命令，都是存放在这个地方的，对你的每一个命令的响应，也是在这里做出的。比如 dog vdi create，就是第二项 vdi_cmd[1] 所对应的内容。vdi_create 是一个指向函数的指针。我们可以去看看这个函数的具体内容。

static struct subcommand vdi_cmd[] = {{"check", "<vdiname>", "saph", "check and repair image's consistency", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_check, vdi_options},{"create", "<vdiname> <size>", "Pycaphrv", "create an image", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_create, vdi_options},{"snapshot", "<vdiname>", "saphrv", "create a snapshot", NULL, CMD_NEED_ARG, vdi_snapshot, vdi_options},{"clone", "<src vdi> <dst vdi>", "sPcaphrv", "clone an image", NULL, CMD_NEED_ARG, vdi_clone, vdi_options},{"delete", "<vdiname>", "saph", "delete an image", NULL, CMD_NEED_ARG, vdi_delete, vdi_options},{"rollback", "<vdiname>", "saphfrv", "rollback to a snapshot", NULL, CMD_NEED_ARG, vdi_rollback, vdi_options},{"list", "[vdiname]", "aprh", "list images", NULL, 0, vdi_list, vdi_options},{"tree", NULL, "aph", "show images in tree view format", NULL, 0, vdi_tree, vdi_options},{"graph", NULL, "aph", "show images in Graphviz dot format", NULL, 0, vdi_graph, vdi_options},{"object", "<vdiname>", "isaph", "show object information in the image", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_object, vdi_options},{"track", "<vdiname>", "isaph", "show the object epoch trace in the image", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_track, vdi_options},{"setattr", "<vdiname> <key> [value]", "dxaph", "set a VDI attribute", NULL, CMD_NEED_ARG, vdi_setattr, vdi_options},{"getattr", "<vdiname> <key>", "aph", "get a VDI attribute", NULL, CMD_NEED_ARG, vdi_getattr, vdi_options},{"resize", "<vdiname> <new size>", "aph", "resize an image", NULL, CMD_NEED_ARG, vdi_resize, vdi_options},{"read", "<vdiname> [<offset> [<len>]]", "saph", "read data from an image", NULL, CMD_NEED_ARG, vdi_read, vdi_options},{"write", "<vdiname> [<offset> [<len>]]", "apwh", "write data to an image", NULL, CMD_NEED_ARG, vdi_write, vdi_options},{"backup", "<vdiname> <backup>", "sFaph", "create an incremental backup between two snapshots", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_backup, vdi_options},{"restore", "<vdiname> 
<backup>", "saph", "restore snapshot images from a backup", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_restore, vdi_options},{"cache", "<vdiname>", "saph", "Run 'dog vdi cache' for more information", vdi_cache_cmd, CMD_NEED_ARG, vdi_cache, vdi_options},{NULL,},};

vdi_create 额，不好意思，我只是随便选了一个，没想到这个函数这么长，但是创建一个 vdi 的过程就是这样的。当然里面又引出了很多新的东西，这就是需要我们去认真分析的东西。相信已经看到了，里面最重要的过程应该是那个 ret = do_vdi_create() 过程。那是下一个应该考虑的过程。

static int vdi_create(int argc, char **argv){const char *vdiname = argv[optind++];uint64_t size;uint32_t vid;uint64_t oid;uint32_t idx, max_idx, ret, nr_copies = vdi_cmd_data.nr_copies;struct sd_inode *inode = NULL;if (!argv[optind]) {sd_err("Please specify the VDI size");return EXIT_USAGE;}ret = option_parse_size(argv[optind], &size);if (ret < 0)return EXIT_USAGE;if (size > SD_OLD_MAX_VDI_SIZE && 0 == vdi_cmd_data.store_policy) {sd_err("VDI size is larger than %s bytes, please use '-y' to "       "create a hyper volume with size up to %s bytes",       strnumber(SD_OLD_MAX_VDI_SIZE),       strnumber(SD_MAX_VDI_SIZE));return EXIT_USAGE;}if (size > SD_MAX_VDI_SIZE) {sd_err("VDI size is too large");return EXIT_USAGE;}if (nr_copies > sd_nodes_nr) {sd_err("There are not enough nodes(%d) to hold the copies(%d)",       sd_nodes_nr, nr_copies);return EXIT_USAGE;}ret = do_vdi_create(vdiname, size, 0, &vid, false,    vdi_cmd_data.nr_copies, vdi_cmd_data.copy_policy,    vdi_cmd_data.store_policy);if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc)goto out;inode = xmalloc(sizeof(*inode));ret = dog_read_object(vid_to_vdi_oid(vid), inode, sizeof(*inode), 0,      true);if (ret != SD_RES_SUCCESS) {sd_err("Failed to read a newly created VDI object");ret = EXIT_FAILURE;goto out;}max_idx = DIV_ROUND_UP(size, SD_DATA_OBJ_SIZE);for (idx = 0; idx < max_idx; idx++) {vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size);oid = vid_to_data_oid(vid, idx);ret = dog_write_object(oid, 0, NULL, 0, 0, 0, inode->nr_copies,      inode->copy_policy, true, true);if (ret != SD_RES_SUCCESS) {ret = EXIT_FAILURE;goto out;}INODE_SET_VID(inode, idx, vid);ret = sd_inode_write_vid(dog_bnode_writer, inode, idx, vid, vid, 0, false, true);if (ret) {ret = EXIT_FAILURE;goto out;}}vdi_show_progress(idx * SD_DATA_OBJ_SIZE, inode->vdi_size);ret = EXIT_SUCCESS;if (verbose) {if (raw_output)printf("%x\n", vid);elseprintf("VDI ID of newly created VDI: %x\n", vid);}out:free(inode);return ret;}

上面只是讲了一下 sheepdog 中 dog 的一小部分，并且没有深究，当然，我们是需要深究这部分的，看每一个功能从上层到下层的具体实现，这都是很有必要的。今天这些只是讲了最外层的部分，从交互入手，我想这也是认识一个系统的一个比较自然的过程吧。由于时间关系，先写这些，欢迎讨论，待续。。。

0 0