Which xlog fsync methods support O_DIRECT (bypassing the kernel buffers)?

来源:互联网 发布:网络证券交易业务模式 编辑:程序博客网 时间:2024/05/16 09:46

Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大,国内顶级PostgreSQL数据库专家将悉数到场,并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:

  • Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
  • Postgres-XL的项目发起人Mason Sharp
  • pgpool的作者石井达夫(Tatsuo Ishii)
  • PG-Strom的作者海外浩平(Kaigai Kohei)
  • Greenplum研发总监姚延栋
  • 周正中(德哥), PostgreSQL中国用户会创始人之一
  • 汪洋,平安科技数据库技术部经理
  • ……


 
  • 2015年度PG大象会报名地址:http://postgres2015.eventdove.com/
  • PostgreSQL中国社区: http://postgres.cn/
  • PostgreSQL专业1群: 3336901(已满)
  • PostgreSQL专业2群: 100910388
  • PostgreSQL专业3群: 150657323



  • PostgreSQL WAL日志的fsync方法较多, 哪些方法支持O_DIRECT呢?
    所有支持的fsync方法 : 

    #wal_sync_method = fsync                # the default is the first option
                                            # supported by the operating system:
                                            #   open_datasync
                                            #   fdatasync (default on Linux)
                                            #   fsync
                                            #   fsync_writethrough
                                            #   open_sync


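    从代码来看, 只有open_sync和open_datasync支持O_DIRECT(旁路内核buffer). 但是要真正旁路内核BUFFER, 还需要同时符合以下条件:
    fsync参数处于开启状态; 未开启归档, 没有下游节点使用流复制协议来复制XLOG数据(即代码中XLogIsNeeded()为假, 对应wal_level = minimal); 并且当前进程不是walreceiver进程.
    原因是开启归档或有下游流复制节点时, XLOG文件写入后很快还会被归档命令或walsender进程读出来, 旁路内核缓存必然导致物理读, 所以O_DIRECT没有意义. walreceiver写入的WAL同样很快会被startup进程读取, 而且walreceiver的写操作不对齐, 无法配合O_DIRECT使用.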

    涉及代码如下 : 
    1. src/include/access/xlogdefs.h

    /*
     * PG_O_DIRECT
     *
     * Expands to the platform's O_DIRECT open flag, or to 0 when the platform
     * does not define O_DIRECT, so callers can OR it in unconditionally.
     *
     * O_DIRECT bypasses the kernel buffers.  Since those buffers are never
     * read except during crash recovery or if wal_level != minimal, using it
     * is a win wherever we sync on each write().  It is not combined with
     * fsync() because it is unclear whether fsync() could process writes that
     * were never buffered in the kernel.  Note that O_DIRECT alone never
     * forces data to the drives -- it only tries to bypass the kernel cache --
     * so O_SYNC/O_DSYNC is still required.
     */
    #ifndef O_DIRECT
    #define PG_O_DIRECT                             0
    #else
    #define PG_O_DIRECT                             O_DIRECT
    #endif

    /*
     * OPEN_SYNC_FLAG: O_SYNC when the platform defines it, otherwise O_FSYNC
     * when that is defined.  Left undefined if neither is available.
     */
    #ifdef O_SYNC
    #define OPEN_SYNC_FLAG          O_SYNC
    #else
    #ifdef O_FSYNC
    #define OPEN_SYNC_FLAG          O_FSYNC
    #endif
    #endif


    2. src/backend/access/transam/xlog.c

    /*
     * GUC support
     *
     * Table pairing each wal_sync_method name with its SYNC_METHOD_* value.
     * Only "fsync" is listed unconditionally; every other entry is compiled in
     * only when the corresponding platform macro is defined, so unsupported
     * methods never appear in the table.  The third field is false for every
     * entry (its meaning is defined by struct config_enum_entry, which is not
     * shown here).
     */
    const struct config_enum_entry sync_method_options[] = {
            {"fsync", SYNC_METHOD_FSYNC, false},
    #ifdef HAVE_FSYNC_WRITETHROUGH
            {"fsync_writethrough", SYNC_METHOD_FSYNC_WRITETHROUGH, false},
    #endif
    #ifdef HAVE_FDATASYNC
            {"fdatasync", SYNC_METHOD_FDATASYNC, false},
    #endif
    #ifdef OPEN_SYNC_FLAG
            {"open_sync", SYNC_METHOD_OPEN, false},
    #endif
    #ifdef OPEN_DATASYNC_FLAG
            {"open_datasync", SYNC_METHOD_OPEN_DSYNC, false},
    #endif
            /* all-NULL/zero entry marks the end of the table */
            {NULL, 0, false}
    };


    /*
     * Return the (possible) sync flag used for opening a file, depending on the
     * value of the GUC wal_sync_method.
     */
    static int
    get_sync_bit(int method)
    {
            int                     o_direct_flag = 0;

            /* If fsync is disabled, never open in sync mode */
            if (!enableFsync)
                    return 0;

            /*
             * Optimize writes by bypassing kernel cache with O_DIRECT when using
             * O_SYNC/O_FSYNC and O_DSYNC.  But only if archiving and streaming are
             * disabled, otherwise the archive command or walsender process will read
             * the WAL soon after writing it, which is guaranteed to cause a physical
             * read if we bypassed the kernel cache. We also skip the
             * posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the same
             * reason.
             *
             * Never use O_DIRECT in walreceiver process for similar reasons; the WAL
             * written by walreceiver is normally read by the startup process soon
             * after its written. Also, walreceiver performs unaligned writes, which
             * don't work with O_DIRECT, so it is required for correctness too.
             */
            if (!XLogIsNeeded() && !AmWalReceiverProcess())
                    o_direct_flag = PG_O_DIRECT;

            switch (method)
            {
                            /*
                             * enum values for all sync options are defined even if they are
                             * not supported on the current platform.  But if not, they are
                             * not included in the enum option array, and therefore will never
                             * be seen here.
                             */
                    case SYNC_METHOD_FSYNC:
                    case SYNC_METHOD_FSYNC_WRITETHROUGH:
                    case SYNC_METHOD_FDATASYNC:
                            return 0;
    #ifdef OPEN_SYNC_FLAG
                    case SYNC_METHOD_OPEN:
                            return OPEN_SYNC_FLAG | o_direct_flag;
    #endif
    #ifdef OPEN_DATASYNC_FLAG
                    case SYNC_METHOD_OPEN_DSYNC:
                            return OPEN_DATASYNC_FLAG | o_direct_flag;
    #endif
                    default:
                            /* can't happen (unless we are out of sync with option array) */
                            elog(ERROR, "unrecognized wal_sync_method: %d", method);
                            return 0;                       /* silence warning */
            }
    }

     
    0 0
    原创粉丝点击