MySQL优化:可配置选项的WAIT_FOR_READ

来源:互联网 发布:608所怎么样 知乎 编辑:程序博客网 时间:2024/05/17 08:55

转载请署名:印风

-----------------------------------

http://bugs.mysql.com/bug.php?id=64258

innodb层使用一个常量WAIT_FOR_READ来控制当需要等待从磁盘读取数据时,每次休眠等待的时间,其默认值在5.5和5.1的版本中都是5000us(5ms),而现在比较牛的存储设备(flash/ssd)一般能在100us内完成读操作,BUG#64258认为这个值应该是个可配置的选项,通过设定合适的值来匹配我们的硬件设备性能。

这是个static const静态常量,在文件buf0buf.c中会用到WAIT_FOR_READ:

buf/buf0buf.c:280:static const int WAIT_FOR_READ = 5000;
buf/buf0buf.c:2091: os_thread_sleep(WAIT_FOR_READ);
buf/buf0buf.c:2632: os_thread_sleep(WAIT_FOR_READ);
buf/buf0buf.c:2880: os_thread_sleep(WAIT_FOR_READ);


WAIT_FOR_READ在两个函数buf_page_get_gen和buf_page_get_zip中会被用到,这里我们只考虑前者,看看在什么情况下会进入sleep状态

这是个通用的获取数据库page的函数,比较冗长,在经过检查bufferpool、异步请求磁盘页以及对压缩页的处理等一大堆代码后,调用如下代码段:

2858     switch (rw_latch) {
2859     case RW_NO_LATCH:
2860         if (must_read) {
2861             /* Let us wait until the read operation
2862             completes */
2863 
2864             if (innobase_get_slow_log() && trx && trx->take_stats)
2865             {
2866                 ut_usectime(&sec, &ms);
2867                 start_time = (ib_uint64_t)sec * 1000000 + ms;
2868             } else {
2869                 start_time = 0;
2870             }
2871             for (;;) {
2872                 enum buf_io_fix io_fix;
2873 
2874                 mutex_enter(&block->mutex);
2875                 io_fix = buf_block_get_io_fix(block);
2876                 mutex_exit(&block->mutex);
2877 
2878                 if (io_fix == BUF_IO_READ) {
2879 
2880                     os_thread_sleep(WAIT_FOR_READ);
2881                 } else {
2882                     break;
2883                 }
2884             }
2885             if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
2886             {
2887                 ut_usectime(&sec, &ms);
2888                 finish_time = (ib_uint64_t)sec * 1000000 + ms;
2889                 trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
2890             }
2891         }

io_fix的含义不是很了解,看看注释:

56 /** Flags for io_fix types */
57 enum buf_io_fix {
58     BUF_IO_NONE = 0, /**< no pending I/O */
59     BUF_IO_READ,     /**< read pending */
60     BUF_IO_WRITE     /**< write pending */
61 };

其中这里用到的是BUF_IO_READ,应该是read pending,可能是正在等待磁盘读的一个IO状态标识。


从代码里,我们可以看到,当当前的block->page->io_fix为BUF_IO_READ时,会不停地在一个for(;;)里循环,每次检查后,会sleep WAIT_FOR_READ us后再次检查。如果这是一个高速存储设备,sleep的时间太长显然是不合理的。


以下是一个简单的patch,增加了一个选项innobase_wait_for_read,来控制sleep的时间,基于percona5.5.18

手头有ssd测试环境的同学,帮忙测试看看有木有效果...

diff -ur Percona-Server-5.5.18.stock/storage/innobase/buf/buf0buf.c Percona-Server-5.5.18.sleep/storage/innobase/buf/buf0buf.c--- Percona-Server-5.5.18.stock/storage/innobase/buf/buf0buf.c  2012-01-07 16:38:37.000000000 +0800+++ Percona-Server-5.5.18.sleep/storage/innobase/buf/buf0buf.c  2012-02-17 16:22:05.000000000 +0800@@ -57,6 +57,8 @@ /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); +extern innobase_wait_for_read;+ inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) {    ulint           block_hash;@@ -276,8 +278,6 @@ */  #ifndef UNIV_HOTBACKUP-/** Value in microseconds */-static const int WAIT_FOR_READ = 5000; /** Number of attemtps made to read in a page in the buffer pool */ static const ulint BUF_PAGE_READ_MAX_RETRIES = 100; @@ -2088,7 +2088,7 @@             if (io_fix == BUF_IO_READ) { -               os_thread_sleep(WAIT_FOR_READ);+               os_thread_sleep(innobase_wait_for_read);            } else {                break;            }   @@ -2629,7 +2629,7 @@            Try again later. 
*/            //buf_pool_mutex_exit(buf_pool);            mutex_exit(block_mutex);-           os_thread_sleep(WAIT_FOR_READ);+           os_thread_sleep(innobase_wait_for_read);             goto loop;        }   @@ -2877,7 +2877,7 @@                 if (io_fix == BUF_IO_READ) { -                   os_thread_sleep(WAIT_FOR_READ);+                   os_thread_sleep(innobase_wait_for_read);                } else {                    break;                }diff -ur Percona-Server-5.5.18.stock/storage/innobase/handler/ha_innodb.cc Percona-Server-5.5.18.sleep/storage/innobase/handler/ha_innodb.cc--- Percona-Server-5.5.18.stock/storage/innobase/handler/ha_innodb.cc   2012-01-07 16:38:37.000000000 +0800+++ Percona-Server-5.5.18.sleep/storage/innobase/handler/ha_innodb.cc   2012-02-17 16:33:46.000000000 +0800@@ -198,6 +198,7 @@ static my_bool innobase_buffer_pool_shm_checksum   = TRUE; static uint    innobase_buffer_pool_shm_key        = 0;+ulong innobase_wait_for_read = 0; static char*   internal_innobase_data_file_path    = NULL;@@ -12098,6 +12099,11 @@ //  " or 2 (write at commit, flush once per second).", //  NULL, NULL, 1, 0, 2, 0);+MYSQL_SYSVAR_ULONG(wait_for_read, innobase_wait_for_read,+  PLUGIN_VAR_OPCMDARG,+  "set a value to decide how long when read page operation need to sleep",+  NULL, NULL, 5000, 0, 5000, 0);+ static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,   PLUGIN_VAR_NOCMDARG,   "Use global innodb_flush_log_at_trx_commit value. (default: ON).",@@ -12656,6 +12662,7 @@   MYSQL_SYSVAR(corrupt_table_action),   MYSQL_SYSVAR(lazy_drop_table),   MYSQL_SYSVAR(fake_changes),+  MYSQL_SYSVAR(wait_for_read),   NULL };