mysql join

来源：互联网发布：东京经济大学知乎编辑：程序博客网时间：2024/06/05 22:45

转自：http://blog.csdn.net/wudongxu/article/details/6683846

mysql的join就一个算法nest loop。本文是我学习该算法的一个分享文档。

本文我分析学习了mysql的join过程也就是Nest Loop，其涉及的主要函数包括sub_select、evaluate_join_record、do_select、optimize。这里先看一下bt（select * from a join b on a.id=b.id where a.name='abc' and b.age=19）：

[php] view plaincopy
#0  evaluate_join_record (join=0x1f5d17e0, join_tab=0x1f5d5a08, error=0) at sql_select.cc:11413  
  
#1  0x0000000000613246 in sub_select (join=0x1f5d17e0, join_tab=0x1f5d5a08, end_of_records=<value optimized out>) at sql_select.cc:11384  
  
#2  0x0000000000613135 in evaluate_join_record (join=0x1f5d17e0, join_tab=0x1f5d57b0, error=<value optimized out>) at sql_select.cc:11510  
  
#3  0x00000000006131f3 in sub_select (join=0x1f5d17e0, join_tab=0x1f5d57b0, end_of_records=<value optimized out>) at sql_select.cc:11390  
  
#4  0x0000000000627742 in do_select (join=0x1f5d17e0, fields=0x1f53bf88, table=0x0, procedure=0x0) at sql_select.cc:11140  
  
#5  0x000000000062cc5a in JOIN::exec (this=0x1f5bfa70) at sql_select.cc:2314  
  
#6  0x000000000062e928 in mysql_select (thd=0x1f593520, rref_pointer_array=0x1f595600, tables=0x1f5b7010, wild_num=0, fields=<value optimized out>,  
  
   conds=0x0, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2684635648, result=0x1f5b76e0, unit=0x1f595008,  
  
   select_lex=0x1f595430) at sql_select.cc:2509  
  
#7  0x000000000062f35f in handle_select (thd=0x1f593520, lex=0x1f594f68, result=0x1f5b76e0, setup_tables_done_option=0) at sql_select.cc:269  
  
#8  0x00000000005ae161 in execute_sqlcom_select (thd=0x1f593520, all_tables=0x1f5b7010) at sql_parse.cc:5075  
  
#9  0x00000000005b5681 in mysql_execute_command (thd=0x1f593520) at sql_parse.cc:2271  
  
#10 0x00000000005baef4 in mysql_parse (thd=0x1f593520, inBuf=0x1f5b6cb0 "SHOW VARIABLES LIKE 'SERVER_ID'", length=31, found_semicolon=0x53d06d98)  
  
   at sql_parse.cc:5994

然后我们直接从mysql_select开始，它主要包括三个操作JOIN::prepare、JOIN::optimize、JOIN::exec，其中prepare完成一些判断准备工作，如对通配符的处理、having条件、order等的判断初始化等；optimize负责优化检索，简单地说就是确定一个mysql认为最优的执行计划；而exec则负责执行optimize确定的执行计划。下面我们主要介绍exec过程，它涉及的主要函数包括sub_select、evaluate_join_record、do_select。

[cpp] view plaincopy
do_select (JOIN *join,List<Item> *fields,TABLE*table,Procedure *procedure)  
  
//table只有在需要创建临时表的时候会被赋值，其它情况都为null  
  
Next_select_func end_select= setup_end_select_func(join); //该函数首先通过判断有没有table，如果有的话说明end_select是更新类型（如update,write），否则就是send操作  
  
 if (join->tables)  
  
 {  
  
   join->join_tab[join->tables-1].next_select= end_select; //把刚才的end_select赋值给最后的jointable（join->tables表示此次join有几个表）  
  
   join_tab=join->join_tab+join->const_tables; //设置第一个要join的表（join->const_tables表示当前的jointable位置，join table在join->join_tab数组中的存放顺序是按在explain解析的顺序，explain在上面的放在前面0，1…[指同一级的]）  
  
 }  
  
error=sub_select(join,join_tab,0);  //最后调用sub_select进行nest loop操作  
  
   if (error == NESTED_LOOP_OK || error == NESTED_LOOP_NO_MORE_ROWS)  
  
     error= sub_select(join,join_tab,1); //已经读取数据结束  
  
sub_select (JOIN *join,JOIN_TAB *join_tab,bool end_of_records)  
  
  if(end_of_records) //如果已经到记录的结束位置直接调用当前join_tab的next_select操作  
  
   return (*join_tab->next_select)(join,join_tab+1,end_of_records);  
  
error=(*join_tab->read_first_record)(join_tab); //这个函数在两个地方赋值，一个是在exec中通过make_simple_join赋值为join_init_read_record（没有被优化的join使用这个方法，并且它通过调用init_read_record，初始化该join_table下的read_record方法，如全表扫描为rr_sequential，然后再调用read_record方法读取第一条记录）；另一种是在optimize中通过pick_table_access_method函数根据join type选择不同的方法，这个jointype就是我们在explain的type所描述的，如对于  
  
 case JT_REF:  
    tab->read_first_record=join_read_always_key;  
  
tab->read_record.read_record= join_read_next_same;  
                   case JT_EQ_REF:  
                       tab->read_first_record=join_read_key;  
   tab->read_record.read_record= join_no_more_records;  
可以看到这里不只初始化了read_first_record，还初始化了read_record的方法  
rc=evaluate_join_record(join, join_tab, error); //对得到的第一条记录判断是否满足条件  
  
while (rc ==NESTED_LOOP_OK)  //循环读取记录直到文件结束位置  
{  
  error= info->read_record(info); //该方法就是上面介绍的read_record方法对于全表扫描调用rr_sequential，对于ref join则调用join_read_next_same方法  
    rc= evaluate_join_record(join, join_tab,error);  
}

[cpp] view plaincopy
evaluate_join_record(JOIN *join, JOIN_TAB *join_tab, int error)  
  
COND*select_cond= join_tab->select_cond; //取出该表的相应condition，该变量的初始化也是在optimize里调用make_join_select然后再调用add_not_null_conds、make_cond_for_table完成相应join_table的select_cond初始化  
  
 if (select_cond)   //如果该table有相应的cond  
  {  
    select_cond_result=test(select_cond->val_int());  //该步骤是完成从引擎获得的数据与query中该table的cond比较的过程；其内部最终调用的是Item_cmpfunc里的接口，如对于简单的数字等值比较使用Item_func_eq::val_int() -> int Arg_comparator::compare_int_signed()  
  
    /* check for errors evaluating the condition */  
  
    if (join->thd->is_error())  
      return NESTED_LOOP_ERROR;  
  }  
  
  if (!select_cond || select_cond_result)  
  {  
    bool found= 1;  //找到满足条件的记录  
    while (join_tab->first_unmatched&& found)  
    {  
  
      /*外连接的情况 
 
        The while condition is always false if join_tab is not 
 
        the last inner join table of an outer join operation. 
 
      */  
  
                            ….  
    }  
  
if (found)  
  
    {  
  
      enum enum_nested_loop_state rc;  
  
      /* A match from join_tab is found for the current partial join. */  
  
      rc= (*join_tab->next_select)(join,join_tab+1, 0);  // next_select是在optimize的make_join_readinfo里初始化，它把所有的join表的next_select初始化为sub_select，也就是说这里如果还有join_tab需要join的话，再次进入sub_select，这就是nest_loop的思想所在。对于最后一个表的next_select则会在do_select里调用setup_end_select_func来重新赋值为end_select，这个就说明一个join操作完成，并把数据返回到客户端。  
  
      if (rc != NESTED_LOOP_OK && rc !=NESTED_LOOP_NO_MORE_ROWS)  
  
        return rc;  
  
      if (join->return_tab < join_tab)  
  
        return NESTED_LOOP_OK;  
  
   …  
  
else  
  
   join_tab->read_record.unlock_row(join_tab);//调用rr_unlock_row最终调用引擎的unlock_row对行进行解锁  
  
}  
  
…  

这里小结一下上面出现的三个重要的函数指针：
join_tab->read_first_record：读取第一条记录使用的方法
info->read_record：读取非第一条记录使用的方法，该方法是根据optimize选择的join type来指定的。
join_tab->next_select：join_tab连接下一个table的方法，这里除了最后一个表使用end_select，其它的都使用sub_select。
下面通过两个简单的例子说明一下nest loop的过程：

例子1：select emp_no from salaries where salary = 90930; //salary没有索引

这里因为只有一个表，所以join_tab只有一个，并且是全表扫描，所以join_tab->read_first_record、info->read_record都被指定为rr_sequential；join_tab->next_select就为end_send，又因为这是直接返回给客户端所以最终select_send::send_data再调用item->send(protocol, &buffer)通过网络传输给客户端。如果这里是insert ... select的话，那么在end_send的时候会调用select_insert::send_data，可以找到这个函数是在sql_insert.cc文件里的，它实质完成的是写数据操作write_record，而不是像刚才的网络send操作。可以用下面的图表示该事例的执行过程：
没有使用索引的select执行过程：

图1

例子2：select * from salaries s join employees e on (s.emp_no=e.emp_no) where e.gender='F' and salary=90930;

注：e.gender，s.salary都不是索引
首先我们看一下该语句的执行计划：

图2

其实通过这个图，我们也可以大概的猜测到该语句的执行情况：
首先e【join->join_tab[0]->table->alias】表执行sub_select，它通过rr_sequential，获得它的每一条记录，然后通过evaluate_join_record判断这个记录是否满足e.gender=’F’条件【using where】，如果没有满足则接着取下一条，满足的话，则把它的e.emp_no传递给s表，即接下来s【join->join_tab[1]->table->alias】执行sub_select，它的join type是ref，即它是通过索引来获得记录而不是通过全表扫描的方式，即拿e.emp_no的值来检索s的PRIMARY KEY来获得记录【可能有多条，组合key】，最后再通过s的evaluate_join_record判断是否满足salary=90930这个条件，如果满足是直接发送给客户端，否则获得ref 的下一条记录【上面写的多条】，同样进行evaluate_join_record判断。

我们通过gdb来查看上面两个表的三个函数指针来验证上面的过程：
[e]join_tab->read_first_record：join_init_read_record -> rr_sequential -> rnd_next
[e]info->read_record：rr_sequential -> rnd_next
[e]join_tab->next_select：sub_select
[s]join_tab->read_first_record：join_read_always_key -> index_read
[s]info->read_record：join_read_next_same -> index_next_same
[s] join_tab->next_select：end_select
该过程我们可以通过以下的图来表示：

图3

通过该图也可以清楚的看到nest-loop的思想。嵌套的循环+递归。
上面的两个例子是我们比较常见的并且也和我们的预期执行一样的。下面看另外一种常见的，但不按上面执行的例子。

例子3：select * from salaries s join employees e on (s.emp_no=e.emp_no) where e.emp_no=62476 and salary=90930; //注：e表的PRIMARY KEY为(`emp_no`)
该语句的执行计划如下图

图4

通过该图我们可以看到该join使用e作为驱动表，并且它直接使用emp_no这个PRIMARY KEY来查找e的记录。我们根据上面两个例子的过程，很容易想到它的执行过程可能是这样：e先调用sub_select，它通过[e]join_tab->read_first_record读取第一条记录，然后循环通过info->read_record读取后面的记录，然后在e.evaluate_join_record中，判断条件（这里没有，因为emp_no是做为index使用），然后再调用s的sub_select，然后s通过[s]join_tab->read_first_record（这里应该是join_read_always_key -> index_read）读取第一条记录，然后循环通过info->read_record（join_read_next_same -> index_next_same）读取后面的记录，然后在s.evaluate_join_record中再判断salary=90930？，如果是返回给客户端。这就是我们预测的执行过程。但事实是否是这样？
答案是否定的，当我通过gdb: b sub_select时发现第一次进入的就是s表，而不是我们预期的e表。这是什么原因呢？
其实有这个现象的根本原因是do_select调用sub_select时指定的join_tab（join_tab=join->join_tab+join->const_tables）：
即这个join_tab是由const_tables指定的。而这个值则是在optimize的make_join_statistics根据优化情况进行赋值的。这个优化主要是指对const join可以直接获得它的记录，而不必通过sub_select去获得。
这里我们简单说明一下make_join_statistics的过程：

[cpp] view plaincopy
Make_join_statistics：  
  
         Update_ref_and_keys()  //获得所有的可用的索引  
  
         /*loop until no more const tables are found */  
  
do  
  
{  
  
     ....  
  
     /* check if table can be read by key or table only uses const refs */  
  
     if ((keyuse=s->keyuse))  
  
     {  
  
                   ...  
  
                  if (const_ref == eq_part)  
  
{  
  
set_position(join,const_count++,s,start_keyuse);//该表是join const类型，所以直接取得它的记录，并且把join的起始表调整为下个表：const_count++  
  
                 if (create_ref_for_key(join, s,start_keyuse,  
  
                                            found_const_table_map))  
  
                  goto error;  
  
            //直接通过该索引取得该表的记录，其调用的引擎接口为：index_init  
  
                 if ((tmp=join_read_const_table(s,join->positions+const_count-1)))  
  
              }  
  
      }  
  
}while(join->const_table_map& found_ref && ref_changed);  
  
/* Calc how many(possible) matched records in each table */  
s->found_records=s->records=s->table->file->stats.records;  
s->read_time=(ha_rows)s->table->file->scan_time();

//把所有join table放到join->table数组里，并把此时的const_count值赋给join->const_tables，即此时开始join的第一个表为非const join类型的表，也是后面首先执行sub_select的表的位置
join->join_tab=stat;

join->map2table=stat_ref;

join->table=join->all_tables=table_vector;

join->const_tables=const_count;

/* Find an optimal join order of the non-constant tables. */

//调整后面的表的join顺序，即实现explain输出的join顺序

choose_plan(join,all_table_map & ~join->const_table_map)

上面的过程可描述为：首先查找所有可用的索引，然后判断有哪些表是const join类型，对于该类型的直接通过索引获得记录，并且设置后面的join表为非const开始，接着估算每次需要执行多少次匹配，最后对非const table进行一个join排序。
Mysql的优化器比较复杂，要处理的事情和情况也比较多，这里主要分析了对const join的处理，对于其内部的实现及其它优化没有进行太多的深入学习（做为以后学习的内容）。
通过上面的三个例子我们可以看到Mysql Join的大概过程，该过程简单描述就是依次把每个join tab拿出来，根据索引或全表扫描获得记录，再判断是否满足该表的限制条件，如果满足的话则传递给下一个join tab，如此循环递归直到所有的join tab都join结束，如果该记录满足所有的条件则返回到客户端，否则读取下一条。同时对于const join tab Mysql会进行优化，直接在optimize阶段获得它的数据，以减少nest loop的次数。
注：select * from salaries s join employees e on (s.emp_no=e.emp_no)where s.emp_no=62476 and salary=90930;
这个语句是如何执行？

附：

explain的输出描述：

Explain又称为执行计划，就是让我们知道Mysql是如何执行一条select语句，只支持select。我们使用上面的图2来描述：

Id：表示执行顺序，id越大的越先执行（当有子查询的时候会产生temp table或derive表的时候就会有不同的id，按从小到大排序），相同的id在上面的先执行。

Select_type：该类型主要包括union，子查询，simple表示最简单的join如我们上面看到的。

Table：表示该执行操作的表，如果为数字表示的是id的值。

Type：是最重要的字段，它告诉我们该表使用了哪种join type，即怎么去读取该表的数据。

ALL：表示全表扫描

Const：表最多有一个匹配行，一般表示直接通过Primary key或UNIQUE获得数据

eq_ref：表示通过对于每个来自于前面的表的行组合，从该表中读取一行。这可能是最好的联接类型，除了const类型。它用在一个索引的所有部分被联接使用并且索引是UNIQUE或PRIMARY KEY。

Ref：ref是相对于eq_ref，它一般是通过联合索引来获得数据，即来自前一个表的join条件是该表的联合索引的一部分。

其它的见参考资料

Possible_key：表示该表可用的key

Key：表示该操作真正使用的key

Key_len：表示该key的len

Ref：表示使用哪个列或常数与key一起从表中读取数据。这个可能来自上一个表或者来自where条件这里就会是const值。

Rows：表示MySQL认为它执行查询时必须检查的行数

Extra：是其它信息包括using index（这个index不是前面的key，而是其它key），using where使用where条件，using join buffer，using temporary table等。

通过rows及每部分join过程我们可以大概的估算出join总共检查的行数：

如上面的图2，总共检查行数为：300141（这个值是估计的，select count(*) from employees; = 300024） + 120051（select count(*) from employees e where e.gender='F';）*4（每次使用Primary查找需要4次）=780345。

通过这个过程，我们可以得到一个优化的基本原则尽量使用最少结果集作为驱动表（join的第一张表）。因为如果这样后面得到的结果集就可能越小，那么整个读取次数也可能更少。

employees数据库来自http://dev.mysql.com/doc/employee/en/employee.html