全文索引

来源:互联网 发布:中南大学网络教育玉溪 编辑:程序博客网 时间:2024/04/27 00:28
create table t1(id int,name varchar2(10));create index ind_t1 on t1(name);insert into t1 values(1,'Tom');insert into t1 values(1,'Tom Tom');insert into t1 select empno,ename from emp;commit;create table t2  as select * From t1;create index ind_t2 on t2(name) indextype is ctxsys.context;BEGIN         DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott',         tabname => 't1',         estimate_percent =>100,          method_opt => 'for all columns size 1',        degree => 8,         cascade=>TRUE         );         END;BEGIN         DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott',         tabname => 't2',         estimate_percent =>100,          method_opt => 'for all columns size 1',        degree => 8,         cascade=>TRUE         );         END;SQL> select  * From t2 where contains(name,'Tom')>0;ID NAME---------- ---------- 1 Tom 2 Tom TomExecution Plan----------------------------------------------------------Plan hash value: 1615937155--------------------------------------------------------------------------------------| Id  | Operation    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------------------|   0 | SELECT STATEMENT    |     |   2 |  20 |   2   (0)| 00:00:01 ||   1 |  TABLE ACCESS BY INDEX ROWID| T2     |   2 |  20 |   2   (0)| 00:00:01 ||*  2 |   DOMAIN INDEX    | IND_T2 |     |     |   0   (0)| 00:00:01 |--------------------------------------------------------------------------------------Predicate Information (identified by operation id):---------------------------------------------------   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)Statistics---------------------------------------------------------- 11  recursive calls  0  db block gets 22  consistent gets  0  physical reads  0  redo size660  bytes sent via SQL*Net to client523  bytes received via SQL*Net from client  2  SQL*Net roundtrips to/from client  0  sorts (memory)  0  sorts (disk)  2  rows processedSQL> select * from t1 where name like '%Tom%';ID NAME---------- ---------- 1 Tom 2 Tom TomExecution Plan----------------------------------------------------------Plan hash value: 3418867520--------------------------------------------------------------------------------------| Id  | Operation    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------------------|   0 | SELECT STATEMENT    |     |   1 |  10 |   2   (0)| 00:00:01 ||   1 |  TABLE ACCESS BY INDEX ROWID| T1     |   1 |  10 |   2   (0)| 00:00:01 ||*  2 |   INDEX FULL SCAN    | IND_T1 |   1 |     |   1   (0)| 00:00:01 |--------------------------------------------------------------------------------------Predicate Information (identified by operation id):---------------------------------------------------   2 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL)Statistics----------------------------------------------------------  1  recursive calls  0  db block gets  4  consistent gets  0  physical reads  0  redo size660  bytes sent via SQL*Net to client523  bytes received via SQL*Net from client  2  SQL*Net roundtrips to/from client  0  sorts (memory)  0  sorts (disk)  2  rows processedinsert into t1 select * From t1;insert into t1 select * From t1;insert into t1 select * From t1;insert into t1 select * From t1;insert into t1 select * From t1;insert into t2 select * from t2;insert into t2 select * from t2;insert into t2 select * from t2;insert into t2 select * from t2;insert into t2 select * from t2;BEGIN         DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott',         tabname => 't1',         estimate_percent =>100,          method_opt => 'for all columns size 1',        degree => 8,         cascade=>TRUE         );         END;BEGIN         DBMS_STATS.GATHER_TABLE_STATS(ownname => 'scott',         tabname => 't2',         estimate_percent =>100,          method_opt => 'for all columns size 1',        degree => 8,         cascade=>TRUE         );         END;SQL> select * from t1 where name like '%Tom%';ID NAME---------- ---------- 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom 1 Tom Tom64 rows selected.Execution Plan----------------------------------------------------------Plan hash value: 3617692013--------------------------------------------------------------------------| Id  | Operation  | Name | Rows  | Bytes | Cost (%CPU)| Time |--------------------------------------------------------------------------|   0 | SELECT STATEMENT  | |   339 |  3390 |     7   (0)| 00:00:01 ||*  1 |  TABLE ACCESS FULL| T1 |   339 |  3390 |     7   (0)| 00:00:01 |--------------------------------------------------------------------------Predicate Information (identified by operation id):---------------------------------------------------   1 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL)Statistics----------------------------------------------------------  0  recursive calls  0  db block gets 57  consistent gets  0  physical reads  0  redo size       2272  bytes sent via SQL*Net to client567  bytes received via SQL*Net from client  6  SQL*Net roundtrips to/from client  0  sorts (memory)  0  sorts (disk) 64  rows processedSQL> select  * From t2 where contains(name,'Tom')>0;ID NAME---------- ---------- 1 Tom 1 Tom TomExecution Plan----------------------------------------------------------Plan hash value: 1615937155--------------------------------------------------------------------------------------| Id  | Operation    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------------------|   0 | SELECT STATEMENT    |     | 848 |8480 |   9   (0)| 00:00:01 ||   1 |  TABLE ACCESS BY INDEX ROWID| T2     | 848 |8480 |   9   (0)| 00:00:01 ||*  2 |   DOMAIN INDEX    | IND_T2 |     |     |   0   (0)| 00:00:01 |--------------------------------------------------------------------------------------Predicate Information (identified by operation id):---------------------------------------------------   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)Statistics---------------------------------------------------------- 11  recursive calls  0  db block gets 19  consistent gets  0  physical reads  0  redo size660  bytes sent via SQL*Net to client523  bytes received via SQL*Net from client  2  SQL*Net roundtrips to/from client  0  sorts (memory)  0  sorts (disk)  2  rows processedSQL> alter index ind_t2 rebuild parameters('sync');Index altered.SQL> select  * From t2 where contains(name,'Tom')>0;ID NAME---------- ---------- 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom 1 Tom 1 Tom Tom64 rows selected.Execution Plan----------------------------------------------------------Plan hash value: 1615937155--------------------------------------------------------------------------------------| Id  | Operation    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------------------|   0 | SELECT STATEMENT    |     |  63 | 630 |   7   (0)| 00:00:01 ||   1 |  TABLE ACCESS BY INDEX ROWID| T2     |  63 | 630 |   7   (0)| 00:00:01 ||*  2 |   DOMAIN INDEX    | IND_T2 |     |     |   2   (0)| 00:00:01 |--------------------------------------------------------------------------------------Predicate Information (identified by operation id):---------------------------------------------------   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)Statistics---------------------------------------------------------- 11  recursive calls  0  db block gets 48  consistent gets  0  physical reads  0  redo size       2272  bytes sent via SQL*Net to client567  bytes received via SQL*Net from client  6  SQL*Net roundtrips to/from client  0  sorts (memory)  0  sorts (disk) 64  rows processed
从上面的测试结果可以看出,全文索引的效果不是很好,不管数量是多还是少,逻辑读都比普通索引的全表扫要高。为什么会这样呢? 因为全文索引比表要大。查看全文索引的大小:SQL> SET LONG 2000000000SQL> SET pagesize 2000SQL> SELECT ctx_report.index_size('IND_T2') FROM DUAL;CTX_REPORT.INDEX_SIZE('IND_T2')--------------------------------------------------------------------------------===========================================================================INDEX SIZE FOR SCOTT.IND_T2===========================================================================TABLE:     SCOTT.DR$IND_T2$ITABLESPACE NAME:     USERSBLOCKS ALLOCATED: 8BLOCKS USED: 8BYTES ALLOCATED: 65,536 (64.00 KB)BYTES USED: 65,536 (64.00 KB)LOB SEGMENT:     SCOTT.SYS_LOB0000080748C00006$$TABLE NAME:     SCOTT.DR$IND_T2$ILOB COLUMN:     TOKEN_INFOTABLESPACE NAME:     USERSBLOCKS ALLOCATED: 8BLOCKS USED: 3BYTES ALLOCATED: 65,536 (64.00 KB)BYTES USED: 24,576 (24.00 KB)INDEX (NORMAL):      SCOTT.DR$IND_T2$XTABLE NAME:     SCOTT.DR$IND_T2$ITABLESPACE NAME:     USERSBLOCKS ALLOCATED: 8BLOCKS USED: 4BYTES ALLOCATED: 65,536 (64.00 KB)BYTES USED: 32,768 (32.00 KB)INDEX (LOB):     SCOTT.SYS_IL0000080748C00006$$TABLE NAME:     SCOTT.DR$IND_T2$ITABLESPACE NAME:     USERSBLOCKS ALLOCATED: 8BLOCKS USED: 4BYTES ALLOCATED: 65,536 (64.00 KB)BYTES USED: 32,768 (32.00 KB)INDEX (IOT):     SCOTT.SYS_IOT_TOP_80751TABLE NAME:     SCOTT.DR$IND_T2$KTABLESPACE NAME:     USERSBLOCKS ALLOCATED:24BLOCKS USED:24BYTES ALLOCATED:       196,608 (192.00 KB)BYTES USED:       196,608 (192.00 KB)INDEX (IOT):     SCOTT.SYS_IOT_TOP_80756TABLE NAME:     SCOTT.DR$IND_T2$NTABLESPACE NAME:     USERSBLOCKS ALLOCATED: 8BLOCKS USED: 4BYTES ALLOCATED: 65,536 (64.00 KB)BYTES USED: 32,768 (32.00 KB)TABLE:     SCOTT.DR$IND_T2$RTABLESPACE NAME:     USERSBLOCKS ALLOCATED: 8BLOCKS USED: 8BYTES ALLOCATED: 65,536 (64.00 KB)BYTES USED: 65,536 (64.00 KB)LOB SEGMENT:     SCOTT.SYS_LOB0000080753C00002$$TABLE NAME:     SCOTT.DR$IND_T2$RLOB COLUMN:     DATATABLESPACE NAME:     USERSBLOCKS ALLOCATED:24BLOCKS USED:24BYTES ALLOCATED:       196,608 (192.00 KB)BYTES USED:       196,608 (192.00 KB)INDEX (LOB):     SCOTT.SYS_IL0000080753C00002$$TABLE NAME:     SCOTT.DR$IND_T2$RTABLESPACE NAME:     USERSBLOCKS ALLOCATED: 8BLOCKS USED: 4BYTES ALLOCATED: 65,536 (64.00 KB)BYTES USED: 32,768 (32.00 KB)TOTALS FOR INDEX SCOTT.IND_T2---------------------------------------------------------------------------TOTAL BLOCKS ALLOCATED:        104TOTAL BLOCKS USED:83TOTAL BYTES ALLOCATED:       851,968 (832.00 KB)TOTAL BYTES USED:       679,936 (664.00 KB)从上面可以看出,全文索引插入的数据都是在临时表中,需要手动同步到正式表中去alter index ind_t2 rebuild parameters('sync'),这样做的好处是,插入数据后,不会对索引进行锁定,不影正式表中对数据的操作,比较适合于数据仓库。


通过对表中继续增加数据后查看SQL> select * from t1 where name like '%Tom%';8192 rows selected.Execution Plan----------------------------------------------------------Plan hash value: 3617692013--------------------------------------------------------------------------| Id  | Operation  | Name | Rows  | Bytes | Cost (%CPU)| Time |--------------------------------------------------------------------------|   0 | SELECT STATEMENT  | | 21709 |   212K|   276   (2)| 00:00:04 ||*  1 |  TABLE ACCESS FULL| T1 | 21709 |   212K|   276   (2)| 00:00:04 |--------------------------------------------------------------------------Predicate Information (identified by operation id):---------------------------------------------------   1 - filter("NAME" LIKE '%Tom%' AND "NAME" IS NOT NULL)Statistics----------------------------------------------------------  0  recursive calls  0  db block gets       7410  consistent gets  0  physical reads  0  redo size     216876  bytes sent via SQL*Net to client       6529  bytes received via SQL*Net from client548  SQL*Net roundtrips to/from client  0  sorts (memory)  0  sorts (disk)       8192  rows processedSQL> select  * From t2 where contains(name,'Tom')>0;8320 rows selected.Execution Plan----------------------------------------------------------Plan hash value: 1615937155--------------------------------------------------------------------------------------| Id  | Operation    | Name   | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------------------|   0 | SELECT STATEMENT    |     |4119 | 41190 |1588   (0)| 00:00:20 ||   1 |  TABLE ACCESS BY INDEX ROWID| T2     |4119 | 41190 |1588   (0)| 00:00:20 ||*  2 |   DOMAIN INDEX    | IND_T2 |     |     |1377   (0)| 00:00:17 |--------------------------------------------------------------------------------------Predicate Information (identified by operation id):---------------------------------------------------   2 - access("CTXSYS"."CONTAINS"("NAME",'Tom')>0)Statistics---------------------------------------------------------- 11  recursive calls  0  db block gets       6638  consistent gets  0  physical reads  0  redo size     219900  bytes sent via SQL*Net to client       6617  bytes received via SQL*Net from client556  SQL*Net roundtrips to/from client  0  sorts (memory)  0  sorts (disk)       8320  rows processedSQL> select count(8) from t1;  COUNT(8)----------    868352通过查看可以看出,t1表和t2表中的数量大小868352,全文索引的逻辑读比普通索引的逻辑读要低,但是代价比较高。





原创粉丝点击