MSQL优化基础(height_blanced直方图选择率)

来源：互联网发布：python汉诺塔递归算法编辑：程序博客网时间：2024/06/04 21:29

NDV>255, 并且数据分布不均衡，则倾向搜集 height_blanced直方图

准备数据

drop table t1;create table t1(n1 number);declaremin1 number;max1 number;beginfor i in 1..600 loopmin1 := trunc(dbms_random.value(1,200),0);max1 := trunc(dbms_random.value(min1,200),0);for j in min1..max1 loopinsert into t1 values(i);end loop;commit;end loop;for i in 1..200 loopinsert into t1 values(40);insert into t1 values(80);end loop;end;/beginfor i in 1..800 loopinsert into t1 values(40);insert into t1 values(81);end loop;commit;end;/beginfor i in 1..200 loopinsert into t1 values(39);end loop;end;/exec dbms_stats.gather_table_stats(null,'T1',method_opt=>'for all columns size 254');select num_rows from user_tables where table_name='T1';  NUM_ROWS----------     31696  select num_distinct,num_nulls, low_value,high_value,HISTOGRAM from user_tab_columns where table_name='T1'NUM_DISTINCT  NUM_NULLS LOW_VALUE       HIGH_VALUE      HISTOGRAM------------ ---------- --------------- --------------- ---------------         536          0 C102            C207            HEIGHT BALANCED

案例1 ： in-range = var 经常出现的值

选择率 = (val所在的buckets数/bucket总数)*非空率

select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'ENDPOINT_VALUE ENDPOINT_NUMBER    BUCKETS-------------- --------------- ----------... 40              27                10...select * from t1 where n1 = 40;  --1129 rows selected.--------------------------------------------------------------------------| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------|   0 | SELECT STATEMENT  |      |  1248 |  3744 |    14   (8)| 00:00:01 ||*  1 |  TABLE ACCESS FULL| T1   |  1248 |  3744 |    14   (8)| 00:00:01 |--------------------------------------------------------------------------选择率 = (val所在的buckets数/bucket总数)*非空率 = 10/254 =.039370079cardinality = .039370079*31696 =1247.87402  和rows一样

案例1 ： in-range = var 经常出现的值案例2 ： in-range = var 不出现的值

密度 = (bucket总数 - 所有的常见值的bucket数)/bucket总数/(NDV - 常见值的个数)

选择率 = 密度 * 非空率

select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1'ENDPOINT_VALUE ENDPOINT_NUMBER    BUCKETS-------------- --------------- ----------... 40              27                10...594             250          1597             252          2599             253          1...select * from t1 where n1=598 (596,595 之类的 出来都是 157 )--------------------------------------------------------------------------| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------|   0 | SELECT STATEMENT  |      |       |       |    14 (100)|          ||*  1 |  TABLE ACCESS FULL| T1   |   157 |   471 |    14   (8)| 00:00:01 |--------------------------------------------------------------------------select sum(buckets),count(*) from(select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1') where buckets > 1;SUM(BUCKETS)   COUNT(*)------------ ----------          51         19密度 =  (254 - 51)/254/(536 - 19) =.001545866选择率=密度 *1 cardinality=.001545866*31696=48.9977687   和 rows不一样。测试环境 10.2

案例3 ： in-range <

选择率=(Count(Buckets) / Bucket总数)*非空率

select * from t1 where n1<256--------------------------------------------------------------------------| Id  | Operation         | Name | Rows  | Bytes | Cost (%CPU)| Time     |--------------------------------------------------------------------------|   0 | SELECT STATEMENT  |      | 14475 | 43425 |    14   (8)| 00:00:01 ||*  1 |  TABLE ACCESS FULL| T1   | 14475 | 43425 |    14   (8)| 00:00:01 |--------------------------------------------------------------------------select sum(buckets),count(*) from(select endpoint_value,endpoint_number,( endpoint_number - lag(endpoint_number,1) over(order by endpoint_number))  buckets from user_histograms where table_name='T1') where endpoint_value<=256;SUM(BUCKETS)   COUNT(*)------------ ----------         116         96 selectivity= (Count(Buckets) / Bucket总数)*非空率 =116/254 = .456692913cardinality=.456692913*31696=14475.3386和rows一样

0 0