关于库表散列-阿里云大数据专业认证-程序博客网

mysql> select goodsId,goodsName from goods;

+----------------+--------------+
| goodsId        | goodsName    |
+----------------+--------------+
| 0B0ADABA002002 | 卧室系列     |
| 0B0ADABA001001 | 客厅系列     |
| 0B0ADABA002003 | 卧室系列     |
| 0B0ADABA001002 | 客厅系列     |
| 0B0ADABA002004 | 餐厅系列     |
| 0B0ADABA001003 | 卧室系列     |
| 0B0ADABA002005 | 卧室系列     |
| 0B0ADABA001004 | 卧室系列     |
| 0B0ADABA003001 | 客厅系列     |
| 0B0ADABA002001 | 书房系列     |
+----------------+--------------+
10 rows in set (0.00 sec)

mysql> select goodsId,goodsName from goods where goodsId like '%002%';

+----------------+--------------+
| goodsId        | goodsName    |
+----------------+--------------+
| 0B0ADABA002002 | 卧室系列     |
| 0B0ADABA002003 | 卧室系列     |
| 0B0ADABA001002 | 客厅系列     |
| 0B0ADABA002004 | 餐厅系列     |
| 0B0ADABA002005 | 卧室系列     |
| 0B0ADABA002001 | 书房系列     |
+----------------+--------------+
6 rows in set (0.00 sec)

mysql> select goodsId,goodsName from goods where goodsId like '________002%';
+----------------+--------------+
| goodsId        | goodsName    |
+----------------+--------------+
| 0B0ADABA002002 | 卧室系列     |
| 0B0ADABA002003 | 卧室系列     |
| 0B0ADABA002004 | 餐厅系列     |
| 0B0ADABA002005 | 卧室系列     |
| 0B0ADABA002001 | 书房系列     |
+----------------+--------------+
5 rows in set (0.00 sec)

mysql> select ASCII(goodsId) from goods;
+----------------+
| ASCII(goodsId) |
+----------------+
|             48 |
|             48 |
|             48 |
|             48 |
|             48 |
|             48 |
|             48 |
|             48 |
|             48 |
|             48 |
+----------------+
10 rows in set (0.01 sec)

mysql> SELECT CONCAT(2,'test');
+------------------+
| CONCAT(2,'test') |
+------------------+
| 2test            |
+------------------+
1 row in set (0.00 sec)

mysql> SELECT CAST(38.8 AS CHAR);
+--------------------+
| CAST(38.8 AS CHAR) |
+--------------------+
| 38.8               |
+--------------------+
1 row in set (0.00 sec)

mysql> select SUBSTRING(goodsId,4,2) from goods;
+------------------------+
| SUBSTRING(goodsId,4,2) |
+------------------------+
| AD                     |
| AD                     |
| AD                     |
| AD                     |
| AD                     |
| AD                     |
| AD                     |
| AD                     |
| AD                     |
| AD                     |
+------------------------+
10 rows in set (0.00 sec)

mysql> select ASCII( SUBSTRING(goodsId,4,2)) from goods;
+--------------------------------+
| ASCII( SUBSTRING(goodsId,4,2)) |
+--------------------------------+
|                             65 |
|                             65 |
|                             65 |
|                             65 |
|                             65 |
|                             65 |
|                             65 |
|                             65 |
|                             65 |
|                             65 |
+--------------------------------+
10 rows in set (0.00 sec)

mysql> select ASCII( SUBSTRING(goodsId,4,2)),ASCII( SUBSTRING(goodsId,5,2)) from goods;
+--------------------------------+--------------------------------+
| ASCII( SUBSTRING(goodsId,4,2)) | ASCII( SUBSTRING(goodsId,5,2)) |
+--------------------------------+--------------------------------+
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
|                             65 |                             68 |
+--------------------------------+--------------------------------+
10 rows in set (0.00 sec)

mysql> select ASCII( SUBSTRING(goodsId,4,2))*100+ASCII( SUBSTRING(goodsId,5,2)) from goods;
+-------------------------------------------------------------------+
| ASCII( SUBSTRING(goodsId,4,2))*100+ASCII( SUBSTRING(goodsId,5,2)) |
+-------------------------------------------------------------------+
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
|                                                              6568 |
+-------------------------------------------------------------------+
10 rows in set (0.00 sec)

数据库中的散列法是使用计算值来分配表格数据的方法，它比在整个索引中搜索要好得多。哈希散列（hash cluster）允许你在数据库表格中存储数据，使关键字经计算后得到相同值的行存储在相同的位置。

为了在哈希散列中找到一个行，查询引擎对该行的关键字值应用哈希函数，然后定位与那个值相关的数据块。在很多情况下，一个哈希散列比一个普通的索引快。
　　Oracle在Oracle 7面世的时候就支持哈希散列。哈希散列只有在以下情况下才有优势：对表格的访问主要是在关键字上使用=操作符，表格是静态的，并且只需要取出个别数据行。与普通的非索引表格或者散列表格相比，全表扫描会更慢。
　　比如，假设你想要创建一个表格来查找英语单词的发音。你需要迅速地查到一个英语单词的发音，但是一个字典，比如cmudict0.3，有大概106,000个单词。
　　创建哈希散列最大的工作任务就是分析参数。你需要估算每个散列关键字所占的空间大小（SIZE）以及散列关键字的数量（HASHKEYS）。假如散列关键字是正整数，并且你定义了自己的哈希函数，那么可以把SIZE设置为最小值。
　　对于任何其他类型的数据，比如下面这个例子，你需要计算参数的最小大小。你可以通过使用DBMS_UTILITY.GET_HASH_VALUE函数来获得一个确切的估计。
　　-- Plain table holding the word list; the sizing query in this article reads from it.
　　CREATE TABLE cmudict
　　(
　　    word VARCHAR2(22) PRIMARY KEY,  -- the English word (lookup key)
　　    pron VARCHAR2(62)               -- presumably the word's pronunciation
　　);
　　使用SQL*Loader或者其他的快速装入程序来装入这些数据，然后用下面的查询来分析：
　　-- Largest number of bytes that any single hash value would need to store.
　　SELECT MAX(blksize)
　　FROM (
　　    -- Per hash value: sum over its rows of 3 (row overhead)
　　    -- plus (column bytes + 1) for each of the two columns.
　　    SELECT SUM(3 + NVL(VSIZE(word), 0) + 1 + NVL(VSIZE(pron), 0) + 1) AS blksize
　　    FROM cmudict
　　    GROUP BY DBMS_UTILITY.GET_HASH_VALUE(word, 1, 10007)  -- base value 1, 10007 candidate hash keys
　　) blkqry;
　　NVL(VSIZE(col),0)+1表达式计算每个列所占用的字节数。3+是行的开销，所以整个表达式的总和就是存储每一行所需要的大小。对每个哈希值分组内的行大小求和，我们就得到每个散列数据块所需要的字节数量。参数1是基值，实际上无关紧要；而10007是一个质数，是对哈希关键字数量参数的最好猜测。
　　把这个值增大或者减小到另外一个质数，直到上面的分析结果降到数据块大小以内。分析的结果和GET_HASH_VALUE的第三个参数应该被用来创建散列（分别对应SIZE和HASHKEYS）。
　　现在你可以创造一个散列并使用这个散列存储数据再创建一个表格。
　　-- Single-table hash cluster keyed on word.
　　CREATE CLUSTER cmudict_cluster (word VARCHAR2(22))
　　    SIZE 6000        -- bytes reserved per hash key
　　    SINGLE TABLE
　　    HASHKEYS 739;    -- number of hash keys
　　-- Dictionary table stored inside cmudict_cluster, using word as the cluster key.
　　CREATE TABLE cmudict
　　(
　　    word VARCHAR2(22) NOT NULL,
　　    pron VARCHAR2(62)
　　)
　　CLUSTER cmudict_cluster (word);
　　假如你需要分析这个散列：
　　-- Compute statistics for the cluster, then read its row back from USER_CLUSTERS.
　　ANALYZE CLUSTER cmudict_cluster COMPUTE STATISTICS;
　　SELECT *
　　FROM user_clusters
　　WHERE cluster_name = 'CMUDICT_CLUSTER';
　　你可以看到AVG_BLOCKS_PER_KEY 是 1.
　　对查询语句 select * from cmudict where word = 'HELLO' 查看执行计划（explain plan），返回：
　　SELECT STATEMENT
　　TABLE Access HASH
　　CMUDICT

//////////////////////////////

oracle数据库表分区

关键字: oracle数据库表分区 Oracle表分区

自从oracle8i 开始可以把一个表分割为多个小的部分,这样可以对oracle的性能优化带来很大的好处~
例如：改善表的查询性能,更加容易管理表数据,备份和恢复操作更方便

在oracle 中分区表分为好几种的(范围分区,散列分区,子分区,列表分区,索引分区)下面我们来慢慢介绍

现在我们来建立一个[范围分区]
-- Range-partitioned table: each row is placed in a partition according to grade.
create table RangeTable(
id int primary key,
name varchar(20),
grade int
)
partition by range(grade)
(
partition part1 values less than(50) tablespace Part1_tb,       -- grade < 50
partition part2 values less than(MAXVALUE) tablespace Part2_tb  -- every remaining row
);
如果grade的值小于50的话就把记录放到名为part1的分区当中,part1分区将被存储在Part1_tb表空间中
其他的就放在part2中 MAXVALUE是oracle的关键字表示最大值

[散列分区]
/* Hash partitioning can be declared in two ways:
   1) give the number of partitions plus the tablespaces to use, or
   2) list explicitly named partitions.
   The two forms cannot be combined in one statement, so each is shown
   as its own CREATE TABLE. */

-- Form 1: partition count + tablespace list.
create table HashTable(
id int primary key,
name varchar(20),
grade int
)
partition by hash(grade)
partitions 10 -- number of partitions
store in(Part1_tb,Part2_tb,Part3_tb); -- with more partitions than tablespaces, partitions are assigned to tablespaces round-robin

-- Form 2: explicitly named partitions (still HASH; no VALUES clause is given).
create table HashTableNamed(
id int primary key,
name varchar(20),
grade int
)
partition by hash(grade)
(
partition part1 tablespace Part1_tb,
partition part2 tablespace Part2_tb
);

[子分区]即是分区的分区
-- Composite partitioning: range partitions on grade, each split into hash subpartitions.
create table ChildTable(
id int primary key,
name varchar(20),
grade int
)
partition by range(grade)
subpartition by hash(grade)
subpartitions 5 -- number of hash subpartitions inside every range partition
(
partition part1 values less than(30) tablespace Part1_tb,       -- grade < 30
partition part2 values less than(60) tablespace Part2_tb,       -- 30 <= grade < 60
partition part3 values less than(MAXVALUE) tablespace Part3_tb  -- every remaining row
);

[列表分区]告诉oracle所有可能的值
-- List-partitioned table: each partition enumerates the area values it accepts.
CREATE TABLE ListTable
(
    id   INT PRIMARY KEY,
    name VARCHAR(20),
    area VARCHAR(10)
)
PARTITION BY LIST (area)
(
    PARTITION part1 VALUES ('guangdong', 'beijing') TABLESPACE Part1_tb,
    PARTITION part2 VALUES ('shanghai', 'nanjing') TABLESPACE Part2_tb
);

[索引分区]索引也可以按照和表进行分区时使用的相同的值范围来分区
-- The two statements below are alternatives: both use the same index name on the
-- same column, so only one of them can exist at a time.

-- LOCAL tells Oracle to build a separate index for each partition of table IndexTable.
create index IndexTable_index
on IndexTable(name)
local
(
partition part1 tablespace Part1_tb,
partition part2 tablespace Part2_tb
);

-- GLOBAL makes this a global index: a global index can contain values from several
-- table partitions. Local indexes are easier to manage than global ones, while
-- global indexes are faster.
-- NOTE(review): the documented syntax pairs GLOBAL with a PARTITION BY clause;
-- confirm that a bare GLOBAL is accepted by the target Oracle version.
create index IndexTable_index
on IndexTable(name)
global;
注意:不能为散列分区或者子分区创建全局索引

查询某一个表分区
-- Read only the rows stored in partition part1. RangeTable stands in for any
-- partitioned table here; the reserved word TABLE cannot be used as a table name.
select * from RangeTable partition(part1);