一、数据准备:
- create table student(
- id int ,
- s_name varchar(20),
- t_id int
- );
-
- create table teacher(
- id int ,
- t_name varchar(20)
- );
-
- insert into student values (1,'zhangsan',3) , (2,'lisi',2) , (3,'wangwu',1), (4,'zhaoliu',4) ;
- insert into teacher values(1, 'zhaolaoshi'),(2, 'qianlaoshi'),(3, 'sunlaoshi'),(6, 'lilaoshi');
-
- 以上为准备数据。
- 以下为测试准备SQL,不需要执行。
- create index student_tid_idx on student(t_id);
- create index student_id_idx on student(id);
- create index teacher_id_idx on teacher(id);
-
- reindex table student;
- reindex table teacher;
-
- set enable_seqscan to off;
- set enable_hashjoin to off;
- set enable_mergejoin to off;
-
- drop index student_tid_idx;
- drop index student_id_idx;
- drop index teacher_id_idx;
二、数据库连接种类
1.自然连接
不需要指定连接列,也不需要使用on语句,默认按照相同列名进行筛选。
select * from student natural join teacher;
- demo=# select * from student natural join teacher;
- id | s_name | t_id | t_name
- ----+----------+------+------------
- 1 | zhangsan | 3 | zhaolaoshi
- 2 | lisi | 2 | qianlaoshi
- 3 | wangwu | 1 | sunlaoshi
- (3 行记录)
2.内连接
(1)隐式语法
select * from student s,teacher t where s.t_id = t.id;
- demo=# select * from student s,teacher t where s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- (3 行记录)
(2)显示语法
写法1:
select * from student s inner join teacher t on s.t_id = t.id;
- demo=# select * from student s inner join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- (3 行记录)
写法2:
select * from student s join teacher t on s.t_id = t.id;
- demo=# select * from student s join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- (3 行记录)
3.外连接
(1)左外连接
写法1:
select * from student s left join teacher t on s.t_id = t.id;
- demo=# select * from student s left join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- 4 | zhaoliu | 4 | |
- (4 行记录)
写法2:
select * from student s left outer join teacher t on s.t_id = t.id;
- demo=# select * from student s left outer join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- 4 | zhaoliu | 4 | |
- (4 行记录)
写法3:
select * from student s,teacher t where s.t_id = t.id(+);
- demo=# select * from student s,teacher t where s.t_id = t.id(+);
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- 4 | zhaoliu | 4 | |
- (4 行记录)
(2)右外连接
写法1:
select * from student s right join teacher t on s.t_id = t.id;
- demo=# select * from student s right join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- | | | 6 | lilaoshi
- (4 行记录)
写法2:
select * from student s right outer join teacher t on s.t_id = t.id;
- demo=# select * from student s right outer join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- | | | 6 | lilaoshi
- (4 行记录)
写法3:
select * from student s,teacher t where s.t_id(+) = t.id;
- demo=# select * from student s,teacher t where s.t_id(+) = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- | | | 6 | lilaoshi
- (4 行记录)
(3)全外连接
写法1:
select * from student s full join teacher t on s.t_id = t.id;
- demo=# select * from student s full join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- | | | 6 | lilaoshi
- 4 | zhaoliu | 4 | |
- (5 行记录)
写法2:
select * from student s full outer join teacher t on s.t_id = t.id;
- demo=# select * from student s full outer join teacher t on s.t_id = t.id;
- id | s_name | t_id | id | t_name
- ----+----------+------+----+------------
- 3 | wangwu | 1 | 1 | zhaolaoshi
- 2 | lisi | 2 | 2 | qianlaoshi
- 1 | zhangsan | 3 | 3 | sunlaoshi
- | | | 6 | lilaoshi
- 4 | zhaoliu | 4 | |
- (5 行记录)
4.半连接
写法1:
select * from student s where exists (select * from teacher t where s.t_id = t.id) ;
- demo=# select * from student s where exists (select * from teacher t where s.t_id = t.id) ;
- id | s_name | t_id
- ----+----------+------
- 1 | zhangsan | 3
- 2 | lisi | 2
- 3 | wangwu | 1
- (3 行记录)
写法2:
select * from student s where s.t_id in (select id from teacher t) ;
- demo=# select * from student s where s.t_id in (select id from teacher t) ;
- id | s_name | t_id
- ----+----------+------
- 1 | zhangsan | 3
- 2 | lisi | 2
- 3 | wangwu | 1
- (3 行记录)
5.反半连接
写法1:
select * from student s where not exists (select * from teacher t where s.t_id = t.id) ;
- demo=# select * from student s where not exists (select * from teacher t where s.t_id = t.id) ;
- id | s_name | t_id
- ----+---------+------
- 4 | zhaoliu | 4
- (1 行记录)
写法2:
select * from student s where s.t_id not in (select id from teacher t) ;
- demo=# select * from student s where s.t_id not in (select id from teacher t) ;
- id | s_name | t_id
- ----+---------+------
- 4 | zhaoliu | 4
- (1 行记录)
三、数据库连接方法:
1.嵌套循环连接
(1)嵌套循环连接
嵌套循环连接是最基础的连接操作,任何连接条件都可以使用这种连接方式,但这种连接方式代价过高,因此PG中很少使用这种连接方式,更多的会使用以下几种嵌套循环连接的变种。
(2)物化嵌套循环连接
嵌套循环连接每读取一条左表(外表)中的元组时,都需要扫描右表(内表)中的所有元组,为每条外表记录做内表全扫描,这个过程代价高昂,
PG支持对内表做临时物化,并存储在内存或者临时文件中,在处理内表元组时,临时元组比缓冲区管理器更为高效,特别是内表比较小的时候,
可以全部载入内存时,我们将这种连接方式称之为物化嵌套循环连接
- demo=# explain (analyze , buffers) select * from student s , teacher t ;
- QUERY PLAN
- -------------------------------------------------------------------------------------------------------------------
- Nested Loop (cost=0.00..9714.75 rows=774000 width=128) (actual time=0.014..0.020 rows=16 loops=1)
- Buffers: shared hit=2
- -> Seq Scan on teacher t (cost=0.00..19.00 rows=900 width=62) (actual time=0.007..0.007 rows=4 loops=1)
- Buffers: shared hit=1
- -> Materialize (cost=0.00..22.90 rows=860 width=66) (actual time=0.001..0.002 rows=4 loops=4)
- Buffers: shared hit=1
- -> Seq Scan on student s (cost=0.00..18.60 rows=860 width=66) (actual time=0.002..0.003 rows=4 loops=1)
- Buffers: shared hit=1
- Planning Time: 0.045 ms
- Execution Time: 0.037 ms
- (10 行记录)
(3)外表索引嵌套循环连接
如果右表(内表)上有索引,且该索引能用于搜索满足连接条件的元组,那么计划器会为考虑使用索引进行直接搜索,以代替顺序扫描。
这种叫作索引嵌套循环连接
- set enable_seqscan to off;
- set enable_hashjoin to off;
- set enable_mergejoin to off;
- create index teacher_id_idx on teacher(id);
- reindex table teacher;
- demo=# explain (analyze , buffers) select * from student s , teacher t where s.t_id = t.id and t.id = 1;
- QUERY PLAN
- ---------------------------------------------------------------------------------------------------------------------------------
- Nested Loop (cost=10000000000.13..10000000009.21 rows=1 width=128) (actual time=0.013..0.014 rows=1 loops=1)
- Buffers: shared hit=3
- -> Seq Scan on student s (cost=10000000000.00..10000000001.05 rows=1 width=66) (actual time=0.006..0.007 rows=1 loops=1)
- Filter: (t_id = 1)
- Rows Removed by Filter: 3
- Buffers: shared hit=1
- -> Index Scan using teacher_id_idx on teacher t (cost=0.13..8.15 rows=1 width=62) (actual time=0.004..0.005 rows=1 loops=1)
- Index Cond: (id = 1)
- Buffers: shared hit=2
- Planning Time: 0.119 ms
- Execution Time: 0.031 ms
- (11 行记录)
(4)外表索引扫描物化嵌套循环连接
在索引嵌套循环连接的定义基础上,将内表(右表)物化,注:内表需要有索引
- set enable_seqscan to off;
- create index teacher_id_idx on teacher(id);
- reindex table teacher;
- demo=# explain (analyze , buffers) select * from student s inner join teacher t on s.t_id = t.id and t.id <=6;
- QUERY PLAN
- ---------------------------------------------------------------------------------------------------------------------------------------
- Nested Loop (cost=10000000000.13..10000000009.25 rows=1 width=128) (actual time=0.016..0.021 rows=3 loops=1)
- Join Filter: (s.t_id = t.id)
- Rows Removed by Join Filter: 13
- Buffers: shared hit=3
- -> Seq Scan on student s (cost=10000000000.00..10000000001.04 rows=4 width=66) (actual time=0.005..0.005 rows=4 loops=1)
- Buffers: shared hit=1
- -> Materialize (cost=0.13..8.15 rows=1 width=62) (actual time=0.001..0.002 rows=4 loops=4)
- Buffers: shared hit=2
- -> Index Scan using teacher_id_idx on teacher t (cost=0.13..8.15 rows=1 width=62) (actual time=0.004..0.005 rows=4 loops=1)
- Index Cond: (id <= 6)
- Buffers: shared hit=2
- Planning Time: 0.173 ms
- Execution Time: 0.041 ms
- (13 行记录)
(5)外表索引扫描索引嵌套循环连接
在索引嵌套循环连接的基础上,外表(左表)和内表(右表)上都有索引,且索引可以用于连接条件
- set enable_seqscan to off;
- set enable_hashjoin to off;
- set enable_mergejoin to off;
- create index student_tid_idx on student(t_id);
- create index teacher_id_idx on teacher(id);
- reindex table student;
- reindex table teacher;
- demo=# explain (analyze , buffers) select * from student s inner join teacher t on s.t_id = t.id and t.id <=6;
- QUERY PLAN
- ----------------------------------------------------------------------------------------------------------------------------------
- Nested Loop (cost=0.26..16.31 rows=1 width=128) (actual time=0.013..0.019 rows=3 loops=1)
- Buffers: shared hit=9
- -> Index Scan using teacher_id_idx on teacher t (cost=0.13..8.15 rows=1 width=62) (actual time=0.006..0.007 rows=4 loops=1)
- Index Cond: (id <= 6)
- Buffers: shared hit=2
- -> Index Scan using student_tid_idx on student s (cost=0.13..8.15 rows=1 width=66) (actual time=0.001..0.001 rows=1 loops=4)
- Index Cond: (t_id = t.id)
- Buffers: shared hit=7
- Planning Time: 0.242 ms
- Execution Time: 0.038 ms
- (10 行记录)
2.归并连接
(1)归并连接
内外表按照连接条件进行排序后,再进行连接,与嵌套循环连接不同的是,归并连接只适用于自然连接与等值连接
- set enable_hashjoin to off;
- set enable_mergejoin to on;
- set enable_seqscan to on;
- demo=# explain (analyze , buffers) select * from student s, teacher t where s.t_id = t.id and t.id > 1;
- QUERY PLAN
- ----------------------------------------------------------------------------------------------------------------
- Merge Join (cost=2.15..2.21 rows=3 width=29) (actual time=0.017..0.019 rows=2 loops=1)
- Merge Cond: (s.t_id = t.id)
- Buffers: shared hit=2
- -> Sort (cost=1.08..1.09 rows=4 width=15) (actual time=0.008..0.009 rows=4 loops=1)
- Sort Key: s.t_id
- Sort Method: quicksort Memory: 25kB
- Buffers: shared hit=1
- -> Seq Scan on student s (cost=0.00..1.04 rows=4 width=15) (actual time=0.004..0.004 rows=4 loops=1)
- Buffers: shared hit=1
- -> Sort (cost=1.07..1.08 rows=3 width=14) (actual time=0.005..0.005 rows=3 loops=1)
- Sort Key: t.id
- Sort Method: quicksort Memory: 25kB
- Buffers: shared hit=1
- -> Seq Scan on teacher t (cost=0.00..1.05 rows=3 width=14) (actual time=0.003..0.003 rows=3 loops=1)
- Filter: (id > 1)
- Rows Removed by Filter: 1
- Buffers: shared hit=1
- Planning Time: 0.112 ms
- Execution Time: 0.038 ms
- (19 行记录)
(2)物化归并
在归并连接定义的基础上,将内表(右表)物化的归并
(3)外表索引扫描归并
归并连接定义的基础上,外表有索引,且索引适用于连接条件
- set enable_seqscan to off;
- create index teacher_id_idx on teacher(id);
- reindex table teacher;
- demo=# explain (analyze , buffers) select * from student s , teacher t where s.t_id = t.id and t.id>1;
- QUERY PLAN
- ------------------------------------------------------------------------------------------------------------------------------------
- Merge Join (cost=10000000001.21..10000000009.30 rows=3 width=29) (actual time=0.017..0.020 rows=2 loops=1)
- Merge Cond: (s.t_id = t.id)
- Buffers: shared hit=3
- -> Sort (cost=10000000001.08..10000000001.09 rows=4 width=15) (actual time=0.009..0.009 rows=4 loops=1)
- Sort Key: s.t_id
- Sort Method: quicksort Memory: 25kB
- Buffers: shared hit=1
- -> Seq Scan on student s (cost=10000000000.00..10000000001.04 rows=4 width=15) (actual time=0.004..0.005 rows=4 loops=1)
- Buffers: shared hit=1
- -> Index Scan using teacher_id_idx on teacher t (cost=0.13..12.18 rows=3 width=14) (actual time=0.004..0.004 rows=3 loops=1)
- Index Cond: (id > 1)
- Buffers: shared hit=2
- Planning Time: 0.122 ms
- Execution Time: 0.042 ms
- (14 行记录)
(4)外表索引扫描物化归并
在外表索引扫描归并连接的基础定义上,将内表物化的连接
(5)外表索引扫描索引归并连接
在外表索引扫描归并连接的基础上,内表也有索引,且索引可以用于连接条件
- create index teacher_id_idx on teacher(id);
- create index student_tid_idx on student(t_id);
- reindex table teacher;
- reindex table student;
- demo=# explain (analyze , buffers) select * from student s , teacher t where s.t_id = t.id and t.id>1;
- QUERY PLAN
- -----------------------------------------------------------------------------------------------------------------------------------
- Merge Join (cost=0.26..20.40 rows=3 width=29) (actual time=0.010..0.013 rows=2 loops=1)
- Merge Cond: (s.t_id = t.id)
- Buffers: shared hit=4
- -> Index Scan using student_tid_idx on student s (cost=0.13..12.19 rows=4 width=15) (actual time=0.003..0.004 rows=4 loops=1)
- Buffers: shared hit=2
- -> Index Scan using teacher_id_idx on teacher t (cost=0.13..12.18 rows=3 width=14) (actual time=0.003..0.003 rows=3 loops=1)
- Index Cond: (id > 1)
- Buffers: shared hit=2
- Planning Time: 0.269 ms
- Execution Time: 0.033 ms
- (10 行记录)
3.散列连接
(1)内存散列连接
与归并连接类似,散列连接只能适用于自然连接与等值连接,内存散列连接是在work_mem中处理,散列表区域被称为处理批次,一个处理批
次会有多少散列槽,被称为桶,桶的数量由nodeHash.c中定义的ExecChooseHashTableSize函数确定,且桶的数量总是2的整数次幂
- set enable_hashjoin to on;
- set enable_mergejoin to on;
- set enable_seqscan to on;
- demo=# explain (analyze , buffers) select * from student s, teacher t where s.t_id = t.id;
- QUERY PLAN
- ----------------------------------------------------------------------------------------------------------------
- Hash Join (cost=1.09..2.19 rows=4 width=29) (actual time=0.020..0.023 rows=3 loops=1)
- Hash Cond: (s.t_id = t.id)
- Buffers: shared hit=2
- -> Seq Scan on student s (cost=0.00..1.04 rows=4 width=15) (actual time=0.007..0.008 rows=4 loops=1)
- Buffers: shared hit=1
- -> Hash (cost=1.04..1.04 rows=4 width=14) (actual time=0.006..0.007 rows=4 loops=1)
- Buckets: 1024 Batches: 1 Memory Usage: 9kB
- Buffers: shared hit=1
- -> Seq Scan on teacher t (cost=0.00..1.04 rows=4 width=14) (actual time=0.002..0.003 rows=4 loops=1)
- Buffers: shared hit=1
- Planning Time: 0.067 ms
- Execution Time: 0.042 ms
- (12 行记录)
(2)带倾斜的混合散列连接
在内存散列定义的基础只上,如果内表的元组无法全部载入内存单个处理批次时,PG就用创建多个批次,只工作区中只分配一个批次,其它批次
都以临时文件的形式创建,属于某个批次的元组将通过临时元组存储功能写入相应的文件中
