CTE可以在SELCT,INSERT,CREATE TABLE AS SELECT 或 CREATE VIEW AS SELECT语句中使用。
使用:
--select语句中的CTE
with q1 as (select num,name,age from student where num = 95002)
select *
from q1;-- from风格
with q1 as (select num,name,age from student where num = 95002)
from q1
select *;-- chaining CTEs 链式
with q1 as ( select * from student where num = 95002),q2 as ( select num,name,age from q1)
select * from (select num from q2) a;-- union
with q1 as (select * from student where num = 95002),q2 as (select * from student where num = 95004)
select * from q1 union all select * from q2;--视图,CTAS和插入语句中的CTE
-- insert
create table s1 like student;with q1 as ( select * from student where num = 95002)
from q1
insert overwrite table s1
select *;select * from s1;-- ctas
create table s2 as
with q1 as ( select * from student where num = 95002)
select * from q1;-- view
create view v1 as
with q1 as ( select * from student where num = 95002)
select * from q1;select * from v1;
--隐式联接表示法
SELECT *
FROM table1 t1, table2 t2, table3 t3
WHERE t1.id = t2.id AND t2.id = t3.id AND t1.zipcode = '02535';--支持非等值连接
SELECT a.* FROM a JOIN b ON (a.id = b.id)
SELECT a.* FROM a JOIN b ON (a.id = b.id AND a.department = b.department)
SELECT a.* FROM a LEFT OUTER JOIN b ON (a.id <> b.id)
--A:
select a.*,b.* from employee a,employee_address b where a.id=b.id;
--B:
select * from employee a cross join employee_address b on a.id=b.id;
select * from employee a cross join employee_address b where a.id=b.id;
--C:
select * from employee a inner join employee_address b on a.id=b.id;
SELECT a.* FROM a JOIN b ON (a.id = b.id)
SELECT a.* FROM a JOIN b ON (a.id = b.id AND a.department = b.department)
SELECT a.* FROM a LEFT OUTER JOIN b ON (a.id <> b.id)
同一个查询中可以连接2个以上的表
SELECT a.val, b.val, c.val FROM a JOIN b ON (a.key = b.key1) JOIN c ON (c.key = b.key2)
如果每个表在联接字句使用相同的列,则Hive将多个表上的联接转换为单个MR作业
SELECT a.val, b.val, c.val FROM a JOIN b ON (a.key = b.key1) JOIN c ON (c.key = b.key1)
--由于联接中仅涉及b的key1列,因此被转换为1个MR作业来执行
SELECT a.val, b.val, c.val FROM a JOIN b ON (a.key = b.key1) JOIN c ON (c.key = b.key2)
--会转换为两个MR作业,因为在第一个连接条件中使用了b中的key1列,而在第二个连接条件中使用了b中的key2列。
-- 第一个map / reduce作业将a与b联接在一起,然后将结果与c联接到第二个map / reduce作业中。
SELECT a.val, b.val, c.val FROM a JOIN b ON (a.key = b.key1) JOIN c ON (c.key = b.key1)
--由于联接中仅涉及b的key1列,因此被转换为1个MR作业来执行,并且表a和b的键的特定值的值被缓冲在reducer的内存中。然后,对于从c中检索的每一行,将使用缓冲的行来计算联接。
SELECT a.val, b.val, c.val FROM a JOIN b ON (a.key = b.key1) JOIN c ON (c.key = b.key2)
--计算涉及两个MR作业。其中的第一个将a与b连接起来,并缓冲a的值,同时在reducer中流式传输b的值。
-- 在第二个MR作业中,将缓冲第一个连接的结果,同时将c的值通过reducer流式传输。
SELECT /*+ STREAMTABLE(a) */ a.val, b.val, c.val FROM a JOIN b ON (a.key = b.key1) JOIN c ON (c.key = b.key1)
--a,b,c三个表都在一个MR作业中联接,并且表b和c的键的特定值的值被缓冲在reducer的内存中。
-- 然后,对于从a中检索到的每一行,将使用缓冲的行来计算联接。如果省略STREAMTABLE提示,则Hive将流式传输最右边的表。
join在WHERE条件之前进行。
如果除一个要连接的表之外的所有表都很小,则可以将其作为仅map作业执行(mapjoin)。
SELECT /*+ MAPJOIN(b) */ a.key, a.value FROM a JOIN b ON a.key = b.key
--不需要reducer。对于A的每个Mapper,B都会被完全读取。限制是不能执行FULL / RIGHT OUTER JOIN b。