更新时间:2024-12-06 GMT+08:00
Hive SQL与SQL2003标准有哪些兼容性问题
本文列举目前已发现的Hive SQL与SQL2003标准兼容性问题。
- 不支持在having中写视图。
举例如下:
select c_last_name ,c_first_name ,s_store_name ,sum(netpaid) paid from ssales where i_color = 'chiffon' group by c_last_name ,c_first_name ,s_store_name having sum(netpaid) > (select 0.05*avg(netpaid) from ssales);
报错:
Error: Error while compiling statement: FAILED:ParseException line 46:23 cannot recognize input near 'select' '0.05' '*' in expression specification (state=42000,code=40000)
- Having不支持子查询。
举例如下:
select ps_partkey, sum(ps_supplycost * ps_availqty) as value from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'SAUDI ARABIA' group by ps_partkey having sum(ps_supplycost * ps_availqty) > ( select sum(ps_supplycost * ps_availqty) * 0.0001000000 from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'SAUDI ARABIA' ) order by value desc;
报错:
Error: Error while compiling statement: FAILED: SemanticException Line 0:-1 Unsupported SubQuery Expression ''SAUDI ARABIA'': Only SubQuery expressions that are top level conjuncts are allowed (state=42000,code=40000)
- 不支持多查询结果当成多个字段输出。
举例如下:
select c_count, count(*) as custdist from ( select c_custkey, count(o_orderkey) from customer left outer join orders on c_custkey = o_custkey and o_comment not like '%pending%requests%' group by c_custkey ) as c_orders (c_custkey, c_count) group by c_count order by custdist desc, c_count desc;
报错:
Error: Error while compiling statement: FAILED: ParseException line 1:213 missing EOF at '(' near 'c_orders' (state=42000,code=40000)
- 不支持把查询的结果当成字段比较。
举例如下:
select sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, part where p_partkey = l_partkey and p_brand = 'Brand#25' and p_container = 'MED JAR' and l_quantity < ( select 0.2 * avg(l_quantity) from lineitem where l_partkey = p_partkey );
报错:
Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 14:4 Unsupported SubQuery Expression 'ps_suppkey': Correlating expression contains ambiguous column references. (state=42000,code=10249)
- 多表关联的过滤条件中,不支持按not in或in的子查询过滤
举例如下:
select p_brand, p_type, p_size, count(distinct ps_suppkey) as supplier_cnt from partsupp, part where p_partkey = ps_partkey and p_brand <> 'Brand#12' and p_type not like 'PROMO PLATED%' and p_size in (25, 2, 43, 9, 35, 36, 48, 24) and ps_suppkey in ( select s_suppkey as ps_suppkey from supplier where s_comment like '%Customer%Complaints%' ) group by p_brand, p_type, p_size order by supplier_cnt desc, p_brand, p_type, p_size;
报错:
Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 14:4 Unsupported SubQuery Expression 'ps_suppkey': Correlating expression contains ambiguous column references. (state=42000,code=10249)
- 多表关联不支持not in和in在子查询中过滤。
举例如下:
select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, lineitem where o_orderkey in ( select l_orderkey from lineitem group by l_orderkey having sum(l_quantity) > 315 ) and c_custkey = o_custkey and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate limit 100;
报错:
Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 13:0 Unsupported SubQuery Expression 'o_orderkey': Correlating expression contains ambiguous column references. (state=42000,code=10249)
- 关联条件中不支持多个exists查询体。
举例如下:
select s_name, count(*) as numwait from supplier, lineitem l1, orders, nation where s_suppkey = l1.l_suppkey and o_orderkey = l1.l_orderkey and o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists ( select * from lineitem l2 where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey <> l1.l_suppkey ) and not exists ( select * from lineitem l3 where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey <> l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ) and s_nationkey = n_nationkey and n_name = 'VIETNAM' group by s_name order by numwait desc, s_name limit 100;
报错:
Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 23:8 Unsupported SubQuery Expression 'l_commitdate': Only 1 SubQuery expression is supported. (state=42000,code=10249)
- 不支持多层级in嵌套子查询。
举例如下:
select i_item_id item_id, sum(sr_return_quantity) sr_item_qty from store_returns, item, date_dim where sr_item_sk = i_item_sk and d_date in (select d_date from date_dim where d_week_seq in (select d_week_seq from date_dim where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) and sr_returned_date_sk = d_date_sk group by i_item_id), cr_items as (select i_item_id item_id, sum(cr_return_quantity) cr_item_qty from catalog_returns, item, date_dim where cr_item_sk = i_item_sk);
报错:
Unsupported SubQuery Expression 'd_week_seq': SubQuery cannot use the table alias: date_dim; this is also an alias in the Outer Query and SubQuery contains a unqualified column reference (state=42000,code=10249)
父主题: Hive应用开发常见问题