更新时间:2024-12-06 GMT+08:00

Hive SQL与SQL2003标准有哪些兼容性问题

本文列举目前已发现的Hive SQL与SQL2003标准兼容性问题。

  • 不支持在having中写视图。

    举例如下:

    select c_last_name
          ,c_first_name
          ,s_store_name
          ,sum(netpaid) paid
    from ssales
    where i_color = 'chiffon'
    group by c_last_name
            ,c_first_name
            ,s_store_name
    having sum(netpaid) > (select 0.05*avg(netpaid) from ssales);

    报错

    Error: Error while compiling statement: FAILED:ParseException line 46:23 cannot recognize input near 'select' '0.05' '*' in expression specification (state=42000,code=40000)

  • Having不支持子查询。

    举例如下:

    select
           ps_partkey,
           sum(ps_supplycost * ps_availqty) as value
    from
           partsupp,
           supplier,
           nation
    where
           ps_suppkey = s_suppkey
           and s_nationkey = n_nationkey
           and n_name = 'SAUDI ARABIA'
    group by
           ps_partkey having
                  sum(ps_supplycost * ps_availqty) > (
                         select
                                sum(ps_supplycost * ps_availqty) * 0.0001000000
                         from
                                partsupp,
                                supplier,
                                nation
                         where
                                ps_suppkey = s_suppkey
                                and s_nationkey = n_nationkey
                                and n_name = 'SAUDI ARABIA'
                  )
    order by
           value desc;

    报错:

    Error: Error while compiling statement: FAILED: SemanticException Line 0:-1 Unsupported SubQuery Expression ''SAUDI ARABIA'': Only SubQuery expressions that are top level conjuncts are allowed (state=42000,code=40000)

  • 不支持多查询结果当成多个字段输出。

    举例如下:

    select
           c_count,
           count(*) as custdist
    from
           (
                  select
                         c_custkey,
                         count(o_orderkey)
                  from
                         customer left outer join orders on
                                c_custkey = o_custkey
                                and o_comment not like '%pending%requests%'
                  group by
                         c_custkey
           ) as c_orders (c_custkey, c_count)
    group by
           c_count
    order by
           custdist desc,
           c_count desc;

    报错:

    Error: Error while compiling statement: FAILED: ParseException line 1:213 missing EOF at '(' near 'c_orders' (state=42000,code=40000)
  • 不支持把查询的结果当成字段比较。

    举例如下:

    select
           sum(l_extendedprice) / 7.0 as avg_yearly
    from
           lineitem,
           part
    where
           p_partkey = l_partkey
           and p_brand = 'Brand#25'
           and p_container = 'MED JAR'
           and l_quantity < (
                  select
                         0.2 * avg(l_quantity)
                  from
                         lineitem
                  where
                         l_partkey = p_partkey
           );

    报错:

    Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 14:4 Unsupported SubQuery Expression 'ps_suppkey': Correlating expression contains ambiguous column references. (state=42000,code=10249)
  • 多表关联的过滤条件中,不支持按not in或in的子查询过滤

    举例如下:

    select
    	p_brand,
    	p_type,
    	p_size,
    	count(distinct ps_suppkey) as supplier_cnt
    from
    	partsupp,
    	part
    where
    	p_partkey = ps_partkey
    	and p_brand <> 'Brand#12'
    	and p_type not like 'PROMO PLATED%'
    	and p_size in (25, 2, 43, 9, 35, 36, 48, 24)
    	and ps_suppkey in (
    		select
    			s_suppkey as ps_suppkey
    		from
    			supplier
    		where
    			s_comment like '%Customer%Complaints%'
    	)
    group by
    	p_brand,
    	p_type,
    	p_size
    order by
    	supplier_cnt desc,
    	p_brand,
    	p_type,
    	p_size;
    

    报错:

    Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 14:4 Unsupported SubQuery Expression 'ps_suppkey': Correlating expression contains ambiguous column references. (state=42000,code=10249)
  • 多表关联不支持not in和in在子查询中过滤。

    举例如下:

    select
           c_name,
           c_custkey,
           o_orderkey,
           o_orderdate,
           o_totalprice,
           sum(l_quantity)
    from
           customer,
           orders,
           lineitem
    where
           o_orderkey in (
                  select
                         l_orderkey
                  from
                         lineitem
                  group by
                         l_orderkey having
                                sum(l_quantity) > 315
           )
           and c_custkey = o_custkey
           and o_orderkey = l_orderkey
    group by
           c_name,
           c_custkey,
           o_orderkey,
           o_orderdate,
           o_totalprice
    order by
           o_totalprice desc,
           o_orderdate
    limit 100;

    报错:

    Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 13:0 Unsupported SubQuery Expression 'o_orderkey': Correlating expression contains ambiguous column references. (state=42000,code=10249)
  • 关联条件中不支持多个exists查询体。

    举例如下:

    select
           s_name,
           count(*) as numwait
    from
           supplier,
           lineitem l1,
           orders,
           nation
    where
           s_suppkey = l1.l_suppkey
           and o_orderkey = l1.l_orderkey
           and o_orderstatus = 'F'
           and l1.l_receiptdate > l1.l_commitdate
           and exists (
                  select
                         *
                  from
                         lineitem l2
                  where
                         l2.l_orderkey = l1.l_orderkey
                         and l2.l_suppkey <> l1.l_suppkey
           )
           and not exists (
                  select
                         *
                  from
                         lineitem l3
                  where
                         l3.l_orderkey = l1.l_orderkey
                         and l3.l_suppkey <> l1.l_suppkey
                         and l3.l_receiptdate > l3.l_commitdate
           )
           and s_nationkey = n_nationkey
           and n_name = 'VIETNAM'
    group by
           s_name
    order by
           numwait desc,
           s_name
    limit 100;

    报错:

    Error: Error while compiling statement: FAILED: SemanticException [Error 10249]: Line 23:8 Unsupported SubQuery Expression 'l_commitdate': Only 1 SubQuery expression is supported. (state=42000,code=10249)
  • 不支持多层级in嵌套子查询。

    举例如下:

    select i_item_id item_id,
             sum(sr_return_quantity) sr_item_qty
      from store_returns,
           item,
           date_dim
      where sr_item_sk = i_item_sk
      and   d_date    in
             (select d_date
             from date_dim
             where d_week_seq in
                     (select d_week_seq
                     from date_dim
               where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
      and   sr_returned_date_sk   = d_date_sk
      group by i_item_id),
      cr_items as
      (select i_item_id item_id,
             sum(cr_return_quantity) cr_item_qty
      from catalog_returns,
           item,
           date_dim
      where cr_item_sk = i_item_sk);

    报错:

    Unsupported SubQuery Expression 'd_week_seq': SubQuery cannot use the table alias: date_dim; this is also an alias in the Outer Query and SubQuery contains a unqualified column reference (state=42000,code=10249)