更新时间:2024-12-31 GMT+08:00

CREATE TABLE AS

语法

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [catalog_name.][db_name.]table_name [ ( column_alias, ... ) ]

[[PARTITIONED BY(col_name, ....)] [SORT BY ([column [, column ...]])] ]

[COMMENT 'table_comment']

[ WITH ( property_name = expression [, ...] ) ]

[[STORED AS file_format]

[LOCATION 'obs_path']

[TBLPROPERTIES (orc_table_property = value [, ...] ) ] ]

AS query

[ WITH [ NO ] DATA ]

限制

① 和 ②的语法不能组合使用。

当使用了avro_schema_url属性时,以下操作是不支持的:

  • 不支持CREATE TABLE AS操作
  • 使用CREATE TABLE时不支持partitioned_by 和 bucketed_by
  • 不支持使用alter table修改column

描述

创建包含SELECT查询结果的新表。

使用CREATE TABLE创建空表。

使用IF NOT EXISTS子句时,如果表已经存在则不会报错。

可选WITH子句可用于设置新创建的表的属性,如表的存储位置(location)、是不是外表(external)等。

示例

  • 用指定列的查询结果创建新表orders_column_aliased:
    CREATE TABLE orders_column_aliased (order_date, total_price) 
    AS
    SELECT orderdate, totalprice FROM orders;
  • 用表orders的汇总结果新建一个表orders_by_data:
    CREATE TABLE orders_by_date
    COMMENT 'Summary of orders by date'
    WITH (format = 'ORC')
    AS
    SELECT orderdate, sum(totalprice) AS price
    FROM orders
    GROUP BY orderdate;
  • 如果表orders_by_date不存在,则创建表orders_by_date:
    CREATE TABLE IF NOT EXISTS orders_by_date AS
    SELECT orderdate, sum(totalprice) AS price
    FROM orders
    GROUP BY orderdate;
  • 用和表orders具有相同schema创建新表empty_orders table,但是没数据:
    CREATE TABLE empty_orders AS
    SELECT *
    FROM orders
    WITH NO DATA;
  • 使用VALUES 创建表,参考 VALUES
  • 分区表示例:
    CREATE EXTERNAL TABLE hetu_copy(corderkey, corderstatus, ctotalprice, corderdate, cds) 
     PARTITIONED BY(cds)  
     SORT BY (corderkey, corderstatus) 
     COMMENT 'test'  
     STORED AS orc  
     LOCATION 'obs://{bucket}/user/hetuserver/tmp'  
     TBLPROPERTIES (orc_bloom_filter_fpp = 0.3, orc_compress = 'SNAPPY', orc_compress_size = 6710422, orc_bloom_filter_columns = 'corderstatus,ctotalprice')  
     as select * from hetu_test; 
      
     CREATE TABLE hetu_copy1(corderkey, corderstatus, ctotalprice, corderdate, cds)  
     WITH (partitioned_by = ARRAY['cds'], bucketed_by = ARRAY['corderkey', 'corderstatus'],  
     sorted_by = ARRAY['corderkey', 'corderstatus'],  
     bucket_count = 16,  
     orc_compress = 'SNAPPY',  
     orc_compress_size = 6710422,  
     orc_bloom_filter_columns = ARRAY['corderstatus', 'ctotalprice'],  
     external = true,  
     format = 'orc',  
     location = 'obs://{bucket}/user/hetuserver/tmp') 
      as select * from hetu_test;