CREATE TABLE
语法
①
CREATE TABLE [ IF NOT EXISTS ]
[catalog_name.][db_name.]table_name (
{ column_name data_type [ NOT NULL ]
[ COMMENT col_comment]
[ WITH ( property_name = expression [, ...] ) ]
| LIKE existing_table_name
[ { INCLUDING | EXCLUDING } PROPERTIES ]
}
[, ...]
)
[ COMMENT table_comment ]
[ WITH ( property_name = expression [, ...] ) ]
②
CREATE [EXTERNAL] TABLE [IF NOT EXISTS]
[catalog_name.][db_name.]table_name (
{ column_name data_type [ NOT NULL ]
[ COMMENT comment ]
[ WITH ( property_name = expression [, ...] ) ]
| LIKE existing_table_name
[ { INCLUDING | EXCLUDING } PROPERTIES ]
}
[, ...]
)
[COMMENT 'table_comment']
[PARTITIONED BY(col_name data_type, ....)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name, col_name, ...)] INTO num_buckets BUCKETS] ]
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION 'obs_path']
[TBLPROPERTIES (orc_table_property = value [, ...] ) ]
③
CREATE [EXTERNAL] TABLE [IF NOT EXISTS]
[catalog_name.][db_name.]table_name (
{ column_name data_type [ NOT NULL ]
[ COMMENT comment ]
[ WITH ( property_name = expression [, ...] ) ]
| LIKE existing_table_name
[ { INCLUDING | EXCLUDING } PROPERTIES ]
}
[, ...]
)
[PARTITIONED BY(col_name data_type, ....)]
[SORT BY ([column [, column ...]])]
[COMMENT 'table_comment']
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION 'obs_path']
[TBLPROPERTIES (orc_table_property = value [, ...] ) ]
限制
- 创建分区表时,如果bucket_count为-1且建表语句中未设置buckets,则使用默认值16。
- 默认外部表存储位置{lakeformation_catalog_url}/{schema_name}.db/{table_name},其中{lakeformation_catalog_url}对接的lakeformation catalog配置的location,{schema_name}为建表时使用的schema,{table_name}为表名。
- 不允许向托管表(表属性external = true)插入数据。
描述
使用CREATE TABLE创建一个具有指定列的、新的空表。使用CREATE TABLE AS创建带数据的表。
- 使用可选参数IF NOT EXISTS,如果表已经存在则不会报错。
- WITH子句可用于在新创建的表或单列上设置属性,如表的存储位置(location)、是不是外表(external)等。
- LIKE子句用于在新表中包含来自现有表的所有列定义。可以指定多个LIKE子句,从而允许从多个表中复制列。如果指定了INCLUDING PROPERTIES,则将所有表属性复制到新表中。如果WITH子句指定的属性名称与复制的属性名称相同,则将使用WITH子句中的值。默认是EXCLUDING PROPERTIES属性,而且最多只能为一个表指定INCLUDING PROPERTIES属性。
- PARTITIONED BY能够用于指定分区的列;CLUSTERED BY能够被用于指定分桶的列;SORT BY和 SORTED BY能够用于给指定的分桶列进行排序;BUCKETS能够被用于指定分桶数;EXTERNAL可用于指定创建外部表;STORED AS能被用于指定文件存储的格式;LOCATION能被用于指定在OBS上存储的路径。
示例
- 创建一个新表orders,使用子句with指定创建表的存储格式、存储位置、以及是否为外表。
通过“auto.purge”参数可以指定涉及到数据移除操作(如DROP、DELETE、INSERT OVERWRITE、TRUNCATE TABLE)时是否清除相关数据:
- "auto.purge"='true'时,清除元数据和数据文件。
- "auto.purge"='false'时,仅清除元数据,数据文件会移入OBS回收站。默认值为“false”,且不建议用户修改此属性,避免数据删除后无法恢复。
CREATE TABLE orders ( orderkey bigint, orderstatus varchar, totalprice double, orderdate date ) WITH (format = 'ORC', location='obs://bucket/user',orc_compress='ZLIB',external=true, "auto.purge"=false); -- 通过DESC FORMATTED 语句,可以查看建表的详细信息 desc formatted orders ; Describe Formatted Table ------------------------------------------------------------------------------ # col_name data_type comment orderkey bigint orderstatus varchar totalprice double orderdate date # Detailed Table Information Database: default Owner: admintest LastAccessTime: 0 Location: obs://bucket/user Table Type: EXTERNAL_TABLE # Table Parameters: EXTERNAL TRUE auto.purge false orc.compress.size 262144 orc.compression.codec ZLIB orc.row.index.stride 10000 orc.stripe.size 67108864 presto_query_id 20220812_084110_00050_srknk@default@HetuEngine presto_version 1.2.0-h0.cbu.mrs.320.r1-SNAPSHOT transient_lastDdlTime 1660293670 # Storage Information SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 (1 row)
- 创建一个新表,指定Row format:
--建表时,指定表的字段分隔符为‘,’号(如果创建外表,要求数据文件中的每条记录的字段是以逗号进行分隔) CREATE TABLE student( id string,birthday string, grade int, memo string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; --建表时,指定字段分隔符为'\t',换行符为'\n' CREATE TABLE test( id int, name string , tel string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n' STORED AS TEXTFILE;
- 如果表orders不存在,则创建表orders,并且增加表注释和列注释:
CREATE TABLE IF NOT EXISTS orders ( orderkey bigint, orderstatus varchar, totalprice double COMMENT 'Price in cents.', orderdate date ) COMMENT 'A table to keep track of orders.'; insert into orders values (202011181113,'online',9527,date '2020-11-11'), (202011181114,'online',666,date '2020-11-11'), (202011181115,'online',443,date '2020-11-11'), (202011181115,'offline',2896,date '2020-11-11');
- 使用表orders的列定义创建表bigger_orders:
CREATE TABLE bigger_orders ( another_orderkey bigint, LIKE orders, another_orderdate date ); SHOW CREATE TABLE bigger_orders ; Create Table --------------------------------------------------------------------- CREATE TABLE hive.default.bigger_orders ( another_orderkey bigint, orderkey bigint, orderstatus varchar, totalprice double, ordersdate date, another_orderdate date ) WITH ( external = false, format = 'ORC', location = 'obs://bucket/user/hive/warehouse/bigger_orders', orc_compress = 'GZIP', orc_compress_size = 262144, orc_row_index_stride = 10000, orc_stripe_size = 67108864 ) (1 row)
- 标号① 建表示例:
CREATE EXTERNAL TABLE hetu_test (orderkey bigint, orderstatus varchar, totalprice double, orderdate date) PARTITIONED BY(ds int) SORT BY (orderkey, orderstatus) COMMENT 'test' STORED AS ORC LOCATION '/user' TBLPROPERTIES (orc_compress = 'SNAPPY', orc_compress_size = 6710422, orc_bloom_filter_columns = 'orderstatus,totalprice');
- 标号② 建表示例:
CREATE EXTERNAL TABLE hetu_test1 (orderkey bigint, orderstatus varchar, totalprice double, orderdate date) COMMENT 'test' PARTITIONED BY(ds int) CLUSTERED BY (orderkey, orderstatus) SORTED BY (orderkey, orderstatus) INTO 16 BUCKETS STORED AS ORC LOCATION '/user' TBLPROPERTIES (orc_compress = 'SNAPPY', orc_compress_size = 6710422, orc_bloom_filter_columns = 'orderstatus,totalprice');
- 标号③ 建表示例:
CREATE TABLE hetu_test2 (orderkey bigint, orderstatus varchar, totalprice double, orderdate date, ds int) COMMENT 'This table is in Hetu syntax' WITH (partitioned_by = ARRAY['ds'], bucketed_by = ARRAY['orderkey', 'orderstatus'], sorted_by = ARRAY['orderkey', 'orderstatus'], bucket_count = 16, orc_compress = 'SNAPPY', orc_compress_size = 6710422, orc_bloom_filter_columns = ARRAY['orderstatus', 'totalprice'], external = true, format = 'orc', location = '/user');
- 查看表的建表语句:
show create table hetu_test1; Create Table ------------------------------------------------------------------ CREATE TABLE hive.default.hetu_test1 ( orderkey bigint, orderstatus varchar, totalprice double, orderdate date, ds integer ) COMMENT 'test' WITH ( bucket_count = 16, bucketed_by = ARRAY['orderkey','orderstatus'], bucketing_version = 1, external_location = 'obs://bucket/user', format = 'ORC', orc_bloom_filter_columns = ARRAY['orderstatus','totalprice'], orc_bloom_filter_fpp = 5E-2, orc_compress = 'SNAPPY', orc_compress_size = 6710422, orc_row_index_stride = 10000, orc_stripe_size = 67108864, partitioned_by = ARRAY['ds'], sorted_by = ARRAY['orderkey','orderstatus'] ) (1 row)
创建分区表
--创建schema CREATE SCHEMA hive.web WITH (location = 'obs://bucket/user'); --创建分区表 CREATE TABLE hive.web.page_views ( view_time timestamp, user_id bigint, page_url varchar, ds date, country varchar ) WITH ( format = 'ORC', partitioned_by = ARRAY['ds', 'country'], bucketed_by = ARRAY['user_id'], bucket_count = 50 ); --查看分区 SELECT * FROM hive.web."page_views$partitions"; ds | country ------------|--------- 2020-07-18 | US 2020-07-17 | US --插入数据 insert into hive.web.page_views values(timestamp '2020-07-17 23:00:15',bigint '15141','www.local.com',date '2020-07-17','US' ); insert into hive.web.page_views values(timestamp '2020-07-18 23:00:15',bigint '18148','www.local.com',date '2020-07-18','US' ); --查询数据 select * from hive.web.page_views; view_time | user_id | page_url | ds | country -------------------------|---------|---------------|------------|--------- 2020-07-17 23:00:15.000 | 15141 | www.local.com | 2020-07-17 | US 2020-07-18 23:00:15.000 | 18148 | www.local.com | 2020-07-18 | US