更新时间:2024-07-24 GMT+08:00

HetuEngine样例表数据准备

--创建具有TINYINT类型数据的表。   
 CREATE TABLE int_type_t1  (IT_COL1 TINYINT) ;     
--插入TINYINT类型数据   
 insert into int_type_t1  values (TINYINT'10');     
--创建具有DECIMAL类型数据的表。   
CREATE TABLE decimal_t1       (dec_col1 DECIMAL(10,3)) ;     
--插入具有DECIMAL类型数据   
insert into decimal_t1  values (DECIMAL '5.325' );      
create table array_tb(col1 array<int>,col2 array<array<int>>);
create table row_tb(col1 row(a int,b varchar));

--创建Map类型表
create table map_tb(col1 MAP<STRING,INT>);
--插入一条Map类型数据
insert into map_tb values(MAP(ARRAY['foo','bar'],ARRAY[1,2]));
--查询数据
select * from map_tb; --   {bar=2, foo=1} 

--创建ROW表
create table row_tb (id int,col1 row(a int,b varchar));
--插入ROW类型数据
insert into row_tb values (1,ROW(1,'SSS'));
--查询数据
select * from row_tb; -- 
 id |     col1     
----|--------------
  1 | {a=1, b=SSS} 
select col1.b from row_tb; -- SSS
select col1[1] from row_tb; -- 1

-- 创建struct 表
create table struct_tab (id int,col1 struct<col2: integer, col3: string>);
--插入 struct 类型数据
 insert into struct_tab VALUES(1, struct<2, 'test'>);
--查询数据
select * from struct_tab; -- 
 id |        col1         
----|---------------------
  1 | {col2=2, col3=test} 

 
--创建一个名为web的schema:
CREATE SCHEMA web;
--在hive 数据源下创建一个名为sales的schema:
CREATE SCHEMA hive.sales;
--创建一个名为traffic,如果不存在的话:
CREATE SCHEMA IF NOT EXISTS traffic;
 
--创建一个新表orders,使用子句with指定创建表的存储格式、存储位置、以及是否为外表:
CREATE TABLE orders ( 
orderkey bigint, 
orderstatus varchar, 
totalprice double, 
orderdate date 
) 
WITH (format = 'ORC', location='/user',external=true);
--如果表orders不存在,则创建表orders,并且增加表注释和列注释:
CREATE TABLE IF NOT EXISTS new_orders ( 
orderkey bigint, 
orderstatus varchar, 
totalprice double COMMENT 'Price in cents.', 
orderdate date 
) 
COMMENT 'A table to keep track of orders.';
--使用表orders的列定义创建表bigger_orders:
CREATE TABLE bigger_orders ( 
another_orderkey bigint, 
LIKE orders, 
another_orderdate date 
);
 
CREATE SCHEMA hive.web WITH (location = 'hdfs://hacluster/user'); 
--创建分区表 
CREATE TABLE hive.web.page_views ( 
  view_time timestamp, 
  user_id bigint, 
  page_url varchar, 
  ds date, 
  country varchar 
) 
WITH ( 
  format = 'ORC', 
  partitioned_by = ARRAY['ds', 'country'], 
  bucketed_by = ARRAY['user_id'], 
  bucket_count = 50 
); 
--插入空的分区 
CALL system.create_empty_partition( 
    schema_name => 'web', 
    table_name => 'page_views', 
    partition_columns => ARRAY['ds', 'country'], 
    partition_values => ARRAY['2020-07-17', 'US']); 
     
CALL system.create_empty_partition( 
    schema_name => 'web', 
    table_name => 'page_views', 
    partition_columns => ARRAY['ds', 'country'], 
    partition_values => ARRAY['2020-07-18', 'US']); 
--插入数据 
insert into hive.web.page_views values(timestamp '2020-07-17 23:00:15',bigint '15141','www.local.com',date '2020-07-17','US' ); 
insert into hive.web.page_views values(timestamp '2020-07-17 23:00:16',bigint '15142','www.abc.com',date '2020-07-17','US' ); 
insert into hive.web.page_views values(timestamp '2020-07-18 23:00:18',bigint '18148','www.local.com',date '2020-07-18','US' );

-- 删除分区表数据(删除where子句指定的分区所有数据)
delete from hive.web.page_views where ds=date '2020-07-17' and country='US'; 
 
--用指定列的查询结果创建新表orders_column_aliased:
CREATE TABLE orders_column_aliased (order_date, total_price)  
AS 
SELECT orderdate, totalprice FROM orders;
--用表orders的汇总结果新建一个表orders_by_data:
CREATE TABLE orders_by_date 
COMMENT 'Summary of orders by date' 
WITH (format = 'ORC') 
AS 
SELECT orderdate, sum(totalprice) AS price 
FROM orders 
GROUP BY orderdate;
--如果表orders_by_date不存在,则创建表orders_by_date:
CREATE TABLE IF NOT EXISTS orders_by_date AS 
SELECT orderdate, sum(totalprice) AS price 
FROM orders 
GROUP BY orderdate;
--用和表orders具有相同schema创建新表empty_orders table,但是没数据:
CREATE TABLE empty_orders AS 
SELECT * 
FROM orders 
WITH NO DATA;
 
--通过表orders创建一个视图test_view:
CREATE VIEW test_view (oderkey comment 'orderId',orderstatus comment 'status',half comment 'half') AS 
SELECT orderkey, orderstatus, totalprice / 2 AS half FROM orders;
--通过表orders的汇总结果创建视图orders_by_date_view:
CREATE VIEW orders_by_date_view AS 
SELECT orderdate, sum(totalprice) AS price 
FROM orders 
GROUP BY orderdate;
--创建一个新视图来替换已经存在的视图:
CREATE OR REPLACE VIEW test_view AS 
SELECT orderkey, orderstatus, totalprice / 4 AS quarter 
FROM orders;
 
--更改已存在表的定义。
--数据准备
create table users (id int,name varchar);
--将表名从users 修改为 people:
ALTER TABLE users RENAME TO people;
--在表people中增加名为zip的列:
ALTER TABLE people ADD COLUMN zip varchar;
--从表people中删除名为zip的列:
ALTER TABLE people DROP COLUMN zip;
--将表people中列名id更改为user_id:
ALTER TABLE people RENAME COLUMN id TO user_id;
 
create table testfordrop(name varchar);
 
--创建视图
create view orders_by_date as select * from orders;
--设置表的注释信息,可以通过设置注释信息为NULL来删除注释
COMMENT ON TABLE people IS 'master table';
 
--创建一个具有列名id、name的新表:
CREATE TABLE example AS 
SELECT * FROM ( 
VALUES 
(1, 'a'), 
(2, 'b'), 
(3, 'c') 
) AS t (id, name);
 
--创建fruit 和 fruit_copy表
create table fruit (name varchar,price double); 
create table fruit_copy (name varchar,price double);
--向 fruit 表中插入一行数据
insert into fruit values('LIchee',32);
--向fruit 表中插入多行数据
insert into fruit values('banana',10),('peach',6),('lemon',12),('apple',7);
--将fruit表中的数据行加载到fruit_copy 表中,执行后表中有5条记录
insert into fruit_copy select * from fruit;
--先清空fruit_copy表,再将fruit 中的数据加载到表中,执行之后表中有2条记录。
insert overwrite fruit_copy select *  from fruit limit 2;
 
--创建一个航运表 
create table shipping(origin_state varchar(25),origin_zip integer,destination_state varchar(25) ,destination_zip integer,package_weight integer); 
 
--插入数据 
insert into shipping values ('California',94131,'New Jersey',8648,13), 
('California',94131,'New Jersey',8540,42), 
('California',90210,'Connecticut',6927,1337), 
('California',94131,'Colorado',80302,5), 
('New York',10002,'New Jersey',8540,3), 
('New Jersey',7081,'Connecticut',6708,225);
 
--创建表并插入数据 
create table cookies_log (cookieid varchar,createtime  date,pv int); 
insert into cookies_log values  
    ('cookie1',date '2020-07-10',1), 
    ('cookie1',date '2020-07-11',5),  
    ('cookie1',date '2020-07-12',7),  
    ('cookie1',date '2020-07-13',3), 
    ('cookie1',date '2020-07-14',2),  
    ('cookie1',date '2020-07-15',4),  
    ('cookie1',date '2020-07-16',4),  
    ('cookie2',date '2020-07-10',2), 
    ('cookie2',date '2020-07-11',3),  
    ('cookie2',date '2020-07-12',5), 
     ('cookie2',date '2020-07-13',6),  
      ('cookie2',date '2020-07-14',3),  
      ('cookie2',date '2020-07-15',9), 
    ('cookie2',date '2020-07-16',7);
 
-- 创建表 
create  table new_shipping (origin_state varchar,origin_zip varchar,packages int ,total_cost int); 
 
-- 插入数据 
insert into new_shipping  
values  
('California','94131',25,100), 
('California','P332a',5,72), 
('California','94025',0,155), 
('New Jersey','08544',225,490); 
 
--创建数据表并插入数据 
create table salary (dept varchar, userid varchar, sal double); 
 insert into salary values ('d1','user1',1000),('d1','user2',2000),('d1','user3',3000),('d2','user4',4000),('d2','user5',5000);
 
-- 数据准备 
create table cookie_views( cookieid varchar,createtime timestamp,url varchar); 
insert into cookie_views values 
('cookie1',timestamp '2020-07-10 10:00:02','url20'), 
('cookie1',timestamp '2020-07-10 10:00:00','url10'), 
('cookie1',timestamp '2020-07-10 10:03:04','urll3'), 
('cookie1',timestamp '2020-07-10 10:50:05','url60'), 
('cookie1',timestamp '2020-07-10 11:00:00','url70'), 
('cookie1',timestamp '2020-07-10 10:10:00','url40'), 
('cookie1',timestamp '2020-07-10 10:50:01','url50'), 
('cookie2',timestamp '2020-07-10 10:00:02','url23'), 
('cookie2',timestamp '2020-07-10 10:00:00','url11'), 
('cookie2',timestamp '2020-07-10 10:03:04','url33'), 
('cookie2',timestamp '2020-07-10 10:50:05','url66'), 
('cookie2',timestamp '2020-07-10 11:00:00','url77'), 
('cookie2',timestamp '2020-07-10 10:10:00','url47'), 
('cookie2',timestamp '2020-07-10 10:50:01','url55');
 
CREATE TABLE visit_summaries (  visit_date date,  hll varbinary); 
 
insert into visit_summaries select createtime,cast(approx_set(cookieid) as varbinary) from cookies_log group by createtime;
 
CREATE TABLE nation (name varchar,  regionkey integer);
insert into nation values ('ETHIOPIA',0),
('MOROCCO',0),
('ETHIOPIA',0),
('KENYA',0),
('ALGERIA',0),
('MOZAMBIQUE',0);
 
CREATE TABLE region (  name varchar,  regionkey integer);
insert into region values ('ETHIOPIA',0),
('MOROCCO',0),
('ETHIOPIA',0),
('KENYA',0),
('ALGERIA',0),
('MOZAMBIQUE',0);