Updated on 2024-10-25 GMT+08:00

Data preparation for the sample table in this document

-- Create a table containing TINYINT data:   
 CREATE TABLE int_type_t1  (IT_COL1 TINYINT) ;     
--Insert data of the TINYINT type.
 insert into int_type_t1  values (TINYINT'10');     
-- Create a table containing DECIMAL data:   
CREATE TABLE decimal_t1       (dec_col1 DECIMAL(10,3)) ;     
– Insert data of the DECIMAL type:   
insert into decimal_t1  values (DECIMAL '5.325' );      
create table array_tb(col1 array<int>,col2 array<array<int>>);
create table row_tb(col1 row(a int,b varchar));

-- Create a Map table.
create table map_tb(col1 MAP<STRING,INT>);
-- Insert a piece of data of the Map type.
insert into map_tb values(MAP(ARRAY['foo','bar'],ARRAY[1,2]));
--Query data.
select * from map_tb; --   {bar=2, foo=1} 

-- Create a ROW table.
create table row_tb (id int,col1 row(a int,b varchar));
-- Insert data of the ROW type.
insert into row_tb values (1,ROW(1,'SSS'));
--Query data.
select * from row_tb; -- 
 id |     col1     
----|--------------
  1 | {a=1, b=SSS} 
select col1.b from row_tb; -- SSS
select col1[1] from row_tb; -- 1

-- Creating a STRUCT table.
create table struct_tab (id int,col1 struct<col2: integer, col3: string>);
-- Insert data of the STRUCT type.
 insert into struct_tab VALUES(1, struct<2, 'test'>);
--Query data.
select * from struct_tab; -- 
 id |        col1         
----|---------------------
  1 | {col2=2, col3=test} 

 
-- Creating a schema named "web":
CREATE SCHEMA web;
--Create a schema named sales in the Hive data source:
CREATE SCHEMA hive.sales;
-- Creating a schema named traffic, if it does not exist:
CREATE SCHEMA IF NOT EXISTS traffic;
 
-- Create a new table orders and use the WITH clause to specify the storage format, storage location, and whether the table is an external table.
CREATE TABLE orders ( 
orderkey bigint, 
orderstatus varchar, 
totalprice double, 
orderdate date 
) 
WITH (format = 'ORC', location='/user',external=true);
-- If the orders table does not exist, create the orders table and add table comments and column comments:
CREATE TABLE IF NOT EXISTS new_orders ( 
orderkey bigint, 
orderstatus varchar, 
totalprice double COMMENT 'Price in cents.', 
orderdate date 
) 
COMMENT 'A table to keep track of orders.';
-- Create the bigger_orders table using the column definition of the orders table:
CREATE TABLE bigger_orders ( 
another_orderkey bigint, 
LIKE orders, 
another_orderdate date 
);
 
CREATE SCHEMA hive.web WITH (location = 'hdfs://hacluster/user'); 
--Create a partitioned table.
CREATE TABLE hive.web.page_views ( 
  view_time timestamp, 
  user_id bigint, 
  page_url varchar, 
  ds date, 
  country varchar 
) 
WITH ( 
  format = 'ORC', 
  partitioned_by = ARRAY['ds', 'country'], 
  bucketed_by = ARRAY['user_id'], 
  bucket_count = 50 
); 
--Insert an empty partition.
CALL system.create_empty_partition( 
    schema_name => 'web', 
    table_name => 'page_views', 
    partition_columns => ARRAY['ds', 'country'], 
    partition_values => ARRAY['2020-07-17', 'US']); 
     
CALL system.create_empty_partition( 
    schema_name => 'web', 
    table_name => 'page_views', 
    partition_columns => ARRAY['ds', 'country'], 
    partition_values => ARRAY['2020-07-18', 'US']); 
--Insert data.
insert into hive.web.page_views values(timestamp '2020-07-17 23:00:15',bigint '15141','www.local.com',date '2020-07-17','US' ); 
insert into hive.web.page_views values(timestamp '2020-07-17 23:00:16',bigint '15142','www.abc.com',date '2020-07-17','US' ); 
insert into hive.web.page_views values(timestamp '2020-07-18 23:00:18',bigint '18148','www.local.com',date '2020-07-18','US' );

-- Delete all data in the partition specified by the WHERE clause from the partitioned table.
delete from hive.web.page_views where ds=date '2020-07-17' and country='US'; 
 
--Run the following statement to create the orders_column_aliased table based on the query result of a specified column:
CREATE TABLE orders_column_aliased (order_date, total_price)  
AS 
SELECT orderdate, totalprice FROM orders;
--Create the orders_by_data table based on the summary result of the orders table.
CREATE TABLE orders_by_date 
COMMENT 'Summary of orders by date' 
WITH (format = 'ORC') 
AS 
SELECT orderdate, sum(totalprice) AS price 
FROM orders 
GROUP BY orderdate;
--If the orders_by_date table does not exist, create the orders_by_date table:
CREATE TABLE IF NOT EXISTS orders_by_date AS 
SELECT orderdate, sum(totalprice) AS price 
FROM orders 
GROUP BY orderdate;
--Create the empty_orders table using the schema that is the same as that of the orders table but does not contain data.
CREATE TABLE empty_orders AS 
SELECT * 
FROM orders 
WITH NO DATA;
 
--Create a view named test_view in the orders table:
CREATE VIEW test_view (oderkey comment 'orderId',orderstatus comment 'status',half comment 'half') AS 
SELECT orderkey, orderstatus, totalprice / 2 AS half FROM orders;
--Create the orders_by_date_view view based on the summary result of the orders table.
CREATE VIEW orders_by_date_view AS 
SELECT orderdate, sum(totalprice) AS price 
FROM orders 
GROUP BY orderdate;
--Create a new view to replace the existing view:
CREATE OR REPLACE VIEW test_view AS 
SELECT orderkey, orderstatus, totalprice / 4 AS quarter 
FROM orders;
 
--Modify the definition of an existing table.
--Data preparation.
create table users (id int,name varchar);
--Change the table name from users to people:
ALTER TABLE users RENAME TO people;
--Add the zip column to the people table:
ALTER TABLE people ADD COLUMN zip varchar;
--Delete the zip column from the people table:
ALTER TABLE people DROP COLUMN zip;
--change the column name id in the people table to user_id:
ALTER TABLE people RENAME COLUMN id TO user_id;
 
create table testfordrop(name varchar);
 
--CreatE a view.
create view orders_by_date as select * from orders;
--Set the comment information of a table. You can delete the comment by setting the comment information to NULL.
COMMENT ON TABLE people IS 'master table';
 
--create a table with the column id and name:
CREATE TABLE example AS 
SELECT * FROM ( 
VALUES 
(1, 'a'), 
(2, 'b'), 
(3, 'c') 
) AS t (id, name);
 
--Create the fruit and fruit_copy tables:
create table fruit (name varchar,price double); 
create table fruit_copy (name varchar,price double);
--Insert a row of data into the fruit table:
insert into fruit values('LIchee',32);
--Insert multiple lines of data into the fruit table:
insert into fruit values('banana',10),('peach',6),('lemon',12),('apple',7);
--Load the data lines in the fruit table to the fruit_copy table. After the execution, there are five records in the table.
insert into fruit_copy select * from fruit;
--Clear the fruit_copy table, and then load the data in the fruit table to the table. After the execution, there are two records in the fruit_copy table.
insert overwrite fruit_copy select *  from fruit limit 2;
 
--Create a shipping table:
create table shipping(origin_state varchar(25),origin_zip integer,destination_state varchar(25) ,destination_zip integer,package_weight integer); 
 
--Insert data.
insert into shipping values ('California',94131,'New Jersey',8648,13), 
('California',94131,'New Jersey',8540,42), 
('California',90210,'Connecticut',6927,1337), 
('California',94131,'Colorado',80302,5), 
('New York',10002,'New Jersey',8540,3), 
('New Jersey',7081,'Connecticut',6708,225);
 
--Create a table and insert data into the table. 
create table cookies_log (cookieid varchar,createtime  date,pv int); 
insert into cookies_log values  
    ('cookie1',date '2020-07-10',1), 
    ('cookie1',date '2020-07-11',5),  
    ('cookie1',date '2020-07-12',7),  
    ('cookie1',date '2020-07-13',3), 
    ('cookie1',date '2020-07-14',2),  
    ('cookie1',date '2020-07-15',4),  
    ('cookie1',date '2020-07-16',4),  
    ('cookie2',date '2020-07-10',2), 
    ('cookie2',date '2020-07-11',3),  
    ('cookie2',date '2020-07-12',5), 
     ('cookie2',date '2020-07-13',6),  
      ('cookie2',date '2020-07-14',3),  
      ('cookie2',date '2020-07-15',9), 
    ('cookie2',date '2020-07-16',7);
 
--Create a table. 
create  table new_shipping (origin_state varchar,origin_zip varchar,packages int ,total_cost int); 
 
--Insert data. 
insert into new_shipping  
values  
('California','94131',25,100), 
('California','P332a',5,72), 
('California','94025',0,155), 
('New Jersey','08544',225,490); 
 
--Create a data table and insert data. 
create table salary (dept varchar, userid varchar, sal double); 
 insert into salary values ('d1','user1',1000),('d1','user2',2000),('d1','user3',3000),('d2','user4',4000),('d2','user5',5000);
 
-- Prepare data. 
create table cookie_views( cookieid varchar,createtime timestamp,url varchar); 
insert into cookie_views values 
('cookie1',timestamp '2020-07-10 10:00:02','url20'), 
('cookie1',timestamp '2020-07-10 10:00:00','url10'), 
('cookie1',timestamp '2020-07-10 10:03:04','urll3'), 
('cookie1',timestamp '2020-07-10 10:50:05','url60'), 
('cookie1',timestamp '2020-07-10 11:00:00','url70'), 
('cookie1',timestamp '2020-07-10 10:10:00','url40'), 
('cookie1',timestamp '2020-07-10 10:50:01','url50'), 
('cookie2',timestamp '2020-07-10 10:00:02','url23'), 
('cookie2',timestamp '2020-07-10 10:00:00','url11'), 
('cookie2',timestamp '2020-07-10 10:03:04','url33'), 
('cookie2',timestamp '2020-07-10 10:50:05','url66'), 
('cookie2',timestamp '2020-07-10 11:00:00','url77'), 
('cookie2',timestamp '2020-07-10 10:10:00','url47'), 
('cookie2',timestamp '2020-07-10 10:50:01','url55');
 
CREATE TABLE visit_summaries (  visit_date date,  hll varbinary); 
 
insert into visit_summaries select createtime,cast(approx_set(cookieid) as varbinary) from cookies_log group by createtime;
 
CREATE TABLE nation (name varchar,  regionkey integer);
insert into nation values ('ETHIOPIA',0),
('MOROCCO',0),
('ETHIOPIA',0),
('KENYA',0),
('ALGERIA',0),
('MOZAMBIQUE',0);
 
CREATE TABLE region (  name varchar,  regionkey integer);
insert into region values ('ETHIOPIA',0),
('MOROCCO',0),
('ETHIOPIA',0),
('KENYA',0),
('ALGERIA',0),
('MOZAMBIQUE',0);