更新时间:2024-10-22 GMT+08:00
分享

建表与导入SSB数据

创建SSB目标表

连接DWS数据库后执行以下SQL语句。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
CREATE TABLE CUSTOMER
(
C_CUSTKEY     BIGINT NOT NULL,
C_NAME        VARCHAR(25) NOT NULL,
C_ADDRESS     VARCHAR(40) NOT NULL,
C_CITY        VARCHAR(25) NOT NULL,
C_NATION      VARCHAR(25) NOT NULL,
C_REGION      VARCHAR(25) NOT NULL,
C_PHONE       VARCHAR(15) NOT NULL,
C_MKTSEGMENT  VARCHAR(10) NOT NULL
)
WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)
DISTRIBUTE BY hash(C_CUSTKEY) ;
CREATE TABLE SUPPLIER
(
S_SUPPKEY  BIGINT NOT NULL
, S_NAME  VARCHAR(25) NOT NULL
, S_ADDRESS  VARCHAR(40) NOT NULL
, S_CITY        VARCHAR(25) NOT NULL
, S_NATION      VARCHAR(25) NOT NULL
, S_REGION      VARCHAR(25) NOT NULL
, S_PHONE VARCHAR(15) NOT NULL
)
WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)
DISTRIBUTE BY hash(S_SUPPKEY);
CREATE TABLE PART
(
P_PARTKEY  BIGINT NOT NULL
, P_NAME  VARCHAR(55) NOT NULL
, P_MFGR  VARCHAR(25) NOT NULL
, P_CATEGORY   VARCHAR(25) NOT NULL
, P_BRAND VARCHAR(10) NOT NULL
, P_COLOR VARCHAR(20) NOT NULL
, P_TYPE  VARCHAR(25) NOT NULL
, P_SIZE  BIGINT NOT NULL
, P_CONTAINER   VARCHAR(10) NOT NULL
)
WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)
DISTRIBUTE BY hash(P_PARTKEY);
CREATE TABLE lineorder
(
LO_ORDERKEY             BIGINT NOT NULL,
LO_LINENUMBER           BIGINT NOT NULL,
LO_CUSTKEY              BIGINT NOT NULL,
LO_PARTKEY              BIGINT NOT NULL,
LO_SUPPKEY              BIGINT NOT NULL,
LO_ORDERDATE            DATE NOT NULL,
LO_ORDERPRIORITY        VARCHAR(15) NOT NULL,
LO_SHIPPRIORITY         BIGINT NOT NULL,
LO_QUANTITY             BIGINT NOT NULL,
LO_EXTENDEDPRICE        BIGINT NOT NULL,
LO_ORDTOTALPRICE        BIGINT NOT NULL,
LO_DISCOUNT             BIGINT NOT NULL,
LO_REVENUE              BIGINT NOT NULL,
LO_SUPPLYCOST           BIGINT NOT NULL,
LO_TAX                  BIGINT NOT NULL,
LO_COMMITDATE           DATE NOT NULL,
LO_SHIPMODE             VARCHAR(10) NOT NULL
)
WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true)
DISTRIBUTE BY hash(LO_ORDERKEY)
PARTITION BY RANGE(LO_ORDERDATE)
(
PARTITION LO_ORDERDATE_1 VALUES LESS THAN('1992-04-01 00:00:00'),
PARTITION LO_ORDERDATE_2 VALUES LESS THAN('1992-07-01 00:00:00'),
PARTITION LO_ORDERDATE_3 VALUES LESS THAN('1992-10-01 00:00:00'),
PARTITION LO_ORDERDATE_4 VALUES LESS THAN('1993-01-01 00:00:00'),
PARTITION LO_ORDERDATE_5 VALUES LESS THAN('1993-04-01 00:00:00'),
PARTITION LO_ORDERDATE_6 VALUES LESS THAN('1993-07-01 00:00:00'),
PARTITION LO_ORDERDATE_7 VALUES LESS THAN('1993-10-01 00:00:00'),
PARTITION LO_ORDERDATE_8 VALUES LESS THAN('1994-01-01 00:00:00'),
PARTITION LO_ORDERDATE_9 VALUES LESS THAN('1994-04-01 00:00:00'),
PARTITION LO_ORDERDATE_10 VALUES LESS THAN('1994-07-01 00:00:00'),
PARTITION LO_ORDERDATE_11 VALUES LESS THAN('1994-10-01 00:00:00'),
PARTITION LO_ORDERDATE_12 VALUES LESS THAN('1995-01-01 00:00:00'),
PARTITION LO_ORDERDATE_13 VALUES LESS THAN('1995-04-01 00:00:00'),
PARTITION LO_ORDERDATE_14 VALUES LESS THAN('1995-07-01 00:00:00'),
PARTITION LO_ORDERDATE_15 VALUES LESS THAN('1995-10-01 00:00:00'),
PARTITION LO_ORDERDATE_16 VALUES LESS THAN('1996-01-01 00:00:00'),
PARTITION LO_ORDERDATE_17 VALUES LESS THAN('1996-04-01 00:00:00'),
PARTITION LO_ORDERDATE_18 VALUES LESS THAN('1996-07-01 00:00:00'),
PARTITION LO_ORDERDATE_19 VALUES LESS THAN('1996-10-01 00:00:00'),
PARTITION LO_ORDERDATE_20 VALUES LESS THAN('1997-01-01 00:00:00'),
PARTITION LO_ORDERDATE_21 VALUES LESS THAN('1997-04-01 00:00:00'),
PARTITION LO_ORDERDATE_22 VALUES LESS THAN('1997-07-01 00:00:00'),
PARTITION LO_ORDERDATE_23 VALUES LESS THAN('1997-10-01 00:00:00'),
PARTITION LO_ORDERDATE_24 VALUES LESS THAN('1998-01-01 00:00:00'),
PARTITION LO_ORDERDATE_25 VALUES LESS THAN('1998-04-01 00:00:00'),
PARTITION LO_ORDERDATE_26 VALUES LESS THAN('1998-07-01 00:00:00'),
PARTITION LO_ORDERDATE_27 VALUES LESS THAN('1998-10-01 00:00:00'),
PARTITION LO_ORDERDATE_28 VALUES LESS THAN('1999-01-01 00:00:00')
);
SET enable_hstoreopt_auto_bitmap=true;
CREATE TABLE lineorder_flat
(
LO_ORDERKEY             BIGINT NOT NULL,
LO_LINENUMBER           BIGINT NOT NULL,
LO_CUSTKEY              BIGINT NOT NULL,
LO_PARTKEY              BIGINT NOT NULL,
LO_SUPPKEY              BIGINT NOT NULL,
LO_ORDERDATE            DATE NOT NULL,
LO_ORDERPRIORITY        VARCHAR(15) NOT NULL,
LO_SHIPPRIORITY         BIGINT NOT NULL,
LO_QUANTITY             BIGINT NOT NULL,
LO_EXTENDEDPRICE        BIGINT NOT NULL,
LO_ORDTOTALPRICE        BIGINT NOT NULL,
LO_DISCOUNT             BIGINT NOT NULL,
LO_REVENUE              BIGINT NOT NULL,
LO_SUPPLYCOST           BIGINT NOT NULL,
LO_TAX                  BIGINT NOT NULL,
LO_COMMITDATE           DATE NOT NULL,
LO_SHIPMODE             VARCHAR(10) NOT NULL,
C_NAME        VARCHAR(25) NOT NULL
, C_ADDRESS     VARCHAR(40) NOT NULL
, C_CITY        VARCHAR(25) NOT NULL
, C_NATION      VARCHAR(25) NOT NULL
, C_REGION      VARCHAR(25) NOT NULL
, C_PHONE       VARCHAR(15) NOT NULL
, C_MKTSEGMENT  VARCHAR(10) NOT NULL
, S_NAME        VARCHAR(25) NOT NULL
, S_ADDRESS     VARCHAR(40) NOT NULL
, S_CITY        VARCHAR(25) NOT NULL
, S_NATION      VARCHAR(25) NOT NULL
, S_REGION      VARCHAR(25) NOT NULL
, S_PHONE       VARCHAR(15) NOT NULL
, P_NAME  VARCHAR(55) NOT NULL
, P_MFGR  VARCHAR(25) NOT NULL
, P_CATEGORY   VARCHAR(25) NOT NULL
, P_BRAND VARCHAR(10) NOT NULL
, P_COLOR VARCHAR(20) NOT NULL
, P_TYPE  VARCHAR(25) NOT NULL
, P_SIZE  BIGINT NOT NULL
, P_CONTAINER   VARCHAR(10) NOT NULL
, Partial Cluster Key(s_region,s_nation,s_city)
) WITH (orientation=column, colversion=2.0,enable_hstore=true,enable_hstore_opt=true,secondary_part_column='p_mfgr', secondary_part_num=8)
DISTRIBUTE BY hash(LO_ORDERKEY)
PARTITION BY RANGE(LO_ORDERDATE)
(
PARTITION LO_ORDERDATE_1 VALUES LESS THAN('1992-04-01 00:00:00'),
PARTITION LO_ORDERDATE_2 VALUES LESS THAN('1992-07-01 00:00:00'),
PARTITION LO_ORDERDATE_3 VALUES LESS THAN('1992-10-01 00:00:00'),
PARTITION LO_ORDERDATE_4 VALUES LESS THAN('1993-01-01 00:00:00'),
PARTITION LO_ORDERDATE_5 VALUES LESS THAN('1993-04-01 00:00:00'),
PARTITION LO_ORDERDATE_6 VALUES LESS THAN('1993-07-01 00:00:00'),
PARTITION LO_ORDERDATE_7 VALUES LESS THAN('1993-10-01 00:00:00'),
PARTITION LO_ORDERDATE_8 VALUES LESS THAN('1994-01-01 00:00:00'),
PARTITION LO_ORDERDATE_9 VALUES LESS THAN('1994-04-01 00:00:00'),
PARTITION LO_ORDERDATE_10 VALUES LESS THAN('1994-07-01 00:00:00'),
PARTITION LO_ORDERDATE_11 VALUES LESS THAN('1994-10-01 00:00:00'),
PARTITION LO_ORDERDATE_12 VALUES LESS THAN('1995-01-01 00:00:00'),
PARTITION LO_ORDERDATE_13 VALUES LESS THAN('1995-04-01 00:00:00'),
PARTITION LO_ORDERDATE_14 VALUES LESS THAN('1995-07-01 00:00:00'),
PARTITION LO_ORDERDATE_15 VALUES LESS THAN('1995-10-01 00:00:00'),
PARTITION LO_ORDERDATE_16 VALUES LESS THAN('1996-01-01 00:00:00'),
PARTITION LO_ORDERDATE_17 VALUES LESS THAN('1996-04-01 00:00:00'),
PARTITION LO_ORDERDATE_18 VALUES LESS THAN('1996-07-01 00:00:00'),
PARTITION LO_ORDERDATE_19 VALUES LESS THAN('1996-10-01 00:00:00'),
PARTITION LO_ORDERDATE_20 VALUES LESS THAN('1997-01-01 00:00:00'),
PARTITION LO_ORDERDATE_21 VALUES LESS THAN('1997-04-01 00:00:00'),
PARTITION LO_ORDERDATE_22 VALUES LESS THAN('1997-07-01 00:00:00'),
PARTITION LO_ORDERDATE_23 VALUES LESS THAN('1997-10-01 00:00:00'),
PARTITION LO_ORDERDATE_24 VALUES LESS THAN('1998-01-01 00:00:00'),
PARTITION LO_ORDERDATE_25 VALUES LESS THAN('1998-04-01 00:00:00'),
PARTITION LO_ORDERDATE_26 VALUES LESS THAN('1998-07-01 00:00:00'),
PARTITION LO_ORDERDATE_27 VALUES LESS THAN('1998-10-01 00:00:00'),
PARTITION LO_ORDERDATE_28 VALUES LESS THAN('1999-01-01 00:00:00')
) ;
SET enable_hstoreopt_auto_bitmap=false;

创建SSB数据集的GDS外表

连接DWS数据库后执行以下SQL语句。

以下每个外表的“gsfs://192.168.0.90:500x/xxx | gsfs://192.168.0.90:500x/xxx”中的IP地址和端口,请替换成安装和启动GDS中的对应的GDS的监听IP和端口。如启动两个GDS,则使用“|”区分。如果启动多个GDS,需要将所有GDS的监听IP和端口配置到外表中。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
DROP FOREIGN TABLE IF EXISTS customer_load;
CREATE FOREIGN TABLE customer_load
(
   C_CUSTKEY     BIGINT NOT NULL
  , C_NAME        VARCHAR(25) NOT NULL
  , C_ADDRESS     VARCHAR(40) NOT NULL
  , C_CITY        VARCHAR(25) NOT NULL
  , C_NATION      VARCHAR(25) NOT NULL
  , C_REGION      VARCHAR(25) NOT NULL
  , C_PHONE       VARCHAR(15) NOT NULL
  , C_MKTSEGMENT  VARCHAR(10) NOT NULL)
SERVER gsmpp_server
OPTIONS(location 'gsfs://192.168.0.90:5004/customer.tbl*',
format 'text',
delimiter '|',
encoding 'utf8',
mode 'Normal'
);
 
DROP FOREIGN TABLE IF EXISTS supplier_load;
CREATE FOREIGN TABLE supplier_load
(
 S_SUPPKEY  BIGINT NOT NULL
  , S_NAME  VARCHAR(25) NOT NULL
  , S_ADDRESS  VARCHAR(40) NOT NULL
  , S_CITY        VARCHAR(25) NOT NULL
  , S_NATION      VARCHAR(25) NOT NULL
  , S_REGION      VARCHAR(25) NOT NULL
  , S_PHONE VARCHAR(15) NOT NULL)
SERVER gsmpp_server
OPTIONS(location 'gsfs://192.168.0.90:5004/supplier.tbl*',
format 'text',
delimiter '|',
encoding 'utf8',
mode 'Normal'
);
 
DROP FOREIGN TABLE IF EXISTS part_load;
CREATE FOREIGN TABLE part_load
(
 P_PARTKEY  BIGINT NOT NULL
  , P_NAME  VARCHAR(55) NOT NULL
  , P_MFGR  VARCHAR(25) NOT NULL
  , P_CATEGORY   VARCHAR(25) NOT NULL
  , P_BRAND VARCHAR(10) NOT NULL
  , P_COLOR VARCHAR(20) NOT NULL
  , P_TYPE  VARCHAR(25) NOT NULL
  , P_SIZE  BIGINT NOT NULL
  , P_CONTAINER   VARCHAR(10) NOT NULL)
SERVER gsmpp_server
OPTIONS(location 'gsfs://192.168.0.90:5004/part.tbl*',
format 'text',
delimiter '|',
encoding 'utf8',
mode 'Normal'
);
 
DROP FOREIGN TABLE IF EXISTS lineorder_load;
CREATE FOREIGN TABLE lineorder_load
(
    LO_ORDERKEY             BIGINT NOT NULL,
    LO_LINENUMBER           BIGINT NOT NULL,
    LO_CUSTKEY              BIGINT NOT NULL,
    LO_PARTKEY              BIGINT NOT NULL,
    LO_SUPPKEY              BIGINT NOT NULL,
    LO_ORDERDATE            DATE NOT NULL,
    LO_ORDERPRIORITY        VARCHAR(15) NOT NULL,
    LO_SHIPPRIORITY         BIGINT NOT NULL,
    LO_QUANTITY             BIGINT NOT NULL,
    LO_EXTENDEDPRICE        BIGINT NOT NULL,
    LO_ORDTOTALPRICE        BIGINT NOT NULL,
    LO_DISCOUNT             BIGINT NOT NULL,
    LO_REVENUE              BIGINT NOT NULL,
    LO_SUPPLYCOST           BIGINT NOT NULL,
    LO_TAX                  BIGINT NOT NULL,
    LO_COMMITDATE           DATE NOT NULL,
    LO_SHIPMODE             VARCHAR(10) NOT NULL)
SERVER gsmpp_server
OPTIONS(location 'gsfs://192.168.0.90:5004/lineorder.tbl*',
format 'text',
delimiter '|',
encoding 'utf8',
mode 'Normal'
);

导入SSB数据

执行以下命令导入数据。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
INSERT INTO customer SELECT * FROM customer_load;
INSERT INTO supplier SELECT * FROM supplier_load;
INSERT INTO part SELECT * FROM part_load;
INSERT INTO lineorder SELECT * FROM lineorder_load;
 
INSERT INTO lineorder_flat
SELECT
 l.LO_ORDERKEY AS LO_ORDERKEY,
 l.LO_LINENUMBER AS LO_LINENUMBER,
 l.LO_CUSTKEY AS LO_CUSTKEY,
 l.LO_PARTKEY AS LO_PARTKEY,
 l.LO_SUPPKEY AS LO_SUPPKEY,
 l.LO_ORDERDATE AS LO_ORDERDATE,
 l.LO_ORDERPRIORITY AS LO_ORDERPRIORITY,
 l.LO_SHIPPRIORITY AS LO_SHIPPRIORITY,
 l.LO_QUANTITY AS LO_QUANTITY,
 l.LO_EXTENDEDPRICE AS LO_EXTENDEDPRICE,
 l.LO_ORDTOTALPRICE AS LO_ORDTOTALPRICE,
 l.LO_DISCOUNT AS LO_DISCOUNT,
 l.LO_REVENUE AS LO_REVENUE,
 l.LO_SUPPLYCOST AS LO_SUPPLYCOST,
 l.LO_TAX AS LO_TAX,
 l.LO_COMMITDATE AS LO_COMMITDATE,
 l.LO_SHIPMODE AS LO_SHIPMODE,
 c.C_NAME AS C_NAME,
 c.C_ADDRESS AS C_ADDRESS,
 c.C_CITY AS C_CITY,
 c.C_NATION AS C_NATION,
 c.C_REGION AS C_REGION,
 c.C_PHONE AS C_PHONE,
 c.C_MKTSEGMENT AS C_MKTSEGMENT,
 s.S_NAME AS S_NAME,
 s.S_ADDRESS AS S_ADDRESS,
 s.S_CITY AS S_CITY,
 s.S_NATION AS S_NATION,
 s.S_REGION AS S_REGION,
 s.S_PHONE AS S_PHONE,
 p.P_NAME AS P_NAME,
 p.P_MFGR AS P_MFGR,
 p.P_CATEGORY AS P_CATEGORY,
 p.P_BRAND AS P_BRAND,
 p.P_COLOR AS P_COLOR,
 p.P_TYPE AS P_TYPE,
 p.P_SIZE AS P_SIZE,
 p.P_CONTAINER AS P_CONTAINER
FROM lineorder AS l
INNER JOIN customer AS c ON c.C_CUSTKEY = l.LO_CUSTKEY
INNER JOIN supplier AS s ON s.S_SUPPKEY = l.LO_SUPPKEY
INNER JOIN part AS p ON p.P_PARTKEY = l.LO_PARTKEY;

相关文档