更新时间:2026-05-28 GMT+08:00
数据准备
- 进入DataArts Studio控制台 -->数据开发-->脚本开发,新建RDS SQL脚本,右上角选择MySQL数据连接和数据库,粘贴以下SQL命令,单击“运行”,在RDS for MySQL里建立原始表sales_detail,该表记录每条小票的购买信息,包括商品价格、数量等。
CREATE TABLE `sales_detail` ( `id` bigint NOT NULL AUTO_INCREMENT, `flow_id` varchar(50) NOT NULL, `org_code` varchar(20) NOT NULL, `product_code` varchar(20) NOT NULL, `price` decimal(10,2) NOT NULL, `quantity` decimal(10,3) NOT NULL, `amount` decimal(10,2) NOT NULL, `sale_time` datetime NOT NULL, PRIMARY KEY (`id`), KEY `idx_sales_detail_flow_time` (`flow_id`,`sale_time`), KEY `idx_sales_detail_flow_id` (`flow_id`) ) ENGINE=InnoDB AUTO_INCREMENT=16014715 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
数据字段说明如下:
表名:sales_detail
字段名称
字段类型
字段说明
flow_id
varchar
流水id
org_code
varchar
店铺编码
product_code
varchar
商品编码
price
decimal
单价
quantity
decimal
数量
amount
decimal
总价
sale_time
datetime
销售时间
图1 MySQL数据库建表
- 本节提供部分测试数据示例。新建RDS SQL脚本,粘贴以下SQL命令,单击”运行”,给sales_detail表插入测试数据。
INSERT INTO sales_detail (id, flow_id, org_code, product_code, price, quantity, amount, sale_time) VALUES (1, '100124080100054', '201041', '0007343', 4.50, 1.000, 4.50, '2024-08-01 09:56:00'), (2, '100124080100054', '201058', '0002783', 49.60, 0.722, 35.81, '2024-08-01 09:56:00'), (3, '100124080100054', '201015', '0001114', 36.90, 1.000, 36.90, '2024-08-01 09:56:00'), (4, '10026031300000001', '201007', '0002897', 7.72, 0.488, 1.63, '2024-08-23 17:21:00'), (5, '100124080100054', '201041', '0000695', 6.99, 1.000, 6.99, '2024-08-01 09:56:00'), (6, '100124080100050', '201041', '0009520', 9.90, 1.000, 9.90, '2024-08-01 09:46:00'), (7, '10026031300000001', '201015', '0002878', 9.13, 0.475, 4.25, '2024-08-22 11:05:00'), (8, '100124080100049', '201055', '0005175', 3.16, 0.804, 2.54, '2024-08-01 09:46:00'), (9, '100124080100050', '201038', '0002333', 1.98, 0.382, 0.76, '2024-08-01 09:46:00'), (10, '10026031300000001', '201015', '0002864', 6.64, 0.914, 2.27, '2024-08-19 20:37:00'), (11, '10026031300000001', '201043', '0002853', 4.01, 0.706, 0.98, '2024-08-12 17:57:00'), (12, '10026031300000001', '201015', '0002854', 6.58, 0.854, 4.90, '2024-08-23 22:43:00'), (13, '10026031300000001', '201015', '0002851', 6.84, 0.555, 1.15, '2024-08-04 20:04:00'), (14, '10026031300000001', '201007', '0002847', 3.94, 0.768, 7.19, '2024-08-17 00:11:00'), (15, '10026031300000001', '201007', '0002843', 4.06, 0.401, 5.10, '2024-08-25 12:27:00'), (16, '10026031300000001', '201015', '0002841', 6.70, 0.632, 1.01, '2024-08-14 00:02:00');
- 新建DWS SQL脚本,粘贴以下SQL命令,右上角选择DWS数据连接和数仓,单击”运行”,在数仓中创建1个序列“sales_detail_id_seq”,3张数据表对应ODS层、DWD层、DWS层,分别是ods_sales_detail流水原始数据、dwd_retail_sales_detail_clean流水数据清洗表、dws_product_combination_analysis商品组合分析结果表。
CREATE SEQUENCE sales_detail_id_seq START WITH 1 INCREMENT BY 1 NO MINVALUE NO MAXVALUE CACHE 1; CREATE TABLE ods_sales_detail ( id bigint DEFAULT nextval ('sales_detail_id_seq'::regclass) NOT NULL, flow_id character varying (50) NOT NULL, org_code character varying (20) NOT NULL, product_code character varying (20) NOT NULL, price numeric (10, 2) NOT NULL, quantity numeric (10, 3) NOT NULL, amount numeric (10, 2) NOT NULL, sale_time timestamp without TIME zone NOT NULL ) WITH(orientation = COLUMN, compression = no) DISTRIBUTE BY HASH(flow_id) TO GROUP group_version1; CREATE INDEX idx_ods_sales_detail_flow_time ON ods_sales_detail USING btree (flow_id, sale_time) TABLESPACE pg_default; CREATE INDEX idx_ods_sales_detail_flow_id ON ods_sales_detail USING btree (flow_id) TABLESPACE pg_default; CREATE TABLE dwd_retail_sales_detail_clean ( id bigint, flow_id character varying (50) NOT NULL, org_code character varying (20) NOT NULL, product_code character varying (20) NOT NULL, price numeric (10, 2) NOT NULL, quantity numeric (10, 3) NOT NULL, amount numeric (10, 2) NOT NULL, sale_time timestamp without TIME zone NOT NULL, item_cnt integer ) WITH(orientation = COLUMN,compression = no,colversion = 2.0,enable_delta = FALSE) DISTRIBUTE BY HASH(flow_id) TO GROUP group_version1; COMMENT ON TABLE dwd_retail_sales_detail_clean IS 'DWD层-零售销售明细清洗表'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.id IS '主键ID'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.flow_id IS '订单流水号'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.org_code IS '门店编码'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.product_code IS '商品编码'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.price IS '商品单价'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.quantity IS '购买数量'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.amount IS '商品金额'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.sale_time IS '销售时间'; COMMENT ON COLUMN dwd_retail_sales_detail_clean.item_cnt IS '该流水下商品总数(用于过滤)'; CREATE TABLE dws_product_combination_analysis ( org_code character varying (20) NOT NULL, product1 character varying (20) NOT NULL, product2 character varying (20) NOT NULL, combination_count integer NOT NULL, support_percentage numeric (10, 4) NOT NULL, confidence_product1_to_product2 numeric (10, 4) NOT NULL, confidence_product2_to_product1 numeric (10, 4) NOT NULL, analysis_date timestamp (0) without TIME zone NOT NULL ) WITH(orientation = COLUMN, compression = no) DISTRIBUTE BY HASH(org_code) TO GROUP group_version1; COMMENT ON TABLE dws_product_combination_analysis IS 'DWS层-商品组合分析结果表'; COMMENT ON COLUMN dws_product_combination_analysis.org_code IS '门店编码'; COMMENT ON COLUMN dws_product_combination_analysis.product1 IS '商品1编码'; COMMENT ON COLUMN dws_product_combination_analysis.product2 IS '商品2编码'; COMMENT ON COLUMN dws_product_combination_analysis.combination_count IS '组合出现次数'; COMMENT ON COLUMN dws_product_combination_analysis.support_percentage IS '支持度(%)'; COMMENT ON COLUMN dws_product_combination_analysis.confidence_product1_to_product2 IS '商品1到商品2的置信度(%)'; COMMENT ON COLUMN dws_product_combination_analysis.confidence_product2_to_product1 IS '商品2到商品1的置信度(%)'; COMMENT ON COLUMN dws_product_combination_analysis.analysis_date IS '分析日期'; ALTER TABLE dws_product_combination_analysis ADD CONSTRAINT dws_product_combination_analysis_pkey PRIMARY KEY (org_code, product1, product2, analysis_date);图2 数据仓库DWS建表