更新时间:2026-05-28 GMT+08:00
分享

数据准备

  1. 进入DataArts Studio控制台 -->数据开发-->脚本开发,新建RDS SQL脚本,右上角选择MySQL数据连接和数据库,粘贴以下SQL命令,单击“运行”,在RDS for MySQL里建立原始表sales_detail,该表记录每条小票的购买信息,包括商品价格、数量等。

    CREATE TABLE `sales_detail` (
      `id` bigint NOT NULL AUTO_INCREMENT,
      `flow_id` varchar(50) NOT NULL,
      `org_code` varchar(20) NOT NULL,
      `product_code` varchar(20) NOT NULL,
      `price` decimal(10,2) NOT NULL,
      `quantity` decimal(10,3) NOT NULL,
      `amount` decimal(10,2) NOT NULL,
      `sale_time` datetime NOT NULL,
      PRIMARY KEY (`id`),
      KEY `idx_sales_detail_flow_time` (`flow_id`,`sale_time`),
      KEY `idx_sales_detail_flow_id` (`flow_id`)
    ) ENGINE=InnoDB AUTO_INCREMENT=16014715 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

    数据字段说明如下:

    表名:sales_detail

    字段名称

    字段类型

    字段说明

    flow_id

    varchar

    流水id

    org_code

    varchar

    店铺编码

    product_code

    varchar

    商品编码

    price

    decimal

    单价

    quantity

    decimal

    数量

    amount

    decimal

    总价

    sale_time

    datetime

    销售时间

    图1 MySQL数据库建表

  2. 本节提供部分测试数据示例。新建RDS SQL脚本,粘贴以下SQL命令,单击”运行”,给sales_detail表插入测试数据。

    INSERT INTO sales_detail (id, flow_id, org_code, product_code, price, quantity, amount, sale_time) VALUES
    (1, '100124080100054', '201041', '0007343', 4.50, 1.000, 4.50, '2024-08-01 09:56:00'),
    (2, '100124080100054', '201058', '0002783', 49.60, 0.722, 35.81, '2024-08-01 09:56:00'),
    (3, '100124080100054', '201015', '0001114', 36.90, 1.000, 36.90, '2024-08-01 09:56:00'),
    (4, '10026031300000001', '201007', '0002897', 7.72, 0.488, 1.63, '2024-08-23 17:21:00'),
    (5, '100124080100054', '201041', '0000695', 6.99, 1.000, 6.99, '2024-08-01 09:56:00'),
    (6, '100124080100050', '201041', '0009520', 9.90, 1.000, 9.90, '2024-08-01 09:46:00'),
    (7, '10026031300000001', '201015', '0002878', 9.13, 0.475, 4.25, '2024-08-22 11:05:00'),
    (8, '100124080100049', '201055', '0005175', 3.16, 0.804, 2.54, '2024-08-01 09:46:00'),
    (9, '100124080100050', '201038', '0002333', 1.98, 0.382, 0.76, '2024-08-01 09:46:00'),
    (10, '10026031300000001', '201015', '0002864', 6.64, 0.914, 2.27, '2024-08-19 20:37:00'),
    (11, '10026031300000001', '201043', '0002853', 4.01, 0.706, 0.98, '2024-08-12 17:57:00'),
    (12, '10026031300000001', '201015', '0002854', 6.58, 0.854, 4.90, '2024-08-23 22:43:00'),
    (13, '10026031300000001', '201015', '0002851', 6.84, 0.555, 1.15, '2024-08-04 20:04:00'),
    (14, '10026031300000001', '201007', '0002847', 3.94, 0.768, 7.19, '2024-08-17 00:11:00'),
    (15, '10026031300000001', '201007', '0002843', 4.06, 0.401, 5.10, '2024-08-25 12:27:00'),
    (16, '10026031300000001', '201015', '0002841', 6.70, 0.632, 1.01, '2024-08-14 00:02:00');

  3. 新建DWS SQL脚本,粘贴以下SQL命令,右上角选择DWS数据连接和数仓,单击”运行”,在数仓中创建1个序列“sales_detail_id_seq”,3张数据表对应ODS层、DWD层、DWS层,分别是ods_sales_detail流水原始数据、dwd_retail_sales_detail_clean流水数据清洗表、dws_product_combination_analysis商品组合分析结果表。

    CREATE SEQUENCE sales_detail_id_seq
        START WITH 1
        INCREMENT BY 1
        NO MINVALUE
        NO MAXVALUE
        CACHE 1;
    CREATE TABLE ods_sales_detail (
    id bigint DEFAULT nextval ('sales_detail_id_seq'::regclass) NOT NULL,
    flow_id character varying (50) NOT NULL,
    org_code character varying (20) NOT NULL,
    product_code character varying (20) NOT NULL,
    price numeric (10, 2) NOT NULL,
    quantity numeric (10, 3) NOT NULL,
    amount numeric (10, 2) NOT NULL,
    sale_time timestamp without TIME zone NOT NULL
    )
    WITH(orientation = COLUMN, compression = no) DISTRIBUTE BY HASH(flow_id) TO GROUP group_version1;
    CREATE INDEX idx_ods_sales_detail_flow_time ON ods_sales_detail USING btree (flow_id, sale_time) TABLESPACE pg_default;
    CREATE INDEX idx_ods_sales_detail_flow_id ON ods_sales_detail USING btree (flow_id) TABLESPACE pg_default;
    
    
    CREATE TABLE dwd_retail_sales_detail_clean (
    id bigint,
    flow_id character varying (50) NOT NULL,
    org_code character varying (20) NOT NULL,
    product_code character varying (20) NOT NULL,
    price numeric (10, 2) NOT NULL,
    quantity numeric (10, 3) NOT NULL,
    amount numeric (10, 2) NOT NULL,
    sale_time timestamp without TIME zone NOT NULL,
    item_cnt integer
    )
    WITH(orientation = COLUMN,compression = no,colversion = 2.0,enable_delta = FALSE) DISTRIBUTE BY HASH(flow_id) TO GROUP group_version1;
    COMMENT ON TABLE dwd_retail_sales_detail_clean IS 'DWD层-零售销售明细清洗表';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.id IS '主键ID';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.flow_id IS '订单流水号';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.org_code IS '门店编码';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.product_code IS '商品编码';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.price IS '商品单价';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.quantity IS '购买数量';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.amount IS '商品金额';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.sale_time IS '销售时间';
    COMMENT ON COLUMN dwd_retail_sales_detail_clean.item_cnt IS '该流水下商品总数(用于过滤)';
    
    
    CREATE TABLE
    dws_product_combination_analysis (
    org_code character varying (20) NOT NULL,
    product1 character varying (20) NOT NULL,
    product2 character varying (20) NOT NULL,
    combination_count integer NOT NULL,
    support_percentage numeric (10, 4) NOT NULL,
    confidence_product1_to_product2 numeric (10, 4) NOT NULL,
    confidence_product2_to_product1 numeric (10, 4) NOT NULL,
    analysis_date timestamp (0) without TIME zone NOT NULL
    )
    WITH(orientation = COLUMN, compression = no) DISTRIBUTE BY HASH(org_code) TO GROUP group_version1;
    COMMENT ON TABLE dws_product_combination_analysis IS 'DWS层-商品组合分析结果表';
    COMMENT ON COLUMN dws_product_combination_analysis.org_code IS '门店编码';
    COMMENT ON COLUMN dws_product_combination_analysis.product1 IS '商品1编码';
    COMMENT ON COLUMN dws_product_combination_analysis.product2 IS '商品2编码';
    COMMENT ON COLUMN dws_product_combination_analysis.combination_count IS '组合出现次数';
    COMMENT ON COLUMN dws_product_combination_analysis.support_percentage IS '支持度(%)';
    COMMENT ON COLUMN dws_product_combination_analysis.confidence_product1_to_product2 IS '商品1到商品2的置信度(%)';
    COMMENT ON COLUMN dws_product_combination_analysis.confidence_product2_to_product1 IS '商品2到商品1的置信度(%)';
    COMMENT ON COLUMN dws_product_combination_analysis.analysis_date IS '分析日期';
    ALTER TABLE dws_product_combination_analysis
    ADD CONSTRAINT dws_product_combination_analysis_pkey PRIMARY KEY (org_code, product1, product2, analysis_date);

    图2 数据仓库DWS建表

相关文档