更新时间:2021-03-18 GMT+08:00
分享

代码样例

打开拷贝过来的算子样例文件 reduction.py。

详细的代码解释请参考如下注释。

#coding=utf-8

import te.lang.cce
from te import tvm
from topi import generic
from topi.cce import util
def reduction(shape, dtype, axis, operation, coeff, kernel_name="Reduction",
              need_build=True, need_print=False):
    """
    Reduce a tensor over its trailing axes and scale the output with coeff.

    All dimensions from ``axis`` through the last one are flattened into a
    single dimension, and that dimension is then summed away.

    Parameters
    ----------
    shape : sequence of int
        Shape of the input data; validated by ``util.check_shape_rule``.
    dtype : str
        Source data type (case-insensitive); only "float16" and "float32"
        are accepted.
    axis : int
        The first axis to reduce. May be negative to index from the end
        (e.g. -1 for the last axis). With ``axis == 0`` the reduction runs
        across the entire input.
    operation : str
        One of "SUM", "ASUM" (sum of absolute values), "SUMSQ" (sum of
        squares) or "MEAN".
    coeff : int or float
        Scale applied to the output.
    kernel_name : str
        CCE kernel name, default value is "Reduction".
    need_build : bool
        Forwarded to the build config as "need_build", default value is True.
    need_print : bool
        Forwarded to the build config as "print_ir", default value is False.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If dtype is unsupported, axis is not an int or is out of range,
        operation is not one of the supported names, or coeff is not an
        int/float. ``util.check_shape_rule`` may raise on an invalid shape.
    """
    # `reduce` is a builtin only on Python 2; functools.reduce exists on both
    # Python 2 and 3, so import it locally to keep this sample runnable.
    from functools import reduce

    # Basic parameter validation.
    # Shape check; check_shape_rule() is defined in
    # ddk/ddk/site-packages/topi-0.4.0.egg/topi/cce/util.py
    util.check_shape_rule(shape)

    check_list = ["float16", "float32"]
    inp_dtype = dtype.lower()
    if inp_dtype not in check_list:
        raise RuntimeError("Reduction only support %s while dtype is %s" % (",".join(check_list), dtype))

    reduction_op = ("SUM", "ASUM", "SUMSQ", "MEAN")

    # axis validation (bool is a subclass of int, so reject it explicitly)
    if isinstance(axis, bool) or not isinstance(axis, int):
        raise RuntimeError("type of axis value should be int")
    rank = len(shape)
    if axis >= rank or axis < -rank:
        raise RuntimeError(
            "input axis is out of range, axis value can be from %d to %d" % (-rank, rank - 1))

    # operation validation
    if operation not in reduction_op:
        raise RuntimeError("op can only be one of SUM, ASUM, SUMSQ , MEAN")

    # coeff validation (bool rejected for the same reason as above)
    if isinstance(coeff, bool) or not isinstance(coeff, (int, float)):
        raise RuntimeError("coeff must be a value")

    # Parameter preprocessing: normalize a negative axis, then collapse every
    # dimension from `axis` onward into one so a single sum reduces them all.
    if axis < 0:
        axis += rank
    shape = list(shape)
    shape1 = shape[:axis] + [reduce(lambda x, y: x * y, shape[axis:])]

    # Declare the input tensor (tvm.placeholder).
    data = tvm.placeholder(shape1, name="data_input", dtype=inp_dtype)

    is_mean = operation == "MEAN"

    # Describe the operator computation.
    with tvm.target.cce():
        if operation == "ASUM":
            tmp = te.lang.cce.vmuls(te.lang.cce.vabs(data), coeff)
        elif operation == "SUMSQ":
            tmp = te.lang.cce.vmuls(te.lang.cce.vmul(data, data), coeff)
        elif is_mean:
            # The 1/size factor of the mean is applied as two size**-0.5
            # factors: one here (before the sum) and one after the sum.
            # NOTE(review): presumably this keeps intermediates within
            # float16 range -- confirm.
            inv_sqrt_size = shape1[-1] ** (-0.5)
            tmp = te.lang.cce.vmuls(data, float(coeff) * inv_sqrt_size)
        else:
            # "SUM": the only remaining value after validation above.
            tmp = te.lang.cce.vmuls(data, coeff)

        # Sum along `axis`, dropping that dimension.
        res_tmp = te.lang.cce.sum(tmp, axis=axis)
        # Convert the result to the input data type.
        res = te.lang.cce.cast_to(res_tmp, inp_dtype, f1628IntegerFlag=True)
        if is_mean:
            # NOTE(review): the MEAN result is built from res_tmp, not from
            # the cast result above; kept as in the original sample.
            res = te.lang.cce.vmuls(res_tmp, inv_sqrt_size)

        # Generate the schedule object for the computation.
        sch = generic.auto_schedule(res)

    # Build configuration.
    config = {"print_ir": need_print,
              "need_build": need_build,
              "name": kernel_name,
              "tensor_list": [data, res]}
    # Compile the operator and emit the target files.
    te.lang.cce.cce_build_code(sch, config)

# Invoke the reduction operator with shape (2, 3, 4), data type float16,
# axis 1, operation SUM, coeff 2 and kernel name "Reduction".
if __name__ == "__main__":
    reduction(
        shape=(2, 3, 4),
        dtype="float16",
        axis=1,
        operation="SUM",
        coeff=2,
        kernel_name="Reduction",
    )
分享:

    相关文档

    相关产品