更新时间:2025-07-29 GMT+08:00
分享

vllm_single_node.yaml

vllm_single_node.yaml文件用于单机部署推理服务。

用户可修改的字段,详见《基于K8s Deployment单机部署推理服务》中的“步骤1:配置模型及镜像参数”。

# Kubernetes Deployment for the single-node vLLM inference service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: infer-vllm
  labels:
    app: infer-vllm
spec:
  # Number of instances; edit this value to scale out or in.
  replicas: 1
  selector:
    matchLabels:
      app: infer-vllm
  template:
    metadata:
      labels:
        app: infer-vllm
    spec:
      schedulerName: volcano
      nodeSelector:
        accelerator/huawei-npu: ascend-1980
      # Soft (preferred) pod affinity: prefer a node that already runs a pod
      # of this Deployment; when that node lacks resources the pod may still
      # be placed on another node. If reliability matters more than
      # co-location, remove this affinity section.
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchLabels:
                    # Must equal the pod template label above; a different
                    # value selects none of this Deployment's pods and the
                    # affinity preference has no effect on them.
                    app: infer-vllm
                topologyKey: kubernetes.io/hostname
      containers:
        # Container name.
        - name: infer-vllm
          # Inference image name; replace the placeholder with the real image.
          image: ${image_name}
          imagePullPolicy: IfNotPresent
          # The container process runs as UID 0.
          securityContext:
            runAsUser: 0
          # Start-up: bash -c executes the launch script located under the
          # model-path mount (/mnt/deepseek).
          command:
            - "/bin/bash"
            - "-c"
          args:
            - "/mnt/deepseek/scripts/run_vllm_single_node.sh"
          ports:
            - containerPort: 8080
          # Liveness check: HTTP GET /health on port 8080. Probing starts
          # 600 s after container start, then every 10 s; 6 consecutive
          # failures mark the container as failed.
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 600
            periodSeconds: 10
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 6
          resources:
            requests:
              cpu: "44"
              memory: 100Gi
              # Number of NPU cards requested; keep the key unchanged.
              huawei.com/ascend-1980: "2"
            limits:
              cpu: "44"
              memory: 100Gi
              # Upper limit of NPU cards; keep the key unchanged.
              huawei.com/ascend-1980: "2"
          # Mount paths inside the container.
          volumeMounts:
            # Driver mount; leave unchanged.
            - name: ascend-driver
              mountPath: /usr/local/Ascend/driver
            # Driver mount; leave unchanged.
            - name: ascend-add-ons
              mountPath: /usr/local/Ascend/add-ons
            # Driver hccn configuration; leave unchanged.
            - name: hccn
              mountPath: /etc/hccn.conf
            - name: localtime
              mountPath: /etc/localtime
            # npu-smi binary.
            - name: npu-smi
              mountPath: /usr/local/sbin/npu-smi
            - name: model-path
              mountPath: /mnt/deepseek
      # Paths on the host (physical machine) that back the container mounts;
      # every volume here is a hostPath volume.
      volumes:
        - name: ascend-driver
          hostPath: {path: /usr/local/Ascend/driver}
        - name: ascend-add-ons
          hostPath: {path: /usr/local/Ascend/add-ons}
        - name: hccn
          hostPath: {path: /etc/hccn.conf}
        - name: localtime
          hostPath: {path: /etc/localtime}
        - name: npu-smi
          hostPath: {path: /usr/local/sbin/npu-smi}
        # Mounted at /mnt/deepseek in the container; the launch script is
        # read from its scripts/ subdirectory.
        - name: model-path
          hostPath: {path: /mnt/deepseek}

相关文档