更新时间:2025-07-29 GMT+08:00
分享

vllm_multi_node.yaml

vllm_multi_node.yaml文件用于多机部署推理服务。

用户可修改的字段详见“基于KubeInfer多机部署推理服务”中的“步骤1：配置模型及镜像参数”。

# KubeInfer custom resource describing a multi-node vLLM inference deployment.
kind: KubeInfer
apiVersion: infer.modelarts.huaweicloud/v1
metadata:
  labels:
    app.modelarts.huaweicloud/instanceGroupId: vllm
  namespace: default
  name: infer-vllm
spec:
  # Service definition: type NodePort, node port 30080 mapped to port 8080 over TCP.
  networkServices:
    - name: svc-infer-vllm
      spec:
        ports:
          - nodePort: 30080    # nodePort must be within the range 30000-32767
            port: 8080
            protocol: TCP
            targetPort: 8080
        type: NodePort
  replicas: 1                  # Number of DeepSeek instances; change this value to scale out or in
  # NOTE(review): presumably analogous to a Deployment rolling update, applied per
  # instance - confirm the exact semantics against the KubeInfer documentation.
  strategy:
    rollingUpdate:
      maxSurge: 20%
      maxUnavailable: 20%
    type: InstanceRollingUpdate
  template:
    spec:
      # Role list: the entries below start run_vllm_multi_node.sh as `head` and as `worker`.
      roles:
        # Head role: its container runs run_vllm_multi_node.sh with the `head` argument.
        # NOTE(review): recoveryPolicy presumably restarts the whole instance when this
        # role fails - confirm against the KubeInfer documentation.
        - recoveryPolicy:
            restart: Instance
            type: Restart
          replicas: 1
          template:
            spec:
              automountServiceAccountToken: false
              containers:
                # Environment for the container. The rank-table variables give a directory
                # and a file name; what consumes them is not visible in this manifest.
                - env:
                    - name: ASCEND_910_ENABLE
                      value: "true"
                    - name: GLOBAL_RANK_TABLE_PATH
                      value: /user/global/config
                    - name: GLOBAL_RANK_TABLE_FILE_NAME
                      value: global_rank_table.json
                  # Placeholder: replace ${image_name} with the actual image before applying.
                  image: ${image_name}
                  imagePullPolicy: IfNotPresent
                  # The start script lives under /mnt/deepseek/, which is mounted below.
                  command:
                    - /bin/sh
                    - -c
                    - /mnt/deepseek/scripts/run_vllm_multi_node.sh head
                  name: infer-vllm-head
                  # HTTP GET /health on port 8080: probed every 10 s with a 10 s timeout;
                  # 6 consecutive failures count as a liveness failure.
                  livenessProbe:
                    httpGet:
                      path: /health
                      port: 8080
                    initialDelaySeconds: 2400     # Delay after container start before probing the vLLM service; tune to the SFS Turbo read speed
                    periodSeconds: 10
                    timeoutSeconds: 10
                    failureThreshold: 6
                    successThreshold: 1
                  # Requests equal limits: 176 CPUs, 8 huawei.com/ascend-1980 devices, 750Gi memory.
                  resources:
                    limits:
                      cpu: "176"
                      huawei.com/ascend-1980: "8"
                      memory: 750Gi
                    requests:
                      cpu: "176"
                      huawei.com/ascend-1980: "8"
                      memory: 750Gi
                  # The container process runs as UID 0 (root).
                  securityContext:
                    runAsUser: 0
                  terminationMessagePath: /dev/termination-log
                  terminationMessagePolicy: File
                  # Ascend driver, /usr/local/sbin and the install-info file are mounted
                  # read-only from the host; /dev/shm and /mnt/deepseek/ are writable.
                  volumeMounts:
                    - mountPath: /dev/shm
                      name: shm-volume
                    - mountPath: /usr/local/Ascend/driver
                      name: ascend-driver-path-pv
                      readOnly: true
                    - mountPath: /usr/local/sbin
                      name: ascend-npu-smi-path-pv
                      readOnly: true
                    - mountPath: /etc/ascend_install.info
                      name: ascend-install-info-path-pv
                      readOnly: true
                    - mountPath: /mnt/deepseek/
                      name: model-path
              dnsPolicy: ClusterFirst
              imagePullSecrets:
                - name: default-secret
              restartPolicy: Always
              # Pods are placed by the scheduler named "volcano" instead of the default scheduler.
              schedulerName: volcano
              terminationGracePeriodSeconds: 240
              # Tolerates the davinci=davinci NoSchedule taint, and stays bound for 30 s
              # after the node is tainted not-ready or unreachable before being evicted.
              tolerations:
                - effect: NoSchedule
                  key: davinci
                  value: davinci
                - effect: NoExecute
                  key: node.kubernetes.io/not-ready
                  operator: Exists
                  tolerationSeconds: 30
                - effect: NoExecute
                  key: node.kubernetes.io/unreachable
                  operator: Exists
                  tolerationSeconds: 30
              volumes:
                # Memory-backed emptyDir for /dev/shm, capped at 375Gi.
                - emptyDir:
                    medium: Memory
                    sizeLimit: 375Gi
                  name: shm-volume
                # The hostPath volumes below require the paths to already exist on the
                # node with the declared type (Directory or File).
                - hostPath:
                    path: /usr/local/Ascend/driver
                    type: Directory
                  name: ascend-driver-path-pv
                - hostPath:
                    path: /usr/local/sbin
                    type: Directory
                  name: ascend-npu-smi-path-pv
                - hostPath:
                    path: /etc/ascend_install.info
                    type: File
                  name: ascend-install-info-path-pv
                - hostPath:
                    path: /mnt/deepseek/
                    type: Directory
                  name: model-path
        # Worker role: its container runs run_vllm_multi_node.sh with the `worker` argument.
        # NOTE(review): recoveryPolicy presumably restarts the whole instance when this
        # role fails - confirm against the KubeInfer documentation.
        - recoveryPolicy:
            restart: Instance
            type: Restart
          replicas: 1
          template:
            spec:
              automountServiceAccountToken: false
              containers:
                # Environment for the container. The rank-table variables give a directory
                # and a file name; what consumes them is not visible in this manifest.
                - env:
                    - name: ASCEND_910_ENABLE
                      value: "true"
                    - name: GLOBAL_RANK_TABLE_PATH
                      value: /user/global/config
                    - name: GLOBAL_RANK_TABLE_FILE_NAME
                      value: global_rank_table.json
                  # Placeholder: replace ${image_name} with the actual image before applying.
                  image: ${image_name}
                  imagePullPolicy: IfNotPresent
                  # The start script lives under /mnt/deepseek/, which is mounted below.
                  command:
                    - /bin/sh
                    - -c
                    - /mnt/deepseek/scripts/run_vllm_multi_node.sh worker
                  name: infer-vllm-worker-1
                  # No livenessProbe is defined for this container.
                  # Requests equal limits: 176 CPUs, 8 huawei.com/ascend-1980 devices, 750Gi memory.
                  resources:
                    limits:
                      cpu: "176"
                      huawei.com/ascend-1980: "8"
                      memory: 750Gi
                    requests:
                      cpu: "176"
                      huawei.com/ascend-1980: "8"
                      memory: 750Gi
                  # The container process runs as UID 0 (root).
                  securityContext:
                    runAsUser: 0
                  terminationMessagePath: /dev/termination-log
                  terminationMessagePolicy: File
                  # Ascend driver, /usr/local/sbin and the install-info file are mounted
                  # read-only from the host; /dev/shm and /mnt/deepseek/ are writable.
                  volumeMounts:
                    - mountPath: /dev/shm
                      name: shm-volume
                    - mountPath: /usr/local/Ascend/driver
                      name: ascend-driver-path-pv
                      readOnly: true
                    - mountPath: /usr/local/sbin
                      name: ascend-npu-smi-path-pv
                      readOnly: true
                    - mountPath: /etc/ascend_install.info
                      name: ascend-install-info-path-pv
                      readOnly: true
                    - mountPath: /mnt/deepseek/
                      name: model-path
              dnsPolicy: ClusterFirst
              imagePullSecrets:
                - name: default-secret
              restartPolicy: Always
              # Pods are placed by the scheduler named "volcano" instead of the default scheduler.
              schedulerName: volcano
              terminationGracePeriodSeconds: 240
              # Tolerates the davinci=davinci NoSchedule taint, and stays bound for 30 s
              # after the node is tainted not-ready or unreachable before being evicted.
              tolerations:
                - effect: NoSchedule
                  key: davinci
                  value: davinci
                - effect: NoExecute
                  key: node.kubernetes.io/not-ready
                  operator: Exists
                  tolerationSeconds: 30
                - effect: NoExecute
                  key: node.kubernetes.io/unreachable
                  operator: Exists
                  tolerationSeconds: 30
              volumes:
                # Memory-backed emptyDir for /dev/shm, capped at 375Gi.
                - emptyDir:
                    medium: Memory
                    sizeLimit: 375Gi
                  name: shm-volume
                # The hostPath volumes below require the paths to already exist on the
                # node with the declared type (Directory or File).
                - hostPath:
                    path: /usr/local/Ascend/driver
                    type: Directory
                  name: ascend-driver-path-pv
                - hostPath:
                    path: /usr/local/sbin
                    type: Directory
                  name: ascend-npu-smi-path-pv
                - hostPath:
                    path: /etc/ascend_install.info
                    type: File
                  name: ascend-install-info-path-pv
                - hostPath:
                    path: /mnt/deepseek/
                    type: Directory
                  name: model-path

相关文档