文档首页/
AI开发平台ModelArts/
最佳实践/
LLM大语言模型推理/
LLM大语言模型推理历史版本文档/
主流开源大模型基于Lite Server&Cluster适配Ascend-vLLM PyTorch NPU推理指导(6.5.905)/
附录/
部署kubeinfer插件脚本
更新时间:2025-08-20 GMT+08:00
部署kubeinfer插件脚本
kubeinfer-crd.yaml文件用于部署KubeInfer插件,默认部署2个实例。
在Cluster中安装KubeInfer插件时,会使用到该脚本。
# Manifest for the KubeInfer plugin operator. It contains, in order:
#   1. Deployment          modelarts-infers-operator (2 replicas, namespace "default")
#   2. ClusterRole         modelarts-infers-operator-role
#   3. ClusterRoleBinding  modelarts-infers-operator-rb
#   4. ServiceAccount      modelarts-infers-operator-account
#   5. ClusterRole         modelarts-infers-sidecar-role
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: modelarts-infers-operator
  namespace: default
spec:
  # Two operator instances are deployed by default.
  replicas: 2
  selector:
    matchLabels:
      infer: modelarts-infers-operator
  template:
    metadata:
      labels:
        infer: modelarts-infers-operator
      annotations:
        # Legacy seccomp annotation. Kubernetes deprecated it in v1.19 and
        # stopped honouring it in v1.27; the seccompProfile field in the pod
        # securityContext below is the supported equivalent.
        seccomp.security.alpha.kubernetes.io/pod: runtime/default
    spec:
      # The pod runs as the ServiceAccount defined later in this file, with
      # its API token mounted into the container.
      serviceAccountName: modelarts-infers-operator-account
      automountServiceAccountToken: true
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: modelarts-infers-operator
          # Startup script (sh -e: stop at the first failing command):
          #   * give /run/secrets and /run/secrets/kubernetes.io to
          #     root:servicegroup with mode 750;
          #   * append "Defaults passwd_timeout=1" after the
          #     "# Defaults specification" line of /etc/sudoers;
          #   * delete /etc/sudo.conf;
          #   * rewrite the "%servicegroup ... NOPASSWD" sudoers rule so that
          #     it starts with "#", and chmod 600 the files in /run/faillock;
          #   * finally start the operator via run_standard.sh.
          command:
            - /bin/sh
            - -ec
            - |
              sudo chown root:servicegroup /run/secrets;
              sudo chown root:servicegroup /run/secrets/kubernetes.io;
              sudo chmod 750 /run/secrets;
              sudo chmod 750 /run/secrets/kubernetes.io;
              sudo sed -i '/# Defaults specification/a\Defaults passwd_timeout=1' /etc/sudoers;
              sudo rm -f /etc/sudo.conf;
              sudo sh -c 'sed -i "s/%servicegroup ALL=(ALL) NOPASSWD.*:ALL/#%servicegroup ALL=(ALL) NOPASSWD:NOPASSWD:ALL/g" /etc/sudoers && chmod 600 /run/faillock/*';
              sh /opt/cloud/modelarts-infers-operator/run_standard.sh
          securityContext:
            runAsUser: 1000
            runAsGroup: 1000
          image: swr.cn-east-4.myhuaweicloud.com/atelier/modelarts-infers-operator:1.5.0.20250508155821
          imagePullPolicy: IfNotPresent
          env:
            - name: authentication_mode
              value: "ServiceAccount"
            - name: business_type
              value: "KubeInfer"
            - name: scc_config_path
              value: "/opt/cloud/modelarts-infers-operator/conf/scc.conf"
          # Both probes run curl inside the container against port 8084.
          livenessProbe:
            exec:
              command:
                - curl
                - --fail
                - http://127.0.0.1:8084/healthz
            initialDelaySeconds: 15
            periodSeconds: 20
          readinessProbe:
            exec:
              command:
                - curl
                - --fail
                - http://127.0.0.1:8084/readyz
            initialDelaySeconds: 5
            periodSeconds: 10
          # Requests equal limits for both memory and CPU.
          resources:
            requests:
              memory: 300Mi
              cpu: 500m
            limits:
              memory: 300Mi
              cpu: 500m
---
# Cluster-wide permissions granted to the operator.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  creationTimestamp: null
  name: modelarts-infers-operator-role
rules:
  - apiGroups: ["apiextensions.k8s.io"]
    resources: ["customresourcedefinitions"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  # Every resource in the infer.modelarts.huaweicloud API group.
  - apiGroups: ["infer.modelarts.huaweicloud"]
    resources: ["*"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  # NOTE(review): "replicasets" is listed here under the core ("") group as
  # well as under "apps" below; kept exactly as in the original script.
  - apiGroups: [""]
    resources: [
      "services", "configmaps", "pods", "replicasets",
      "persistentvolumeclaims", "persistentvolumes", "events"
    ]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  # Nodes are read-only.
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch"]
  - apiGroups: ["apps"]
    resources: ["deployments", "replicasets"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["networking.k8s.io"]
    resources: ["networkpolicies"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  # ServiceAccounts and ClusterRoleBindings: no "delete" verb.
  - apiGroups: [""]
    resources: ["serviceaccounts"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
  - apiGroups: ["rbac.authorization.k8s.io"]
    resources: ["clusterrolebindings"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
# Binds the operator ClusterRole to the operator ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: modelarts-infers-operator-rb
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: modelarts-infers-operator-role
subjects:
  - kind: ServiceAccount
    name: modelarts-infers-operator-account
    namespace: default
---
# ServiceAccount referenced by the Deployment and the ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: modelarts-infers-operator-account
  namespace: default
---
# NOTE(review): only the metadata of this role is visible in this excerpt;
# no rules are shown here. Confirm against the complete script before use.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  creationTimestamp: null
  name: modelarts-infers-sidecar-role
父主题: 附录