Volcano调度器
插件介绍
Volcano 是一个基于 Kubernetes 的批处理平台,提供了机器学习、深度学习、生物信息学、基因组学及其他大数据应用所需要的而 Kubernetes 当下缺失的一系列特性。
字段说明
|
参数 |
是否必选 |
参数类型 |
描述 |
|---|---|---|---|
|
basic |
否 |
表2 object |
插件基础配置参数,无需指定。 |
|
flavor |
是 |
表3 object |
插件规格参数。 |
|
custom |
是 |
表4 object |
插件自定义参数。 |
|
参数 |
是否必选 |
参数类型 |
描述 |
|---|---|---|---|
|
swr_addr |
是 |
String |
插件下载地址,无需指定。 |
|
swr_user |
是 |
String |
插件下载用户,无需指定。 |
|
platform |
是 |
String |
插件平台,无需指定。 |
|
ecsEndpoint |
是 |
String |
ecs地址,无需指定。 |
|
xccsEndpoint |
|
String |
xccs服务地址,无需指定。 |
|
参数 |
是否必选 |
参数类型 |
描述 |
|---|---|---|---|
|
description |
否 |
String |
插件相关的描述信息。 |
|
name |
是 |
String |
插件规格名称。 |
|
replicas |
是 |
String |
实例数,默认为2。 |
|
resources |
是 |
Array of resources objects |
容器资源(CPU、内存)配额。 |
|
参数 |
是否必选 |
参数类型 |
描述 |
|---|---|---|---|
|
multiAZEnabled |
否 |
Bool |
是否多AZ部署,默认false。 |
|
controller_kube_api_qps |
否 |
int |
controller组件的api server qps,默认200。 |
|
scheduler_kube_api_qps |
否 |
int |
scheduler组件的api server qps,默认200。 |
|
admission_kube_api_qps |
否 |
int |
admission组件的api server qps,默认200。 |
|
update_pod_status_qps |
否 |
int |
更新pod status qps,默认200。 |
|
admissions |
否 |
string |
volcano支持配置的webhook。 |
|
colocation_enable |
否 |
string |
是否支持混部。 |
|
oversubscription_ratio |
否 |
int |
动态超卖比率,默认60。 |
|
oversubscription_method |
否 |
string |
超卖量计算方法,目前支持nodeResource和podProfile,nodeResource为默认的基于节点资源用量的算法,podProfile为基于Pod实例画像的算法。默认使用nodeResource。 |
|
oversubscription_profile_period |
否 |
int |
Pod实例画像的周期,单位为秒。 |
|
workload_balancer_third_party_types |
否 |
string |
第三方工作负载的group + version + kind拼接成的字符串。 |
|
workload_balancer_score_annotation_key |
否 |
string |
指定Pod的分值注解key。 |
|
node_match_expressions |
否 |
Array |
Volcano负载Pod匹配node的表达式。 |
|
tolerations |
否 |
Array of objects |
格式同k8s toleration 的格式,用来为Volcano负载Pod添加污点容忍。 |
|
oversubscription_ratio |
否 |
int |
Volcano调度环境中Node资源的超分比例。 |
|
descheduler_enable |
否 |
Bool |
是否支持重调度。 |
|
enable_workload_balancer |
否 |
Bool |
是否支持负载均衡器。 |
|
default_scheduler_conf |
是 |
yaml |
格式同Volcano配置YAML。 |
|
deschedulerPolicy |
否 |
yaml |
格式同Volcano重调度配置YAML。 |
|
参数 |
是否必选 |
参数类型 |
描述 |
|---|---|---|---|
|
limitsCpu |
是 |
String |
CPU大小限制,单位:m。 默认值按照组件区分。 |
|
limitsMem |
是 |
String |
内存大小限制,单位:Mi。 默认值按照组件区分。 |
|
name |
是 |
String |
插件名称 |
|
requestsCpu |
是 |
String |
申请的CPU大小,单位:m。 默认值按照组件区分。 |
|
requestsMem |
是 |
String |
申请的内存大小,单位:Mi。 默认值按照组件区分。 |
请求示例
{
"kind": "Addon",
"apiVersion": "v3",
"metadata": {
"annotations": {
"addon.install/type": "install"
}
},
"spec": {
"clusterID": "ad24dc34-******-0255ac100030",
"version": "1.16.8",
"addonTemplateName": "volcano",
"values": {
"basic": {
"ecsEndpoint": "x.x.x.x",
"platform": "linux-amd64",
"swr_addr": "swr.*******.com",
"swr_user": "hwofficial"
},
"flavor": {
"description": "For 50 nodes, 5000 pods in cluster",
"name": "Node50",
"resources": [
{
"name": "volcano-scheduler",
"limitsCpu": "2000m",
"requestsCpu": "500m",
"replicas": 2,
"limitsMem": "2000Mi",
"requestsMem": "500Mi"
},
{
"name": "volcano-controller",
"limitsCpu": "2000m",
"requestsCpu": "500m",
"replicas": 2,
"limitsMem": "2000Mi",
"requestsMem": "500Mi"
},
{
"name": "volcano-admission",
"limitsCpu": "500m",
"requestsCpu": "200m",
"replicas": 2,
"limitsMem": "500Mi",
"requestsMem": "500Mi"
},
{
"limitsCpu": "200m",
"limitsMem": "200Mi",
"name": "volcano-agent",
"requestsCpu": "100m",
"requestsMem": "150Mi"
},
{
"limitsCpu": "100m",
"limitsMem": "100Mi",
"name": "resource-exporter",
"requestsCpu": "50m",
"requestsMem": "50Mi"
},
{
"limitsCpu": "1000m",
"limitsMem": "512Mi",
"name": "volcano-descheduler",
"replicas": 2,
"requestsCpu": "500m",
"requestsMem": "256Mi"
},
{
"limitsCpu": "500m",
"limitsMem": "1000Mi",
"name": "volcano-recommender",
"replicas": 2,
"requestsCpu": "300m",
"requestsMem": "500Mi"
},
{
"limitsCpu": "300m",
"limitsMem": "300Mi",
"name": "volcano-recommender-prometheus-adapter",
"replicas": 2,
"requestsCpu": "200m",
"requestsMem": "200Mi"
}
],
"size": "small",
"category": [
"CCE",
"Turbo"
]
},
"custom": {
"admission_kube_api_qps": 200,
"admissions": "/jobs/mutate,/jobs/validate,/podgroups/mutate,/pods/validate,/pods/mutate,/queues/mutate,/queues/validate,/eas/pods/mutate,/eas/pods/validate,/npu/jobs/validate,/resource/validate,/resource/mutate,/workloadbalancer/balancer/validate,/workloadbalancer/balancerpolicytemplate/validate",
"colocation_enable": "false",
"controller_kube_api_qps": 200,
"default_scheduler_conf": {
"actions": "allocate, backfill, preempt",
"metrics": {
"interval": "30s",
"type": ""
},
"tiers": [
{
"plugins": [
{
"name": "priority"
},
{
"enableJobStarving": false,
"enablePreemptable": false,
"name": "gang"
},
{
"name": "conformance"
}
]
},
{
"plugins": [
{
"enablePreemptable": false,
"name": "drf"
},
{
"name": "predicates"
},
{
"name": "nodeorder"
}
]
},
{
"plugins": [
{
"name": "cce-gpu-topology-predicate"
},
{
"name": "cce-gpu-topology-priority"
},
{
"name": "xgpu"
}
]
},
{
"plugins": [
{
"name": "nodelocalvolume"
},
{
"name": "nodeemptydirvolume"
},
{
"name": "nodeCSIscheduling"
},
{
"name": "networkresource"
}
]
}
]
},
"deschedulerPolicy": {
"profiles": [
{
"name": "ProfileName",
"pluginConfig": [
{
"args": {
"nodeFit": true
},
"name": "DefaultEvictor"
},
{
"args": {
"evictableNamespaces": {
"exclude": [
"kube-system"
]
},
"thresholds": {
"cpu": 20,
"memory": 20
}
},
"name": "HighNodeUtilization"
},
{
"args": {
"evictableNamespaces": {
"exclude": [
"kube-system"
]
},
"metrics": {
"type": "prometheus_adaptor"
},
"nodeFit": true,
"targetThresholds": {
"cpu": 80,
"memory": 85
},
"thresholds": {
"cpu": 30,
"memory": 30
}
},
"name": "LoadAware"
}
],
"plugins": {
"balance": {
"enabled": null
}
}
}
]
},
"descheduler_enable": "false",
"deschedulingInterval": "10m",
"enable_workload_balancer": false,
"multiAZEnabled": false,
"node_match_expressions": [],
"oversubscription_method": "nodeResource",
"oversubscription_profile_period": 300,
"oversubscription_ratio": 60,
"scheduler_kube_api_qps": 200,
"tolerations": [
{
"effect": "NoExecute",
"key": "node.kubernetes.io/not-ready",
"operator": "Exists",
"tolerationSeconds": 60
},
{
"effect": "NoExecute",
"key": "node.kubernetes.io/unreachable",
"operator": "Exists",
"tolerationSeconds": 60
},
{
"effect": "NoSchedule",
"key": "node.cilium.io/agent-not-ready",
"operator": "Exists"
}
],
"update_pod_status_qps": 50,
"workload_balancer_score_annotation_key": "",
"workload_balancer_third_party_types": "",
"multiAZBalance": false
}
}
}
}