diff --git a/apps.yaml b/apps.yaml index eb2b8bc2d5..6bf3b42ec9 100644 --- a/apps.yaml +++ b/apps.yaml @@ -184,7 +184,7 @@ appsInfo: chartName: knative-operator kserve: title: Kserve - appVersion: 0.15.2 + appVersion: 0.16.0 repo: http://github.com/kserve/kserve maintainers: Kserve relatedLinks: diff --git a/chart/chart-index/Chart.yaml b/chart/chart-index/Chart.yaml index 4f919a62a7..fb8e1f06e7 100644 --- a/chart/chart-index/Chart.yaml +++ b/chart/chart-index/Chart.yaml @@ -68,10 +68,10 @@ dependencies: version: v1.18.1 repository: https://knative.github.io/operator - name: kserve-crd - version: v0.15.2 + version: v0.16.0 repository: oci://ghcr.io/kserve/charts/kserve-crd - name: kserve - version: v0.15.2 + version: v0.16.0 repository: oci://ghcr.io/kserve/charts/kserve - name: kube-prometheus-stack version: 79.0.1 diff --git a/charts/kserve/Chart.yaml b/charts/kserve/Chart.yaml index c8e02c48ad..4ce86ad184 100644 --- a/charts/kserve/Chart.yaml +++ b/charts/kserve/Chart.yaml @@ -1,9 +1,9 @@ apiVersion: v1 -name: kserve-resources -version: v0.15.2 description: Helm chart for deploying kserve resources keywords: - - kserve - - modelmesh +- kserve +- modelmesh +name: kserve sources: - - http://github.com/kserve/kserve \ No newline at end of file +- http://github.com/kserve/kserve +version: v0.16.0 diff --git a/charts/kserve/README.md b/charts/kserve/README.md index 3e02ec6057..a86b4f5ee5 100644 --- a/charts/kserve/README.md +++ b/charts/kserve/README.md @@ -2,14 +2,14 @@ Helm chart for deploying kserve resources -![Version: v0.15.2](https://img.shields.io/badge/Version-v0.15.2-informational?style=flat-square) +![Version: v0.16.0](https://img.shields.io/badge/Version-v0.16.0-informational?style=flat-square) ## Installing the Chart To install the chart, run the following: ```console -$ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 +$ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.16.0 ``` ## Values @@ -17,11 +17,13 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | Key | Type | Default | Description | |-----|------|---------|-------------| | kserve.agent.image | string | `"kserve/agent"` | | -| kserve.agent.tag | string | `"v0.15.2"` | | +| kserve.agent.tag | string | `"v0.16.0"` | | +| kserve.autoscaler.scaleDownStabilizationWindowSeconds | string | `"300"` | | +| kserve.autoscaler.scaleUpStabilizationWindowSeconds | string | `"0"` | | | kserve.controller.affinity | object | `{}` | A Kubernetes Affinity, if required. For more information, see [Affinity v1 core](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#affinity-v1-core). For example: affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: foo.bar.com/role operator: In values: - master | | kserve.controller.annotations | object | `{}` | Optional additional annotations to add to the controller deployment. | | kserve.controller.containerSecurityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"privileged":false,"readOnlyRootFilesystem":true,"runAsNonRoot":true}` | Container Security Context to be set on the controller component container. For more information, see [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/). | -| kserve.controller.deploymentMode | string | `"Serverless"` | KServe deployment mode: "Serverless", "RawDeployment". | +| kserve.controller.deploymentMode | string | `"Knative"` | KServe deployment mode: "Standard", "Knative". | | kserve.controller.gateway.additionalIngressDomains | list | `[]` | Optional additional domains for ingress routing. | | kserve.controller.gateway.disableIngressCreation | bool | `false` | Whether to disable ingress creation for RawDeployment mode. | | kserve.controller.gateway.disableIstioVirtualHost | bool | `false` | DisableIstioVirtualHost controls whether to use istio as network layer for top level component routing or path based routing. This configuration is only applicable for Serverless mode, when disabled Istio is no longer required. | @@ -57,9 +59,9 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.controller.rbacProxy.securityContext.runAsNonRoot | bool | `true` | | | kserve.controller.rbacProxyImage | string | `"quay.io/brancz/kube-rbac-proxy:v0.18.0"` | KServe controller manager rbac proxy contrainer image | | kserve.controller.resources | object | `{"limits":{"cpu":"100m","memory":"300Mi"},"requests":{"cpu":"100m","memory":"300Mi"}}` | Resources to provide to the kserve controller pod. For example: requests: cpu: 10m memory: 32Mi For more information, see [Resource Management for Pods and Containers](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/). | -| kserve.controller.securityContext | object | `{"runAsNonRoot":true}` | Pod Security Context. For more information, see [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/). | +| kserve.controller.securityContext | object | `{"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}` | Pod Security Context. For more information, see [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/). | | kserve.controller.serviceAnnotations | object | `{}` | Optional additional annotations to add to the controller service. | -| kserve.controller.tag | string | `"v0.15.2"` | KServe controller contrainer image tag. | +| kserve.controller.tag | string | `"v0.16.0"` | KServe controller contrainer image tag. | | kserve.controller.tolerations | list | `[]` | A list of Kubernetes Tolerations, if required. For more information, see [Toleration v1 core](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#toleration-v1-core). For example: tolerations: - key: foo.bar.com/role operator: Equal value: master effect: NoSchedule | | kserve.controller.topologySpreadConstraints | list | `[]` | A list of Kubernetes TopologySpreadConstraints, if required. For more information, see [Topology spread constraint v1 core](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#topologyspreadconstraint-v1-core For example: topologySpreadConstraints: - maxSkew: 2 topologyKey: topology.kubernetes.io/zone whenUnsatisfiable: ScheduleAnyway labelSelector: matchLabels: app.kubernetes.io/instance: kserve-controller-manager app.kubernetes.io/component: controller | | kserve.controller.webhookServiceAnnotations | object | `{}` | Optional additional annotations to add to the webhook service. | @@ -74,10 +76,10 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.localmodel.agent.reconcilationFrequencyInSecs | int | `60` | | | kserve.localmodel.agent.securityContext.runAsNonRoot | bool | `true` | | | kserve.localmodel.agent.securityContext.runAsUser | int | `1000` | | -| kserve.localmodel.agent.tag | string | `"v0.15.2"` | | +| kserve.localmodel.agent.tag | string | `"v0.16.0"` | | | kserve.localmodel.agent.tolerations | list | `[]` | | | kserve.localmodel.controller.image | string | `"kserve/kserve-localmodel-controller"` | | -| kserve.localmodel.controller.tag | string | `"v0.15.2"` | | +| kserve.localmodel.controller.tag | string | `"v0.16.0"` | | | kserve.localmodel.disableVolumeManagement | bool | `false` | | | kserve.localmodel.enabled | bool | `false` | | | kserve.localmodel.jobNamespace | string | `"kserve-localmodel-jobs"` | | @@ -85,17 +87,25 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.localmodel.securityContext.fsGroup | int | `1000` | | | kserve.metricsaggregator.enableMetricAggregation | string | `"false"` | configures metric aggregation annotation. This adds the annotation serving.kserve.io/enable-metric-aggregation to every service with the specified boolean value. If true enables metric aggregation in queue-proxy by setting env vars in the queue proxy container to configure scraping ports. | | kserve.metricsaggregator.enablePrometheusScraping | string | `"false"` | If true, prometheus annotations are added to the pod to scrape the metrics. If serving.kserve.io/enable-metric-aggregation is false, the prometheus port is set with the default prometheus scraping port 9090, otherwise the prometheus port annotation is set with the metric aggregation port. | +| kserve.opentelemetryCollector.metricReceiverEndpoint | string | `"keda-otel-scaler.keda.svc:4317"` | | +| kserve.opentelemetryCollector.metricScalerEndpoint | string | `"keda-otel-scaler.keda.svc:4318"` | | +| kserve.opentelemetryCollector.resource.cpuLimit | string | `"1"` | | +| kserve.opentelemetryCollector.resource.cpuRequest | string | `"200m"` | | +| kserve.opentelemetryCollector.resource.memoryLimit | string | `"2Gi"` | | +| kserve.opentelemetryCollector.resource.memoryRequest | string | `"512Mi"` | | +| kserve.opentelemetryCollector.scrapeInterval | string | `"5s"` | | | kserve.router.image | string | `"kserve/router"` | | | kserve.router.imagePullPolicy | string | `"IfNotPresent"` | Specifies when to pull router image from registry. | | kserve.router.imagePullSecrets | list | `[]` | specifies the list of secrets to be used for pulling the router image from registry. | -| kserve.router.tag | string | `"v0.15.2"` | | +| kserve.router.tag | string | `"v0.16.0"` | | | kserve.security.autoMountServiceAccountToken | bool | `true` | | | kserve.service.serviceClusterIPNone | bool | `false` | | -| kserve.servingruntime.art.defaultVersion | string | `"v0.15.2"` | | +| kserve.servingruntime.art.defaultVersion | string | `"v0.16.0"` | | | kserve.servingruntime.art.image | string | `"kserve/art-explainer"` | | | kserve.servingruntime.art.imagePullSecrets | list | `[]` | | | kserve.servingruntime.huggingfaceserver.devShm.enabled | bool | `false` | | | kserve.servingruntime.huggingfaceserver.devShm.sizeLimit | string | `""` | | +| kserve.servingruntime.huggingfaceserver.disabled | bool | `false` | | | kserve.servingruntime.huggingfaceserver.hostIPC.enabled | bool | `false` | | | kserve.servingruntime.huggingfaceserver.image | string | `"kserve/huggingfaceserver"` | | | kserve.servingruntime.huggingfaceserver.imagePullSecrets | list | `[]` | | @@ -104,7 +114,8 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.servingruntime.huggingfaceserver.securityContext.capabilities.drop[0] | string | `"ALL"` | | | kserve.servingruntime.huggingfaceserver.securityContext.privileged | bool | `false` | | | kserve.servingruntime.huggingfaceserver.securityContext.runAsNonRoot | bool | `true` | | -| kserve.servingruntime.huggingfaceserver.tag | string | `"v0.15.2"` | | +| kserve.servingruntime.huggingfaceserver.tag | string | `"v0.16.0"` | | +| kserve.servingruntime.huggingfaceserver_multinode.disabled | bool | `false` | | | kserve.servingruntime.huggingfaceserver_multinode.imagePullSecrets | list | `[]` | | | kserve.servingruntime.huggingfaceserver_multinode.securityContext.allowPrivilegeEscalation | bool | `false` | | | kserve.servingruntime.huggingfaceserver_multinode.securityContext.capabilities.drop[0] | string | `"ALL"` | | @@ -112,13 +123,15 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.servingruntime.huggingfaceserver_multinode.securityContext.runAsNonRoot | bool | `true` | | | kserve.servingruntime.huggingfaceserver_multinode.shm.enabled | bool | `true` | | | kserve.servingruntime.huggingfaceserver_multinode.shm.sizeLimit | string | `"3Gi"` | | +| kserve.servingruntime.lgbserver.disabled | bool | `false` | | | kserve.servingruntime.lgbserver.image | string | `"kserve/lgbserver"` | | | kserve.servingruntime.lgbserver.imagePullSecrets | list | `[]` | | | kserve.servingruntime.lgbserver.securityContext.allowPrivilegeEscalation | bool | `false` | | | kserve.servingruntime.lgbserver.securityContext.capabilities.drop[0] | string | `"ALL"` | | | kserve.servingruntime.lgbserver.securityContext.privileged | bool | `false` | | | kserve.servingruntime.lgbserver.securityContext.runAsNonRoot | bool | `true` | | -| kserve.servingruntime.lgbserver.tag | string | `"v0.15.2"` | | +| kserve.servingruntime.lgbserver.tag | string | `"v0.16.0"` | | +| kserve.servingruntime.mlserver.disabled | bool | `false` | | | kserve.servingruntime.mlserver.image | string | `"docker.io/seldonio/mlserver"` | | | kserve.servingruntime.mlserver.imagePullSecrets | list | `[]` | | | kserve.servingruntime.mlserver.modelClassPlaceholder | string | `"{{.Labels.modelClass}}"` | | @@ -128,27 +141,31 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.servingruntime.mlserver.securityContext.runAsNonRoot | bool | `true` | | | kserve.servingruntime.mlserver.tag | string | `"1.5.0"` | | | kserve.servingruntime.modelNamePlaceholder | string | `"{{.Name}}"` | | +| kserve.servingruntime.paddleserver.disabled | bool | `false` | | | kserve.servingruntime.paddleserver.image | string | `"kserve/paddleserver"` | | | kserve.servingruntime.paddleserver.imagePullSecrets | list | `[]` | | | kserve.servingruntime.paddleserver.securityContext.allowPrivilegeEscalation | bool | `false` | | | kserve.servingruntime.paddleserver.securityContext.capabilities.drop[0] | string | `"ALL"` | | | kserve.servingruntime.paddleserver.securityContext.privileged | bool | `false` | | | kserve.servingruntime.paddleserver.securityContext.runAsNonRoot | bool | `true` | | -| kserve.servingruntime.paddleserver.tag | string | `"v0.15.2"` | | +| kserve.servingruntime.paddleserver.tag | string | `"v0.16.0"` | | +| kserve.servingruntime.pmmlserver.disabled | bool | `false` | | | kserve.servingruntime.pmmlserver.image | string | `"kserve/pmmlserver"` | | | kserve.servingruntime.pmmlserver.imagePullSecrets | list | `[]` | | | kserve.servingruntime.pmmlserver.securityContext.allowPrivilegeEscalation | bool | `false` | | | kserve.servingruntime.pmmlserver.securityContext.capabilities.drop[0] | string | `"ALL"` | | | kserve.servingruntime.pmmlserver.securityContext.privileged | bool | `false` | | | kserve.servingruntime.pmmlserver.securityContext.runAsNonRoot | bool | `true` | | -| kserve.servingruntime.pmmlserver.tag | string | `"v0.15.2"` | | +| kserve.servingruntime.pmmlserver.tag | string | `"v0.16.0"` | | +| kserve.servingruntime.sklearnserver.disabled | bool | `false` | | | kserve.servingruntime.sklearnserver.image | string | `"kserve/sklearnserver"` | | | kserve.servingruntime.sklearnserver.imagePullSecrets | list | `[]` | | | kserve.servingruntime.sklearnserver.securityContext.allowPrivilegeEscalation | bool | `false` | | | kserve.servingruntime.sklearnserver.securityContext.capabilities.drop[0] | string | `"ALL"` | | | kserve.servingruntime.sklearnserver.securityContext.privileged | bool | `false` | | | kserve.servingruntime.sklearnserver.securityContext.runAsNonRoot | bool | `true` | | -| kserve.servingruntime.sklearnserver.tag | string | `"v0.15.2"` | | +| kserve.servingruntime.sklearnserver.tag | string | `"v0.16.0"` | | +| kserve.servingruntime.tensorflow.disabled | bool | `false` | | | kserve.servingruntime.tensorflow.image | string | `"tensorflow/serving"` | | | kserve.servingruntime.tensorflow.imagePullSecrets | list | `[]` | | | kserve.servingruntime.tensorflow.securityContext.allowPrivilegeEscalation | bool | `false` | | @@ -157,6 +174,7 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.servingruntime.tensorflow.securityContext.runAsNonRoot | bool | `true` | | | kserve.servingruntime.tensorflow.securityContext.runAsUser | int | `1000` | | | kserve.servingruntime.tensorflow.tag | string | `"2.6.2"` | | +| kserve.servingruntime.torchserve.disabled | bool | `false` | | | kserve.servingruntime.torchserve.image | string | `"pytorch/torchserve-kfs"` | | | kserve.servingruntime.torchserve.imagePullSecrets | list | `[]` | | | kserve.servingruntime.torchserve.securityContext.allowPrivilegeEscalation | bool | `false` | | @@ -166,6 +184,7 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.servingruntime.torchserve.securityContext.runAsUser | int | `1000` | | | kserve.servingruntime.torchserve.serviceEnvelopePlaceholder | string | `"{{.Labels.serviceEnvelope}}"` | | | kserve.servingruntime.torchserve.tag | string | `"0.9.0"` | | +| kserve.servingruntime.tritonserver.disabled | bool | `false` | | | kserve.servingruntime.tritonserver.image | string | `"nvcr.io/nvidia/tritonserver"` | | | kserve.servingruntime.tritonserver.imagePullSecrets | list | `[]` | | | kserve.servingruntime.tritonserver.securityContext.allowPrivilegeEscalation | bool | `false` | | @@ -174,13 +193,14 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.servingruntime.tritonserver.securityContext.runAsNonRoot | bool | `true` | | | kserve.servingruntime.tritonserver.securityContext.runAsUser | int | `1000` | | | kserve.servingruntime.tritonserver.tag | string | `"23.05-py3"` | | +| kserve.servingruntime.xgbserver.disabled | bool | `false` | | | kserve.servingruntime.xgbserver.image | string | `"kserve/xgbserver"` | | | kserve.servingruntime.xgbserver.imagePullSecrets | list | `[]` | | | kserve.servingruntime.xgbserver.securityContext.allowPrivilegeEscalation | bool | `false` | | | kserve.servingruntime.xgbserver.securityContext.capabilities.drop[0] | string | `"ALL"` | | | kserve.servingruntime.xgbserver.securityContext.privileged | bool | `false` | | | kserve.servingruntime.xgbserver.securityContext.runAsNonRoot | bool | `true` | | -| kserve.servingruntime.xgbserver.tag | string | `"v0.15.2"` | | +| kserve.servingruntime.xgbserver.tag | string | `"v0.16.0"` | | | kserve.storage.caBundleConfigMapName | string | `""` | Mounted CA bundle config map name for storage initializer. | | kserve.storage.caBundleVolumeMountPath | string | `"/etc/ssl/custom-certs"` | Mounted path for CA bundle config map. | | kserve.storage.containerSecurityContext.allowPrivilegeEscalation | bool | `false` | | @@ -191,6 +211,10 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.storage.enableModelcar | bool | `true` | Flag for enabling model sidecar feature. | | kserve.storage.image | string | `"kserve/storage-initializer"` | | | kserve.storage.memoryModelcar | string | `"15Mi"` | Model sidecar memory requirement. | +| kserve.storage.resources.limits.cpu | string | `"1"` | | +| kserve.storage.resources.limits.memory | string | `"1Gi"` | | +| kserve.storage.resources.requests.cpu | string | `"100m"` | | +| kserve.storage.resources.requests.memory | string | `"100Mi"` | | | kserve.storage.s3 | object | `{"CABundle":"","accessKeyIdName":"AWS_ACCESS_KEY_ID","endpoint":"","region":"","secretAccessKeyName":"AWS_SECRET_ACCESS_KEY","useAnonymousCredential":"","useHttps":"","useVirtualBucket":"","verifySSL":""}` | Configurations for S3 storage | | kserve.storage.s3.CABundle | string | `""` | The path to the certificate bundle to use for HTTPS certificate validation. | | kserve.storage.s3.accessKeyIdName | string | `"AWS_ACCESS_KEY_ID"` | AWS S3 static access key id. | @@ -203,5 +227,6 @@ $ helm install kserve oci://ghcr.io/kserve/charts/kserve --version v0.15.2 | kserve.storage.s3.verifySSL | string | `""` | Whether to verify the tls/ssl certificate, default to true. | | kserve.storage.storageSecretNameAnnotation | string | `"serving.kserve.io/secretName"` | Storage secret name reference for storage initializer. | | kserve.storage.storageSpecSecretName | string | `"storage-config"` | Storage spec secret name. | -| kserve.storage.tag | string | `"v0.15.2"` | | -| kserve.version | string | `"v0.15.2"` | | +| kserve.storage.tag | string | `"v0.16.0"` | | +| kserve.storage.uidModelcar | int | `1010` | Model sidecar UID. | +| kserve.version | string | `"v0.16.0"` | | diff --git a/charts/kserve/crds/serving.kserve.io_clusterservingruntimes.yaml b/charts/kserve/crds/serving.kserve.io_clusterservingruntimes.yaml index 743e43f67e..3a07da6d13 100644 --- a/charts/kserve/crds/serving.kserve.io_clusterservingruntimes.yaml +++ b/charts/kserve/crds/serving.kserve.io_clusterservingruntimes.yaml @@ -792,6 +792,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -2771,6 +2773,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: diff --git a/charts/kserve/crds/serving.kserve.io_clusterstoragecontainers.yaml b/charts/kserve/crds/serving.kserve.io_clusterstoragecontainers.yaml index 944ed597fc..95118871ca 100644 --- a/charts/kserve/crds/serving.kserve.io_clusterstoragecontainers.yaml +++ b/charts/kserve/crds/serving.kserve.io_clusterstoragecontainers.yaml @@ -260,6 +260,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -714,6 +716,9 @@ spec: type: string type: object type: array + supportsMultiModelDownload: + default: false + type: boolean workloadType: default: initContainer type: string diff --git a/charts/kserve/crds/serving.kserve.io_inferencegraphs.yaml b/charts/kserve/crds/serving.kserve.io_inferencegraphs.yaml index 1ad71720d1..3ba5e80c85 100644 --- a/charts/kserve/crds/serving.kserve.io_inferencegraphs.yaml +++ b/charts/kserve/crds/serving.kserve.io_inferencegraphs.yaml @@ -560,6 +560,21 @@ spec: x-kubernetes-int-or-string: true type: object type: object + routerTimeouts: + properties: + serverIdle: + format: int64 + type: integer + serverRead: + format: int64 + type: integer + serverWrite: + format: int64 + type: integer + serviceClient: + format: int64 + type: integer + type: object scaleMetric: enum: - cpu diff --git a/charts/kserve/crds/serving.kserve.io_inferenceservices.yaml b/charts/kserve/crds/serving.kserve.io_inferenceservices.yaml index 757e12d736..33f9526762 100644 --- a/charts/kserve/crds/serving.kserve.io_inferenceservices.yaml +++ b/charts/kserve/crds/serving.kserve.io_inferenceservices.yaml @@ -735,6 +735,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -1191,11 +1193,92 @@ spec: type: object autoScaling: properties: + behavior: + properties: + scaleDown: + properties: + policies: + items: + properties: + periodSeconds: + format: int32 + type: integer + type: + type: string + value: + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + type: string + stabilizationWindowSeconds: + format: int32 + type: integer + tolerance: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + scaleUp: + properties: + policies: + items: + properties: + periodSeconds: + format: int32 + type: integer + type: + type: string + value: + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + type: string + stabilizationWindowSeconds: + format: int32 + type: integer + tolerance: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object metrics: items: properties: external: properties: + authenticationRef: + properties: + authModes: + type: string + authenticationRef: + properties: + name: + type: string + required: + - name + type: object + required: + - authenticationRef + type: object metric: properties: backend: @@ -1582,6 +1665,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -2336,6 +2421,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -2802,6 +2889,19 @@ spec: - request - response type: string + storage: + properties: + key: + type: string + parameters: + additionalProperties: + type: string + type: object + path: + type: string + serviceAccountName: + type: string + type: object url: type: string type: object @@ -3017,6 +3117,23 @@ spec: type: boolean shareProcessNamespace: type: boolean + storageUris: + items: + properties: + mountPath: + default: /mnt/models + maxLength: 255 + pattern: ^/.* + type: string + uri: + minLength: 1 + type: string + required: + - uri + type: object + minItems: 1 + type: array + x-kubernetes-list-type: atomic subdomain: type: string terminationGracePeriodSeconds: @@ -4331,11 +4448,92 @@ spec: type: object autoScaling: properties: + behavior: + properties: + scaleDown: + properties: + policies: + items: + properties: + periodSeconds: + format: int32 + type: integer + type: + type: string + value: + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + type: string + stabilizationWindowSeconds: + format: int32 + type: integer + tolerance: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + scaleUp: + properties: + policies: + items: + properties: + periodSeconds: + format: int32 + type: integer + type: + type: string + value: + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + type: string + stabilizationWindowSeconds: + format: int32 + type: integer + tolerance: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object metrics: items: properties: external: properties: + authenticationRef: + properties: + authModes: + type: string + authenticationRef: + properties: + name: + type: string + required: + - name + type: object + required: + - authenticationRef + type: object metric: properties: backend: @@ -4722,6 +4920,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -5466,6 +5666,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -6163,6 +6365,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -6846,6 +7050,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -7316,6 +7522,19 @@ spec: - request - response type: string + storage: + properties: + key: + type: string + parameters: + additionalProperties: + type: string + type: object + path: + type: string + serviceAccountName: + type: string + type: object url: type: string type: object @@ -7558,6 +7777,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -8263,6 +8484,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -8963,6 +9186,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -9650,6 +9875,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -10344,6 +10571,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -11210,6 +11439,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -11664,6 +11895,23 @@ spec: workingDir: type: string type: object + storageUris: + items: + properties: + mountPath: + default: /mnt/models + maxLength: 255 + pattern: ^/.* + type: string + uri: + minLength: 1 + type: string + required: + - uri + type: object + minItems: 1 + type: array + x-kubernetes-list-type: atomic subdomain: type: string tensorflow: @@ -11899,6 +12147,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -12666,6 +12916,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -14581,6 +14833,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -15288,6 +15542,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -16003,6 +16259,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -17739,6 +17997,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -18644,11 +18904,92 @@ spec: type: object autoScaling: properties: + behavior: + properties: + scaleDown: + properties: + policies: + items: + properties: + periodSeconds: + format: int32 + type: integer + type: + type: string + value: + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + type: string + stabilizationWindowSeconds: + format: int32 + type: integer + tolerance: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + scaleUp: + properties: + policies: + items: + properties: + periodSeconds: + format: int32 + type: integer + type: + type: string + value: + format: int32 + type: integer + required: + - periodSeconds + - type + - value + type: object + type: array + x-kubernetes-list-type: atomic + selectPolicy: + type: string + stabilizationWindowSeconds: + format: int32 + type: integer + tolerance: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object metrics: items: properties: external: properties: + authenticationRef: + properties: + authModes: + type: string + authenticationRef: + properties: + name: + type: string + required: + - name + type: object + required: + - authenticationRef + type: object metric: properties: backend: @@ -19035,6 +19376,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -19789,6 +20132,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -20255,6 +20600,19 @@ spec: - request - response type: string + storage: + properties: + key: + type: string + parameters: + additionalProperties: + type: string + type: object + path: + type: string + serviceAccountName: + type: string + type: object url: type: string type: object @@ -20470,6 +20828,23 @@ spec: type: boolean shareProcessNamespace: type: boolean + storageUris: + items: + properties: + mountPath: + default: /mnt/models + maxLength: 255 + pattern: ^/.* + type: string + uri: + minLength: 1 + type: string + required: + - uri + type: object + minItems: 1 + type: array + x-kubernetes-list-type: atomic subdomain: type: string terminationGracePeriodSeconds: @@ -21354,6 +21729,8 @@ spec: additionalProperties: type: string type: object + clusterServingRuntimeName: + type: string components: additionalProperties: properties: @@ -21500,6 +21877,8 @@ spec: observedGeneration: format: int64 type: integer + servingRuntimeName: + type: string url: type: string type: object diff --git a/charts/kserve/crds/serving.kserve.io_servingruntimes.yaml b/charts/kserve/crds/serving.kserve.io_servingruntimes.yaml index f174ebc7a0..efd0857ae2 100644 --- a/charts/kserve/crds/serving.kserve.io_servingruntimes.yaml +++ b/charts/kserve/crds/serving.kserve.io_servingruntimes.yaml @@ -792,6 +792,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: @@ -2771,6 +2773,8 @@ spec: - port type: object type: object + stopSignal: + type: string type: object livenessProbe: properties: diff --git a/charts/kserve/templates/clusterservingruntimes.yaml b/charts/kserve/templates/clusterservingruntimes.yaml index 172e55192e..a5ae412bfd 100644 --- a/charts/kserve/templates/clusterservingruntimes.yaml +++ b/charts/kserve/templates/clusterservingruntimes.yaml @@ -3,6 +3,7 @@ kind: ClusterServingRuntime metadata: name: kserve-lgbserver spec: + disabled: {{ .Values.kserve.servingruntime.lgbserver.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" @@ -44,6 +45,7 @@ kind: ClusterServingRuntime metadata: name: kserve-mlserver spec: + disabled: {{ .Values.kserve.servingruntime.mlserver.disabled }} annotations: # mlserver version 1.1.0 uses port 8082 as default instead of 8080. prometheus.kserve.io/port: '8080' @@ -117,6 +119,7 @@ kind: ClusterServingRuntime metadata: name: kserve-paddleserver spec: + disabled: {{ .Values.kserve.servingruntime.paddleserver.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" @@ -157,6 +160,7 @@ kind: ClusterServingRuntime metadata: name: kserve-pmmlserver spec: + disabled: {{ .Values.kserve.servingruntime.pmmlserver.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" @@ -201,6 +205,7 @@ kind: ClusterServingRuntime metadata: name: kserve-sklearnserver spec: + disabled: {{ .Values.kserve.servingruntime.sklearnserver.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" @@ -241,6 +246,7 @@ kind: ClusterServingRuntime metadata: name: kserve-tensorflow-serving spec: + disabled: {{ .Values.kserve.servingruntime.tensorflow.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" @@ -288,6 +294,7 @@ kind: ClusterServingRuntime metadata: name: kserve-torchserve spec: + disabled: {{ .Values.kserve.servingruntime.torchserve.disabled }} annotations: prometheus.kserve.io/port: '8082' prometheus.kserve.io/path: "/metrics" @@ -333,6 +340,7 @@ kind: ClusterServingRuntime metadata: name: kserve-tritonserver spec: + disabled: {{ .Values.kserve.servingruntime.tritonserver.disabled }} annotations: prometheus.kserve.io/port: '8002' prometheus.kserve.io/path: "/metrics" @@ -394,6 +402,7 @@ kind: ClusterServingRuntime metadata: name: kserve-xgbserver spec: + disabled: {{ .Values.kserve.servingruntime.xgbserver.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" @@ -434,6 +443,7 @@ kind: ClusterServingRuntime metadata: name: kserve-huggingfaceserver spec: + disabled: {{ .Values.kserve.servingruntime.huggingfaceserver.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" @@ -491,6 +501,7 @@ kind: ClusterServingRuntime metadata: name: kserve-huggingfaceserver-multinode spec: + disabled: {{ .Values.kserve.servingruntime.huggingfaceserver.disabled }} annotations: prometheus.kserve.io/port: '8080' prometheus.kserve.io/path: "/metrics" diff --git a/charts/kserve/templates/clusterstoragecontainer.yaml b/charts/kserve/templates/clusterstoragecontainer.yaml index 2aa54468b0..370f8e8c87 100644 --- a/charts/kserve/templates/clusterstoragecontainer.yaml +++ b/charts/kserve/templates/clusterstoragecontainer.yaml @@ -7,12 +7,9 @@ spec: name: storage-initializer image: "{{ .Values.kserve.storage.image }}:{{ .Values.kserve.storage.tag }}" resources: - requests: - memory: 100Mi - cpu: 100m - limits: - memory: 1Gi - cpu: "1" + {{- with .Values.kserve.storage.resources }} + {{- toYaml . | nindent 6 }} + {{- end }} securityContext: {{- with .Values.kserve.storage.containerSecurityContext}} {{- toYaml . | nindent 6 }} @@ -27,3 +24,4 @@ spec: - regex: "https://(.+?).file.core.windows.net/(.+)" - regex: "https?://(.+)/(.+)" workloadType: initContainer + supportsMultiModelDownload: true diff --git a/charts/kserve/templates/configmap.yaml b/charts/kserve/templates/configmap.yaml index 4a458b0116..4cc94f9a57 100644 --- a/charts/kserve/templates/configmap.yaml +++ b/charts/kserve/templates/configmap.yaml @@ -114,8 +114,8 @@ data: "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1", - "enableDirectPvcVolumeMount": true, "enableModelcar": false, + "uidModelcar": 10, "cpuModelcar": "10m", "memoryModelcar": "15Mi" } @@ -136,11 +136,6 @@ data: # cpuLimit is the limits.cpu to set for the storage initializer init container. "cpuLimit": "1", - # enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly. - # if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container. - # rather than symlink it to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737 - "enableDirectPvcVolumeMount": true, - # enableModelcar enabled allows you to directly access an OCI container image by # using a source URL with an "oci://" schema. "enableModelcar": false, @@ -241,7 +236,7 @@ data: # ====================================== INGRESS CONFIGURATION ====================================== # Example ingress: |- - { + { "enableGatewayApi": false, "kserveIngressGateway" : "kserve/kserve-ingress-gateway", "ingressGateway" : "knative-serving/knative-ingress-gateway", @@ -256,17 +251,17 @@ data: "disableIngressCreation": false } ingress: |- - { + { # enableGatewayApi specifies whether to use Gateway API instead of Ingress for serving external traffic. "enableGatewayApi": false, - - # KServe implements [Gateway API](https://gateway-api.sigs.k8s.io/) to serve external traffic. + + # KServe implements [Gateway API](https://gateway-api.sigs.k8s.io/) to serve external traffic. # By default, KServe configures a default gateway to serve external traffic. # But, KServe can be configured to use a custom gateway by modifying this configuration. # The gateway should be specified in format / # NOTE: This configuration only applicable for raw deployment. "kserveIngressGateway": "kserve/kserve-ingress-gateway", - + # ingressGateway specifies the ingress gateway to serve external traffic. # The gateway should be specified in format / # NOTE: This configuration only applicable for serverless deployment with Istio configured as network layer. @@ -480,7 +475,7 @@ data: # imagePullPolicy specifies when the router image should be pulled from registry. "imagePullPolicy": "IfNotPresent", - + # imagePullSecrets specifies the list of secrets to be used for pulling the router image from registry. # https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ "imagePullSecrets": ["docker-secret"] @@ -620,7 +615,7 @@ data: } } ingress: |- - { + { "enableGatewayApi": {{ .Values.kserve.controller.gateway.ingressGateway.enableGatewayApi }}, "kserveIngressGateway" : "{{ .Values.kserve.controller.gateway.ingressGateway.kserveGateway }}", "ingressGateway" : "{{ .Values.kserve.controller.gateway.ingressGateway.gateway }}", @@ -655,10 +650,10 @@ data: "memoryLimit": "1Gi", "cpuRequest": "100m", "cpuLimit": "1", - "enableDirectPvcVolumeMount": true, "caBundleConfigMapName": "{{ .Values.kserve.storage.caBundleConfigMapName }}", "caBundleVolumeMountPath": "{{ .Values.kserve.storage.caBundleVolumeMountPath }}", "enableModelcar": {{ .Values.kserve.storage.enableModelcar }}, + "uidModelcar": {{ .Values.kserve.storage.uidModelcar }}, "cpuModelcar": "{{ .Values.kserve.storage.cpuModelcar }}", "memoryModelcar": "{{ .Values.kserve.storage.memoryModelcar }}" } @@ -694,7 +689,19 @@ data: opentelemetryCollector: |- { - "scrapeInterval": "5s", - "metricReceiverEndpoint": "keda-otel-scaler.keda.svc:4317", - "metricScalerEndpoint": "keda-otel-scaler.keda.svc:4318" + "scrapeInterval": "{{ .Values.kserve.opentelemetryCollector.scrapeInterval }}", + "metricReceiverEndpoint": "{{ .Values.kserve.opentelemetryCollector.metricReceiverEndpoint }}", + "metricScalerEndpoint": "{{ .Values.kserve.opentelemetryCollector.metricScalerEndpoint }}", + "resource": { + "cpuLimit": "{{ .Values.kserve.opentelemetryCollector.resource.cpuLimit }}", + "memoryLimit": "{{ .Values.kserve.opentelemetryCollector.resource.memoryLimit }}", + "cpuRequest": "{{ .Values.kserve.opentelemetryCollector.resource.cpuRequest }}", + "memoryRequest": "{{ .Values.kserve.opentelemetryCollector.resource.memoryRequest }}" + } + } + + autoscaler: |- + { + "scaleUpStabilizationWindowSeconds": "{{ .Values.kserve.autoscaler.scaleUpStabilizationWindowSeconds }}", + "scaleDownStabilizationWindowSeconds": "{{ .Values.kserve.autoscaler.scaleDownStabilizationWindowSeconds }}" } diff --git a/charts/kserve/templates/localmodel/deployment.yaml b/charts/kserve/templates/localmodel/deployment.yaml index 40840e5daa..8bfb34b368 100644 --- a/charts/kserve/templates/localmodel/deployment.yaml +++ b/charts/kserve/templates/localmodel/deployment.yaml @@ -26,6 +26,8 @@ spec: serviceAccountName: kserve-localmodel-controller-manager securityContext: runAsNonRoot: true + seccompProfile: + type: RuntimeDefault containers: - command: - /manager diff --git a/charts/kserve/templates/webhookconfiguration.yaml b/charts/kserve/templates/webhookconfiguration.yaml index 27e82aade5..2070c462a6 100644 --- a/charts/kserve/templates/webhookconfiguration.yaml +++ b/charts/kserve/templates/webhookconfiguration.yaml @@ -218,3 +218,60 @@ webhooks: - DELETE resources: - localmodelcaches +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + creationTimestamp: null + name: llminferenceservice.serving.kserve.io + annotations: + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/serving-cert +webhooks: + - clientConfig: + service: + name: kserve-webhook-server-service + namespace: {{ .Release.Namespace }} + path: /validate-serving-kserve-io-v1alpha1-llminferenceservice + failurePolicy: Fail + name: llminferenceservice.kserve-webhook-server.validator + sideEffects: None + admissionReviewVersions: [ "v1", "v1beta1" ] + rules: + - apiGroups: + - serving.kserve.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - llminferenceservices +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + creationTimestamp: null + name: llminferenceserviceconfig.serving.kserve.io + annotations: + cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/serving-cert +webhooks: + - clientConfig: + service: + name: kserve-webhook-server-service + namespace: {{ .Release.Namespace }} + path: /validate-serving-kserve-io-v1alpha1-llminferenceserviceconfig + failurePolicy: Fail + name: llminferenceserviceconfig.kserve-webhook-server.validator + sideEffects: None + admissionReviewVersions: [ "v1", "v1beta1" ] + rules: + - apiGroups: + - serving.kserve.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + - DELETE + resources: + - llminferenceserviceconfigs \ No newline at end of file diff --git a/charts/kserve/values.yaml b/charts/kserve/values.yaml index 595118a95a..60dfc97384 100644 --- a/charts/kserve/values.yaml +++ b/charts/kserve/values.yaml @@ -1,5 +1,5 @@ kserve: - version: &defaultVersion v0.15.2 + version: &defaultVersion v0.16.0 agent: image: kserve/agent tag: *defaultVersion @@ -15,7 +15,13 @@ kserve: storage: image: kserve/storage-initializer tag: *defaultVersion - + resources: + requests: + memory: 100Mi + cpu: 100m + limits: + memory: 1Gi + cpu: "1" # security context for the default storage container containerSecurityContext: allowPrivilegeEscalation: false @@ -28,6 +34,9 @@ kserve: # -- Flag for enabling model sidecar feature. enableModelcar: true + # -- Model sidecar UID. + uidModelcar: 1010 + # -- Model sidecar cpu requirement. cpuModelcar: 10m @@ -85,8 +94,8 @@ kserve: # the prometheus port is set with the default prometheus scraping port 9090, otherwise the prometheus port annotation is set with the metric aggregation port. enablePrometheusScraping: "false" controller: - # -- KServe deployment mode: "Serverless", "RawDeployment". - deploymentMode: "Serverless" + # -- KServe deployment mode: "Standard", "Knative". + deploymentMode: "Knative" # -- KServe controller manager rbac proxy contrainer image rbacProxyImage: quay.io/brancz/kube-rbac-proxy:v0.18.0 @@ -129,6 +138,8 @@ kserve: # For more information, see [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/). securityContext: runAsNonRoot: true + seccompProfile: + type: RuntimeDefault # -- Container Security Context to be set on the controller component container. # For more information, see [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/). @@ -270,6 +281,7 @@ kserve: servingruntime: modelNamePlaceholder: "{{.Name}}" tensorflow: + disabled: false image: tensorflow/serving tag: 2.6.2 imagePullSecrets: [] @@ -282,6 +294,7 @@ kserve: drop: - ALL mlserver: + disabled: false image: docker.io/seldonio/mlserver tag: 1.5.0 modelClassPlaceholder: "{{.Labels.modelClass}}" @@ -294,6 +307,7 @@ kserve: drop: - ALL sklearnserver: + disabled: false image: kserve/sklearnserver tag: *defaultVersion imagePullSecrets: [] @@ -305,6 +319,7 @@ kserve: drop: - ALL xgbserver: + disabled: false image: kserve/xgbserver tag: *defaultVersion imagePullSecrets: [] @@ -316,6 +331,7 @@ kserve: drop: - ALL huggingfaceserver: + disabled: false image: kserve/huggingfaceserver tag: *defaultVersion imagePullSecrets: [] @@ -333,6 +349,7 @@ kserve: hostIPC: enabled: false huggingfaceserver_multinode: + disabled: false imagePullSecrets: [] securityContext: allowPrivilegeEscalation: false @@ -345,6 +362,7 @@ kserve: enabled: true sizeLimit: "3Gi" tritonserver: + disabled: false image: nvcr.io/nvidia/tritonserver tag: 23.05-py3 imagePullSecrets: [] @@ -357,6 +375,7 @@ kserve: drop: - ALL pmmlserver: + disabled: false image: kserve/pmmlserver tag: *defaultVersion imagePullSecrets: [] @@ -368,6 +387,7 @@ kserve: drop: - ALL paddleserver: + disabled: false image: kserve/paddleserver tag: *defaultVersion imagePullSecrets: [] @@ -379,6 +399,7 @@ kserve: drop: - ALL lgbserver: + disabled: false image: kserve/lgbserver tag: *defaultVersion imagePullSecrets: [] @@ -390,6 +411,7 @@ kserve: drop: - ALL torchserve: + disabled: false image: pytorch/torchserve-kfs tag: 0.9.0 serviceEnvelopePlaceholder: "{{.Labels.serviceEnvelope}}" @@ -437,3 +459,15 @@ kserve: requests: cpu: "1" memory: "2Gi" + opentelemetryCollector: + scrapeInterval: "5s" + metricReceiverEndpoint: "keda-otel-scaler.keda.svc:4317" + metricScalerEndpoint: "keda-otel-scaler.keda.svc:4318" + resource: + cpuLimit: "1" + memoryLimit: "2Gi" + cpuRequest: "200m" + memoryRequest: "512Mi" + autoscaler: + scaleUpStabilizationWindowSeconds: "0" + scaleDownStabilizationWindowSeconds: "300"