Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
"automount",
"AWSGPU",
"batchv",
"Biren",
"burstable",
"Cambricon",
"CDNA",
"Cerebras",
"certgen",
"certificaterequests",
"certmanager",
Expand Down Expand Up @@ -78,6 +81,7 @@
"greptime",
"greptimedb",
"healthz",
"Hygon",
"iface",
"imageutils",
"influxdata",
Expand Down
13 changes: 13 additions & 0 deletions api/v1/gpu_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,24 @@ type GPUStatus struct {
// +kubebuilder:default=Pending
Phase TensorFusionGPUPhase `json:"phase"`

// +kubebuilder:default="NVIDIA"
Vendor string `json:"vendor"`

// +optional
Model string `json:"model,omitempty"`

Capacity *Resource `json:"capacity"`
Available *Resource `json:"available"`

UUID string `json:"uuid"`

// +optional
Index *int32 `json:"index,omitempty"`

// When it's -1, it means the GPU is not assigned to any NUMA node
// +optional
NUMANode *int32 `json:"numaNode,omitempty"`

// The host match selector to schedule worker pods
NodeSelector map[string]string `json:"nodeSelector"`
GPUModel string `json:"gpuModel"`
Expand Down
4 changes: 4 additions & 0 deletions api/v1/gpupool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ import (

// GPUPoolSpec defines the desired state of GPUPool.
type GPUPoolSpec struct {

// +optional
DefaultUsingLocalGPU *bool `json:"defaultUsingLocalGPU,omitempty"`

CapacityConfig *CapacityConfig `json:"capacityConfig,omitempty"`

NodeManagerConfig *NodeManagerConfig `json:"nodeManagerConfig,omitempty"`
Expand Down
6 changes: 6 additions & 0 deletions api/v1/gpuresourcequota_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,16 @@ type AllocRequest struct {
// Resource requirements for the allocation
Request Resource
Limit Resource
// Specific GPU indices to allocate, empty slice means any index
GPUIndices []int32
// Number of GPUs to allocate
Count uint
// Specific GPU model to allocate, empty string means any model
GPUModel string

// Specific GPU vendor to allocate, default to any if empty
GPUVendor string

// Node affinity requirements
NodeAffinity *v1.NodeAffinity

Expand Down
48 changes: 42 additions & 6 deletions api/v1/schedulingconfigtemplate_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,16 +251,52 @@ type HypervisorScheduling struct {

// Hypervisor will move low priority jobs to pending queue if GPU is full
// This config can adjust hypervisor's queueing behavior to balance the co-scheduling CUDA calls
MultiProcessQueuing MultiProcessQueuing `json:"multiProcessQueuing,omitempty"`
}
ElasticRateLimitParameters ElasticRateLimitParameters `json:"elasticRateLimitParameters,omitempty"`

type MultiProcessQueuing struct {
// +optional
Enable *bool `json:"enable,omitempty"`
// For differentiate QoS levels, ensure critical and high QoS workloads on same GPU card getting more computing resources
MultiProcessQueuingParameters MultiProcessQueuingParameters `json:"multiProcessQueuingParameters,omitempty"`
}

// MultiProcessQueuingParameters tunes how the hypervisor throttles and resumes
// co-scheduled processes sharing one GPU, so that workloads with higher QoS
// levels keep receiving compute when the card is under contention.
// All values are strings (presumably quantity/percentage and duration strings
// parsed elsewhere — TODO confirm expected formats).
type MultiProcessQueuingParameters struct {
// Condition for triggering scale down when usage is above ComputingThresholdForPreempt
// and stays there for at least TriggerPreemptDuration.
ComputingThresholdForPreempt string `json:"computingThresholdForPreempt,omitempty"`
TriggerPreemptDuration string `json:"triggerPreemptDuration,omitempty"`

// Condition for triggering scale up when usage is below ComputingThresholdForResume
// and stays there for at least TriggerResumeDuration.
ComputingThresholdForResume string `json:"computingThresholdForResume,omitempty"`
TriggerResumeDuration string `json:"triggerResumeDuration,omitempty"`

// Coefficient for scale down when resource contention happens, one per tier
// (presumably mapped to workload QoS levels — TODO confirm mapping).
CoefficientLow string `json:"coefficientLow,omitempty"`
CoefficientMedium string `json:"coefficientMedium,omitempty"`
CoefficientHigh string `json:"coefficientHigh,omitempty"`

// When avg utilization < ComputingThresholdForResume and lasts for more than TriggerResumeDuration,
// use the following formula to scale up:
// Case #1: if all processes have the same QoS level, and cur_limit <= limit, fast resume to limit
// Case #2: else, Max(limit, Min(cur_limit * 1/Coefficient * SlowStartRatio, cur_limit * 1.2))
SlowStartRatio string `json:"slowStartRatio,omitempty"`
}

// ElasticRateLimitParameters configures a token-bucket rate limiter whose
// refill rate is driven by a PID controller reacting to current utilization.
// All values are strings (presumably numeric/duration strings parsed
// elsewhere — TODO confirm expected formats).
type ElasticRateLimitParameters struct {
// Refill rate is controlled by the PID controller, adjusted by current
// utilization; clamped between MinRefillRate and MaxRefillRate.
MaxRefillRate string `json:"maxRefillRate,omitempty"`
MinRefillRate string `json:"minRefillRate,omitempty"`

// Filter ineffective requests from rate limit, 0.0 to 1.0
// (presumably a smoothing factor for the utilization signal — TODO confirm).
FilterAlpha string `json:"filterAlpha,omitempty"`

// Control-loop interval at which the PID controller re-evaluates.
Interval string `json:"interval,omitempty"`
// PID controller parameters: integral (Ki), derivative (Kd), proportional (Kp) gains.
Ki string `json:"ki,omitempty"`
Kd string `json:"kd,omitempty"`
Kp string `json:"kp,omitempty"`

// Per-queue-level time slices (presumably one entry per priority level —
// TODO confirm ordering convention).
QueueLevelTimeSlices []string `json:"queueLevelTimeSlices,omitempty"`
// Burst window to control token bucket Min/Max (currentCapacity = burstWindow x currentRefillRate)
BurstWindow string `json:"burstWindow,omitempty"`
// Token bucket minimum and maximum capacity bounds.
CapacityMin string `json:"capacityMin,omitempty"`
CapacityMax string `json:"capacityMax,omitempty"`
}

// SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate.
Expand Down
9 changes: 8 additions & 1 deletion api/v1/workloadprofile_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1
import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// +kubebuilder:validation:Enum=low;medium;high;critical
Expand Down Expand Up @@ -69,6 +70,12 @@ type WorkloadProfileSpec struct {
// The number of GPUs to be used by the workload, default to 1
GPUCount uint32 `json:"gpuCount,omitempty"`

// Specify GPU indices for precise control of scheduling
GPUIndices []int32 `json:"gpuIndices,omitempty"`

// Specify GPU vendor for precise control of scheduling
GPUVendor string `json:"vendor,omitempty"`

// +optional
// AutoScalingConfig configured here will override Pool's schedulingConfig
// This field can not be fully supported in annotation, if user want to enable auto-scaling in annotation,
Expand All @@ -81,7 +88,7 @@ type WorkloadProfileSpec struct {

// +optional
// WorkerPodTemplate is the template for the worker pod, only take effect in remote vGPU mode
WorkerPodTemplate *v1.PodTemplateSpec `json:"workerPodTemplate,omitempty"`
WorkerPodTemplate *runtime.RawExtension `json:"workerPodTemplate,omitempty"`
}

// +kubebuilder:validation:Enum=shared;soft;hard
Expand Down
63 changes: 47 additions & 16 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions charts/tensor-fusion/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.6.1
version: 1.7.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.47.2"
appVersion: "1.48.2"
2 changes: 2 additions & 0 deletions charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,8 @@ spec:
x-kubernetes-preserve-unknown-fields: true
type: object
type: object
defaultUsingLocalGPU:
type: boolean
nodeManagerConfig:
properties:
nodeCompaction:
Expand Down
14 changes: 14 additions & 0 deletions charts/tensor-fusion/crds/tensor-fusion.ai_gpus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,23 @@ spec:
type: object
gpuModel:
type: string
index:
format: int32
type: integer
message:
type: string
model:
type: string
nodeSelector:
additionalProperties:
type: string
description: The host match selector to schedule worker pods
type: object
numaNode:
description: When it's -1, it means the GPU is not assigned to any
NUMA node
format: int32
type: integer
phase:
default: Pending
enum:
Expand Down Expand Up @@ -166,6 +176,9 @@ spec:
type: string
uuid:
type: string
vendor:
default: NVIDIA
type: string
required:
- available
- capacity
Expand All @@ -174,6 +187,7 @@ spec:
- nodeSelector
- phase
- uuid
- vendor
type: object
type: object
served: true
Expand Down
Loading
Loading