Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
"automount",
"AWSGPU",
"batchv",
"Biren",
"burstable",
"Cambricon",
"CDNA",
"Cerebras",
"certgen",
"certificaterequests",
"certmanager",
Expand Down Expand Up @@ -78,6 +81,7 @@
"greptime",
"greptimedb",
"healthz",
"Hygon",
"iface",
"imageutils",
"influxdata",
Expand Down
13 changes: 13 additions & 0 deletions api/v1/gpu_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,24 @@ type GPUStatus struct {
// +kubebuilder:default=Pending
Phase TensorFusionGPUPhase `json:"phase"`

// +kubebuilder:default="NVIDIA"
Vendor string `json:"vendor"`

// +optional
Model string `json:"model,omitempty"`

Capacity *Resource `json:"capacity"`
Available *Resource `json:"available"`

UUID string `json:"uuid"`

// +optional
Index *int32 `json:"index,omitempty"`

// When it's -1, it means the GPU is not assigned to any NUMA node
// +optional
NUMANode *int32 `json:"numaNode,omitempty"`

// The host match selector to schedule worker pods
NodeSelector map[string]string `json:"nodeSelector"`
GPUModel string `json:"gpuModel"`
Expand Down
4 changes: 4 additions & 0 deletions api/v1/gpupool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ import (

// GPUPoolSpec defines the desired state of GPUPool.
type GPUPoolSpec struct {

// +optional
DefaultUsingLocalGPU *bool `json:"defaultUsingLocalGPU,omitempty"`

CapacityConfig *CapacityConfig `json:"capacityConfig,omitempty"`

NodeManagerConfig *NodeManagerConfig `json:"nodeManagerConfig,omitempty"`
Expand Down
6 changes: 6 additions & 0 deletions api/v1/gpuresourcequota_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,16 @@ type AllocRequest struct {
// Resource requirements for the allocation
Request Resource
Limit Resource
// Specific GPU indices to allocate, empty slice means any index
GPUIndices []int32
// Number of GPUs to allocate
Count uint
// Specific GPU model to allocate, empty string means any model
GPUModel string

// Specific GPU vendor to allocate, default to any if empty
GPUVendor string

// Node affinity requirements
NodeAffinity *v1.NodeAffinity

Expand Down
48 changes: 42 additions & 6 deletions api/v1/schedulingconfigtemplate_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,16 +251,52 @@ type HypervisorScheduling struct {

// Hypervisor will move low priority jobs to pending queue if GPU is full
// This config can adjust hypervisor's queueing behavior to balance the co-scheduling CUDA calls
MultiProcessQueuing MultiProcessQueuing `json:"multiProcessQueuing,omitempty"`
}
ElasticRateLimitParameters ElasticRateLimitParameters `json:"elasticRateLimitParameters,omitempty"`

type MultiProcessQueuing struct {
// +optional
Enable *bool `json:"enable,omitempty"`
// For differentiate QoS levels, ensure critical and high QoS workloads on same GPU card getting more computing resources
MultiProcessQueuingParameters MultiProcessQueuingParameters `json:"multiProcessQueuingParameters,omitempty"`
}

// MultiProcessQueuingParameters tunes how the hypervisor throttles and resumes
// co-scheduled processes sharing one GPU, so that workloads with higher QoS
// levels keep receiving compute when the card is under contention.
// All values are strings (presumably quantity/percentage and duration strings
// parsed elsewhere — TODO confirm expected formats).
type MultiProcessQueuingParameters struct {
// Condition for triggering scale down when usage is above ComputingThresholdForPreempt
// and stays there for at least TriggerPreemptDuration.
ComputingThresholdForPreempt string `json:"computingThresholdForPreempt,omitempty"`
TriggerPreemptDuration string `json:"triggerPreemptDuration,omitempty"`

// Condition for triggering scale up when usage is below ComputingThresholdForResume
// and stays there for at least TriggerResumeDuration.
ComputingThresholdForResume string `json:"computingThresholdForResume,omitempty"`
TriggerResumeDuration string `json:"triggerResumeDuration,omitempty"`

// Coefficient for scale down when resource contention happens, one per tier
// (presumably mapped to workload QoS levels — TODO confirm mapping).
CoefficientLow string `json:"coefficientLow,omitempty"`
CoefficientMedium string `json:"coefficientMedium,omitempty"`
CoefficientHigh string `json:"coefficientHigh,omitempty"`

// When avg utilization < ComputingThresholdForResume and lasts for more than TriggerResumeDuration,
// use the following formula to scale up:
// Case #1: if all processes have the same QoS level, and cur_limit <= limit, fast resume to limit
// Case #2: else, Max(limit, Min(cur_limit * 1/Coefficient * SlowStartRatio, cur_limit * 1.2))
SlowStartRatio string `json:"slowStartRatio,omitempty"`
}

// ElasticRateLimitParameters configures a token-bucket rate limiter whose
// refill rate is driven by a PID controller reacting to current utilization.
// All values are strings (presumably numeric/duration strings parsed
// elsewhere — TODO confirm expected formats).
type ElasticRateLimitParameters struct {
// Refill rate is controlled by the PID controller, adjusted by current
// utilization; clamped between MinRefillRate and MaxRefillRate.
MaxRefillRate string `json:"maxRefillRate,omitempty"`
MinRefillRate string `json:"minRefillRate,omitempty"`

// Filter ineffective requests from rate limit, 0.0 to 1.0
// (presumably a smoothing factor for the utilization signal — TODO confirm).
FilterAlpha string `json:"filterAlpha,omitempty"`

// Control-loop interval at which the PID controller re-evaluates.
Interval string `json:"interval,omitempty"`
// PID controller parameters: integral (Ki), derivative (Kd), proportional (Kp) gains.
Ki string `json:"ki,omitempty"`
Kd string `json:"kd,omitempty"`
Kp string `json:"kp,omitempty"`

// Per-queue-level time slices (presumably one entry per priority level —
// TODO confirm ordering convention).
QueueLevelTimeSlices []string `json:"queueLevelTimeSlices,omitempty"`
// Burst window to control token bucket Min/Max (currentCapacity = burstWindow x currentRefillRate)
BurstWindow string `json:"burstWindow,omitempty"`
// Token bucket minimum and maximum capacity bounds.
CapacityMin string `json:"capacityMin,omitempty"`
CapacityMax string `json:"capacityMax,omitempty"`
}

// SchedulingConfigTemplateStatus defines the observed state of SchedulingConfigTemplate.
Expand Down
9 changes: 8 additions & 1 deletion api/v1/workloadprofile_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1
import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// +kubebuilder:validation:Enum=low;medium;high;critical
Expand Down Expand Up @@ -69,6 +70,12 @@ type WorkloadProfileSpec struct {
// The number of GPUs to be used by the workload, default to 1
GPUCount uint32 `json:"gpuCount,omitempty"`

// Specify GPU indices for precise control of scheduling
GPUIndices []int32 `json:"gpuIndices,omitempty"`

// Specify GPU vendor for precise control of scheduling
GPUVendor string `json:"vendor,omitempty"`

// +optional
// AutoScalingConfig configured here will override Pool's schedulingConfig
// This field can not be fully supported in annotation, if user want to enable auto-scaling in annotation,
Expand All @@ -81,7 +88,7 @@ type WorkloadProfileSpec struct {

// +optional
// WorkerPodTemplate is the template for the worker pod, only take effect in remote vGPU mode
WorkerPodTemplate *v1.PodTemplateSpec `json:"workerPodTemplate,omitempty"`
WorkerPodTemplate *runtime.RawExtension `json:"workerPodTemplate,omitempty"`
}

// +kubebuilder:validation:Enum=shared;soft;hard
Expand Down
63 changes: 47 additions & 16 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions charts/tensor-fusion/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.6.1
version: 1.7.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.47.2"
appVersion: "1.48.2"
2 changes: 2 additions & 0 deletions charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,8 @@ spec:
x-kubernetes-preserve-unknown-fields: true
type: object
type: object
defaultUsingLocalGPU:
type: boolean
nodeManagerConfig:
properties:
nodeCompaction:
Expand Down
14 changes: 14 additions & 0 deletions charts/tensor-fusion/crds/tensor-fusion.ai_gpus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,23 @@ spec:
type: object
gpuModel:
type: string
index:
format: int32
type: integer
message:
type: string
model:
type: string
nodeSelector:
additionalProperties:
type: string
description: The host match selector to schedule worker pods
type: object
numaNode:
description: When it's -1, it means the GPU is not assigned to any
NUMA node
format: int32
type: integer
phase:
default: Pending
enum:
Expand Down Expand Up @@ -166,6 +176,9 @@ spec:
type: string
uuid:
type: string
vendor:
default: NVIDIA
type: string
required:
- available
- capacity
Expand All @@ -174,6 +187,7 @@ spec:
- nodeSelector
- phase
- uuid
- vendor
type: object
type: object
served: true
Expand Down
Loading
Loading