Merged
31 changes: 19 additions & 12 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -438,34 +438,41 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);

Register VLENB = 0;
unsigned PreHandledNum = 0;
unsigned VLENBShift = 0;
unsigned PrevHandledNum = 0;
unsigned I = 0;
while (I != NumRegs) {
auto [LMulHandled, RegClass, Opcode] =
getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill);
auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled);
bool IsLast = I + RegNumHandled == NumRegs;
if (PreHandledNum) {
if (PrevHandledNum) {
Register Step;
// Optimize for constant VLEN.
if (auto VLEN = STI.getRealVLen()) {
int64_t Offset = *VLEN / 8 * PreHandledNum;
int64_t Offset = *VLEN / 8 * PrevHandledNum;
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
} else {
if (!VLENB) {
VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
}
uint32_t ShiftAmount = Log2_32(PreHandledNum);
if (ShiftAmount == 0)
Step = VLENB;
else {
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
.addReg(VLENB, getKillRegState(IsLast))
.addImm(ShiftAmount);
uint32_t ShiftAmount = Log2_32(PrevHandledNum);
// To avoid using an extra register, we shift the VLENB register and
// remember how much it has been shifted. We can then use relative
// shifts to adjust to the desired shift amount.
if (VLENBShift > ShiftAmount) {
BuildMI(MBB, II, DL, TII->get(RISCV::SRLI), VLENB)
.addReg(VLENB, RegState::Kill)
.addImm(VLENBShift - ShiftAmount);
} else if (VLENBShift < ShiftAmount) {
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VLENB)
.addReg(VLENB, RegState::Kill)
.addImm(ShiftAmount - VLENBShift);
}
VLENBShift = ShiftAmount;
Step = VLENB;
}

BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
@@ -489,7 +496,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
if (IsSpill)
MIB.addReg(Reg, RegState::Implicit);

PreHandledNum = RegNumHandled;
PrevHandledNum = RegNumHandled;
RegEncoding += RegNumHandled;
I += RegNumHandled;
}
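The new comment above describes the heart of this change: rather than copying VLENB into a fresh scratch register and shifting that copy for every chunk, the lowering shifts the single VLENB register in place, remembers how far it is currently shifted, and issues only a relative SLLI or SRLI when the next chunk needs a different scale. Below is a minimal standalone C++ sketch of that bookkeeping for the non-constant-VLEN path; the helper name `emitSteps` and the printed pseudo-instructions are illustrative stand-ins for the `BuildMI` calls in the actual patch, not LLVM API.

```cpp
// Standalone sketch (not LLVM code) of the relative-shift bookkeeping.
// Instructions are printed instead of built with BuildMI; register names
// are plain strings in the output.
#include <cstdint>
#include <iostream>
#include <vector>

// Emit the address-step computation between segment-spill chunks. Each entry
// in `prevHandledNums` is the number of vector registers handled by the
// previous chunk (a power of two), so the required step is VLENB * prev.
// Instead of materializing each step into a scratch register, keep shifting
// the single VLENB register and track its current shift amount.
static void emitSteps(const std::vector<unsigned> &prevHandledNums) {
  unsigned vlenbShift = 0; // how far the VLENB register is currently shifted
  bool haveVlenb = false;
  for (unsigned prev : prevHandledNums) {
    if (!haveVlenb) {
      std::cout << "  vlenb = PseudoReadVLENB\n";
      haveVlenb = true;
    }
    // Desired total shift for this step is log2(prev).
    uint32_t shift = 0;
    while ((1u << shift) < prev)
      ++shift;
    // Adjust relative to the shift already applied to VLENB.
    if (vlenbShift > shift)
      std::cout << "  vlenb = SRLI vlenb, " << (vlenbShift - shift) << "\n";
    else if (vlenbShift < shift)
      std::cout << "  vlenb = SLLI vlenb, " << (shift - vlenbShift) << "\n";
    vlenbShift = shift;
    std::cout << "  base  = ADD base, vlenb   // step = VLENB * " << prev
              << "\n";
  }
}

int main() {
  // Example: a spill split into chunks of 4, 2, and 1 registers, so the
  // steps between chunks are VLENB*4 and VLENB*2 (the final chunk needs no
  // step after it, mirroring the PrevHandledNum logic above).
  emitSteps({4, 2});
  return 0;
}
```

For the step sequence VLENB*4 then VLENB*2 exercised by the spill test in zvlsseg-spill.mir, this yields an SLLI by 2 followed by a relative SRLI by 1 on the same register, which is exactly what the updated CHECK lines further down expect in place of the old extra `$x13` temporary.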
244 changes: 244 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/pr165232.ll
@@ -0,0 +1,244 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"

define i1 @main(ptr %var_117, ptr %arrayinit.element3045, ptr %arrayinit.element3047, ptr %arrayinit.element3049, ptr %arrayinit.element3051, ptr %arrayinit.element3053, ptr %arrayinit.element3055, ptr %arrayinit.element3057, ptr %arrayinit.element3059, ptr %arrayinit.element3061, ptr %arrayinit.element3063, ptr %arrayinit.element3065, ptr %arrayinit.element3067, i64 %var_94_i.07698, target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1) {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr t0, vlenb
; CHECK-NEXT: slli t0, t0, 3
; CHECK-NEXT: mv t1, t0
; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: add t0, t0, t1
; CHECK-NEXT: sub sp, sp, t0
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: sd a1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd a2, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs4r.v v12, (a1) # vscale x 32-byte Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vs4r.v v16, (a1) # vscale x 32-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: ld t0, 56(a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: ld t1, 48(a1)
; CHECK-NEXT: vsetvli t2, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: ld t2, 40(a1)
; CHECK-NEXT: # kill: def $v10 killed $v9 killed $vtype
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: ld t3, 32(a1)
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: ld t4, 16(a1)
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: ld t5, 24(a1)
; CHECK-NEXT: vmv.v.i v13, 0
; CHECK-NEXT: vsetvli t6, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v22, 0
; CHECK-NEXT: vmv1r.v v14, v9
; CHECK-NEXT: sd zero, 0(a0)
; CHECK-NEXT: vmv.v.i v24, 0
; CHECK-NEXT: vmv1r.v v15, v9
; CHECK-NEXT: vmv1r.v v18, v9
; CHECK-NEXT: li t6, 1023
; CHECK-NEXT: vmv.v.i v26, 0
; CHECK-NEXT: vmv1r.v v19, v9
; CHECK-NEXT: slli t6, t6, 52
; CHECK-NEXT: vmv.v.i v28, 0
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs2r.v v22, (a1) # vscale x 16-byte Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vs4r.v v24, (a1) # vscale x 32-byte Folded Spill
; CHECK-NEXT: slli a2, a2, 1
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: ld a2, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: vs2r.v v28, (a1) # vscale x 16-byte Folded Spill
; CHECK-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: vmv1r.v v20, v9
; CHECK-NEXT: sd t6, 0(t5)
; CHECK-NEXT: vmv2r.v v16, v14
; CHECK-NEXT: vmv2r.v v14, v12
; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: vmv1r.v v11, v9
; CHECK-NEXT: vmv1r.v v21, v9
; CHECK-NEXT: csrr t5, vlenb
; CHECK-NEXT: slli t5, t5, 3
; CHECK-NEXT: add t5, sp, t5
; CHECK-NEXT: addi t5, t5, 16
; CHECK-NEXT: vs2r.v v18, (t5) # vscale x 16-byte Folded Spill
; CHECK-NEXT: csrr t6, vlenb
; CHECK-NEXT: slli t6, t6, 1
; CHECK-NEXT: add t5, t5, t6
; CHECK-NEXT: vs2r.v v20, (t5) # vscale x 16-byte Folded Spill
; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v19, 0
; CHECK-NEXT: vmclr.m v10
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv.v.i v6, 0
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmv1r.v v20, v19
; CHECK-NEXT: vmv1r.v v3, v19
; CHECK-NEXT: vmv1r.v v5, v19
; CHECK-NEXT: vmv1r.v v2, v19
; CHECK-NEXT: vmv1r.v v31, v19
; CHECK-NEXT: vmv1r.v v30, v19
; CHECK-NEXT: vmv1r.v v4, v19
; CHECK-NEXT: vmv2r.v v22, v10
; CHECK-NEXT: vmv4r.v v24, v12
; CHECK-NEXT: vmv2r.v v28, v16
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv1r.v v18, v19
; CHECK-NEXT: vmv1r.v v21, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vle32.v v20, (t4)
; CHECK-NEXT: vle32.v v3, (t1)
; CHECK-NEXT: vle32.v v30, (a7)
; CHECK-NEXT: vle64.v v8, (a4)
; CHECK-NEXT: vle32.v v5, (t2)
; CHECK-NEXT: vle32.v v2, (t3)
; CHECK-NEXT: vle32.v v31, (a6)
; CHECK-NEXT: vmv1r.v v24, v30
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vv v21, v8, v6, v0.t
; CHECK-NEXT: vmv1r.v v8, v19
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT: vle32.v v18, (a2)
; CHECK-NEXT: vle32.v v8, (a3)
; CHECK-NEXT: vle32.v v4, (a5)
; CHECK-NEXT: vmv1r.v v22, v20
; CHECK-NEXT: csrr t5, vlenb
; CHECK-NEXT: slli t5, t5, 3
; CHECK-NEXT: add t5, sp, t5
; CHECK-NEXT: addi t5, t5, 16
; CHECK-NEXT: vl1r.v v1, (t5) # vscale x 8-byte Folded Reload
; CHECK-NEXT: csrr t6, vlenb
; CHECK-NEXT: add t5, t5, t6
; CHECK-NEXT: vl2r.v v2, (t5) # vscale x 16-byte Folded Reload
; CHECK-NEXT: slli t6, t6, 1
; CHECK-NEXT: add t5, t5, t6
; CHECK-NEXT: vl1r.v v4, (t5) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsseg4e32.v v1, (zero)
; CHECK-NEXT: vsseg8e32.v v22, (a1)
; CHECK-NEXT: vmv1r.v v0, v21
; CHECK-NEXT: vssub.vv v8, v19, v18, v0.t
; CHECK-NEXT: csrr t5, vlenb
; CHECK-NEXT: slli t5, t5, 2
; CHECK-NEXT: mv t6, t5
; CHECK-NEXT: slli t5, t5, 1
; CHECK-NEXT: add t5, t5, t6
; CHECK-NEXT: add t5, sp, t5
; CHECK-NEXT: addi t5, t5, 16
; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
; CHECK-NEXT: vsetvli zero, t0, e64, m2, ta, ma
; CHECK-NEXT: vsseg2e64.v v20, (zero)
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: addi t5, sp, 16
; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
; CHECK-NEXT: csrr t6, vlenb
; CHECK-NEXT: slli t6, t6, 2
; CHECK-NEXT: add t5, t5, t6
; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
; CHECK-NEXT: vsetivli zero, 0, e64, m2, ta, ma
; CHECK-NEXT: vsseg4e64.v v20, (zero), v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vsseg8e32.v v8, (a0)
; CHECK-NEXT: csrr t5, vlenb
; CHECK-NEXT: slli t5, t5, 4
; CHECK-NEXT: add t5, sp, t5
; CHECK-NEXT: addi t5, t5, 16
; CHECK-NEXT: vl4r.v v20, (t5) # vscale x 32-byte Folded Reload
; CHECK-NEXT: csrr t6, vlenb
; CHECK-NEXT: slli t6, t6, 2
; CHECK-NEXT: add t5, t5, t6
; CHECK-NEXT: vl4r.v v24, (t5) # vscale x 32-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vsseg4e64.v v20, (zero)
; CHECK-NEXT: j .LBB0_1
entry:
store double 0.000000e+00, ptr %var_117, align 8
store double 1.000000e+00, ptr %arrayinit.element3061, align 8
br label %for.body

for.body: ; preds = %for.body, %entry
%2 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3059, i64 0)
%3 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3067, i64 0)
%4 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3065, i64 0)
%5 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3063, i64 0)
%6 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3055, i64 0)
%7 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3057, i64 0)
%8 = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.p0.i64(<vscale x 2 x float> zeroinitializer, ptr %arrayinit.element3053, i64 0)
%9 = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64.p0.i64(<vscale x 2 x double> zeroinitializer, ptr %arrayinit.element3051, i64 0)
%10 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3047, i64 0)
%11 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.p0.i64(<vscale x 2 x i32> zeroinitializer, ptr %arrayinit.element3049, i64 0)
call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv8i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) zeroinitializer, ptr null, i64 0, i64 5)
%12 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) zeroinitializer, <vscale x 2 x float> %8, i32 0)
%13 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %12, <vscale x 2 x float> %7, i32 2)
%14 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %13, <vscale x 2 x float> %6, i32 0)
%15 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %14, <vscale x 2 x float> %5, i32 0)
%16 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %15, <vscale x 2 x float> %4, i32 0)
%17 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %16, <vscale x 2 x float> %3, i32 0)
%18 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2f32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %17, <vscale x 2 x float> %2, i32 0)
call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %18, ptr %arrayinit.element3045, i64 0, i64 5)
%19 = tail call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.nxv2f64.i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %9, <vscale x 2 x i1> zeroinitializer, i64 0)
%20 = tail call <vscale x 2 x i32> @llvm.riscv.vssub.mask.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> %11, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %10, <vscale x 2 x i1> %19, i64 0, i64 0)
call void @llvm.riscv.vsseg2.triscv.vector.tuple_nxv16i8_2t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, ptr null, i64 %var_94_i.07698, i64 6)
call void @llvm.riscv.vsseg4.mask.triscv.vector.tuple_nxv16i8_4t.p0.nxv2i1.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) zeroinitializer, ptr null, <vscale x 2 x i1> zeroinitializer, i64 0, i64 6)
%21 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_8t.nxv2i32(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) poison, <vscale x 2 x i32> %20, i32 0)
call void @llvm.riscv.vsseg8.triscv.vector.tuple_nxv8i8_8t.p0.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %21, ptr %var_117, i64 0, i64 5)
call void @llvm.riscv.vsseg4.triscv.vector.tuple_nxv16i8_4t.p0.i64(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %1, ptr null, i64 0, i64 6)
br label %for.body
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -32,10 +32,10 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s256>) into %stack.0, align 8)
; CHECK-NEXT: $x12 = PseudoReadVLENB
; CHECK-NEXT: $x13 = SLLI $x12, 2
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
; CHECK-NEXT: $x12 = SLLI killed $x12, 2
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
; CHECK-NEXT: $x12 = SLLI killed $x12, 1
; CHECK-NEXT: $x12 = SRLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
; CHECK-NEXT: VS1R_V $v6, killed $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
; CHECK-NEXT: $x11 = ADDI $x2, 16
@@ -93,10 +93,10 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8)
; CHECK-NEXT: $x12 = PseudoReadVLENB
; CHECK-NEXT: $x13 = SLLI $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
; CHECK-NEXT: $x12 = SLLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, $x12
; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
; CHECK-NEXT: $x12 = SLLI killed $x12, 2
; CHECK-NEXT: $x12 = SLLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10