Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 28 additions & 7 deletions llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,19 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

static AllocationType getAllocType(const AllocationInfo *AllocInfo) {
return getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
AllocInfo->Info.getAllocCount(),
AllocInfo->Info.getTotalLifetime());
}

static AllocationType addCallStack(CallStackTrie &AllocTrie,
const AllocationInfo *AllocInfo,
uint64_t FullStackId) {
SmallVector<uint64_t> StackIds;
for (const auto &StackFrame : AllocInfo->CallStack)
StackIds.push_back(computeStackId(StackFrame));
auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
AllocInfo->Info.getAllocCount(),
AllocInfo->Info.getTotalLifetime());
auto AllocType = getAllocType(AllocInfo);
std::vector<ContextTotalSize> ContextSizeInfo;
if (recordContextSizeInfoForAnalysis()) {
auto TotalSize = AllocInfo->Info.getTotalSize();
Expand Down Expand Up @@ -406,22 +410,39 @@ handleAllocSite(Instruction &I, CallBase *CI,
const std::set<const AllocationInfo *> &AllocInfoSet,
std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
&FullStackIdToAllocMatchInfo) {
// TODO: Remove this once the profile creation logic deduplicates contexts
// that are the same other than the IsInlineFrame bool. Until then, keep the
// largest.
DenseMap<uint64_t, const AllocationInfo *> UniqueFullContextIdAllocInfo;
for (auto *AllocInfo : AllocInfoSet) {
auto FullStackId = computeFullStackId(AllocInfo->CallStack);
auto [It, Inserted] =
UniqueFullContextIdAllocInfo.insert({FullStackId, AllocInfo});
// If inserted entry, done.
if (Inserted)
continue;
// Keep the larger one, or the noncold one if they are the same size.
auto CurSize = It->second->Info.getTotalSize();
auto NewSize = AllocInfo->Info.getTotalSize();
if ((CurSize > NewSize) ||
(CurSize == NewSize &&
getAllocType(AllocInfo) != AllocationType::NotCold))
continue;
It->second = AllocInfo;
}
// We may match this instruction's location list to multiple MIB
// contexts. Add them to a Trie specialized for trimming the contexts to
// the minimal needed to disambiguate contexts with unique behavior.
CallStackTrie AllocTrie(&ORE, MaxColdSize);
uint64_t TotalSize = 0;
uint64_t TotalColdSize = 0;
for (auto *AllocInfo : AllocInfoSet) {
for (auto &[FullStackId, AllocInfo] : UniqueFullContextIdAllocInfo) {
// Check the full inlined call stack against this one.
// If we found and thus matched all frames on the call, include
// this MIB.
if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
InlinedCallStack)) {
NumOfMemProfMatchedAllocContexts++;
uint64_t FullStackId = 0;
if (ClPrintMemProfMatchInfo || recordContextSizeInfoForAnalysis())
FullStackId = computeFullStackId(AllocInfo->CallStack);
auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
TotalSize += AllocInfo->Info.getTotalSize();
if (AllocType == AllocationType::Cold)
Expand Down
118 changes: 118 additions & 0 deletions llvm/test/Transforms/PGOProfile/memprof_diff_inline.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
;; Tests that the compiler ignores smaller contexts that differ only in the
;; IsInlineFrame bool. These map to the same full context id internally, as we
;; ignore the inline frame status which may differ in feedback compiles.
;; Presumably this happens when profiles collected from different binaries are
;; merged. If we didn't pick the largest we would default them all to noncold.

;; Avoid failures on big-endian systems that can't read the profile properly
; REQUIRES: x86_64-linux

;; Generate the profile and the IR.
; RUN: split-file %s %t

;; Generate indexed profile
; RUN: llvm-profdata merge %t/memprof_diff_inline.yaml -o %t.memprofdata

; RUN: opt < %t/memprof_diff_inline.ll -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-print-match-info 2>&1 | FileCheck %s --check-prefixes=MEMPROF

; MEMPROF: MemProf notcold context with id 10194276560488437434 has total profiled size 200 is matched with 1 frames
; MEMPROF: MemProf cold context with id 16342802530253093571 has total profiled size 10000 is matched with 1 frames

;--- memprof_diff_inline.yaml
---
HeapProfileRecords:
- GUID: _Z3foov
AllocSites:
# Small non-cold, full context id 16342802530253093571, should ignore
- Callstack:
- { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 1
TotalSize: 10
TotalLifetime: 0
TotalLifetimeAccessDensity: 20000
# Large cold, full context id 16342802530253093571, should keep
- Callstack:
- { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: true }
- { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 1
TotalSize: 10000
TotalLifetime: 200000
TotalLifetimeAccessDensity: 0
# Small non-cold, full context id 16342802530253093571, should ignore
- Callstack:
- { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: true }
- { Function: main, LineOffset: 8, Column: 13, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 1
TotalSize: 100
TotalLifetime: 0
TotalLifetimeAccessDensity: 20000
# Small non-cold, full context id 10194276560488437434
- Callstack:
- { Function: _Z3foov, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: _Z4foo2v, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: _Z3barv, LineOffset: 1, Column: 10, IsInlineFrame: false }
- { Function: main, LineOffset: 9, Column: 13, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 1
TotalSize: 200
TotalLifetime: 0
TotalLifetimeAccessDensity: 20000
CallSites: []
...
;--- memprof_diff_inline.ll
; ModuleID = 'memprof_diff_inline.cc'
source_filename = "memprof_diff_inline.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%"struct.std::nothrow_t" = type { i8 }

@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1

define dso_local noundef ptr @_Z3foov() !dbg !10 {
entry:
; MEMPROF: call {{.*}} @_Znwm{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
%call = call noalias noundef align 32 ptr @_Znwm(i64 noundef 32) #6, !dbg !13
ret ptr %call
}

declare noundef ptr @_Znwm(i64 noundef)

attributes #6 = { builtin allocsize(0) }

; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]]}

; MEMPROF: ![[MIB1]] = !{![[STACK1:[0-9]+]], !"notcold", ![[CONTEXTSIZE1:[0-9]+]]}
; MEMPROF: ![[STACK1]] = !{i64 2732490490862098848, i64 8467819354083268568, i64 9086428284934609951, i64 2061451396820446691}
;; Full context id 10194276560488437434 == -8252467513221114182
; MEMPROF: ![[CONTEXTSIZE1]] = !{i64 -8252467513221114182, i64 200}

; MEMPROF: ![[MIB2]] = !{![[STACK2:[0-9]+]], !"cold", ![[CONTEXTSIZE2:[0-9]+]]}
; MEMPROF: ![[STACK2]] = !{i64 2732490490862098848, i64 8467819354083268568, i64 9086428284934609951, i64 -5747251260480066785}
;; Full context id 16342802530253093571 == -2103941543456458045
;; We should have kept the large (cold) one.
; MEMPROF: ![[CONTEXTSIZE2]] = !{i64 -2103941543456458045, i64 10000}

; MEMPROF: ![[C1]] = !{i64 2732490490862098848}

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}

!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!1 = !DIFile(filename: "memprof.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: "e8c40ebe4b21776b4d60e9632cbc13c2")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
!11 = !DISubroutineType(types: !12)
!12 = !{}
!13 = !DILocation(line: 5, column: 10, scope: !10)
46 changes: 36 additions & 10 deletions llvm/test/Transforms/PGOProfile/memprof_loop_unroll.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,50 @@
;; Avoid failures on big-endian systems that can't read the profile properly
; REQUIRES: x86_64-linux

;; TODO: Use text profile inputs once that is available for memprof.
;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
;; # To generate below LLVM IR for use in matching.
;; $ clang++ -gmlt -fdebug-info-for-profiling -S %S/Inputs/memprof_loop_unroll_b.cc -emit-llvm
; Generate the profile and the IR.
; RUN: split-file %s %t

;; Generate indexed profile
; RUN: llvm-profdata merge %t/memprof_loop_unroll.yaml -o %t.memprofdata

; RUN: llvm-profdata merge %S/Inputs/memprof_loop_unroll.memprofraw --profiled-binary %S/Inputs/memprof_loop_unroll.exe -o %t.memprofdata
;; Set the minimum lifetime threshold to 0 to ensure that one context is
;; considered cold (the other will be notcold).
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-ave-lifetime-cold-threshold=0 2>&1 | FileCheck %s
; RUN: opt < %t/memprof_loop_unroll.ll -passes='memprof-use<profile-filename=%t.memprofdata>' -S -memprof-report-hinted-sizes -memprof-ave-lifetime-cold-threshold=0 2>&1 | FileCheck %s

;; Conservatively annotate as not cold. We get two messages as there are two
;; unrolled copies of the allocation.
; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4
; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and indistinguishable alloc type notcold: 4
;; Conservatively annotate as not cold.
; CHECK: MemProf hinting: Total size for full allocation context hash {{.*}} and single alloc type notcold: 4
; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="notcold" }
; CHECK-NOT: stackIds: ()

;--- memprof_loop_unroll.yaml
---
HeapProfileRecords:
- GUID: 0x7f8d88fcc70a347b
AllocSites:
- Callstack:
- { Function: 0x7f8d88fcc70a347b, LineOffset: 2, Column: 16, IsInlineFrame: false }
- { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 1
TotalSize: 4
TotalLifetime: 2
TotalLifetimeAccessDensity: 12500000000
- Callstack:
- { Function: 0x7f8d88fcc70a347b, LineOffset: 2, Column: 16, IsInlineFrame: false }
- { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false }
MemInfoBlock:
AllocCount: 1
TotalSize: 4
TotalLifetime: 2
TotalLifetimeAccessDensity: 0
- GUID: 0xdb956436e78dd5fa
CallSites:
- Frames:
- { Function: 0xdb956436e78dd5fa, LineOffset: 1, Column: 5, IsInlineFrame: false }
...

;--- memprof_loop_unroll.ll
; ModuleID = 'memprof_loop_unroll_b.cc'
source_filename = "memprof_loop_unroll_b.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
Expand Down
Loading