| 1 | // REQUIRES: lld, lld-available |
| 2 | |
| 3 | // Building the instrumented binary will fail because lld doesn't support |
| 4 | // big-endian ELF for PPC (aka ABI 1). |
| 5 | // ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported |
| 6 | // UNSUPPORTED: ppc && host-byteorder-big-endian |
| 7 | |
| 8 | // RUN: rm -rf %t && mkdir %t && split-file %s %t && cd %t |
| 9 | |
| 10 | // RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling lib.cpp main.cpp -o test |
| 11 | // RUN: env LLVM_PROFILE_FILE=test.profraw ./test |
| 12 | |
| 13 | // Show vtable profiles from raw profile. |
| 14 | // RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW |
| 15 | |
| 16 | // Generate indexed profile from raw profile and show the data. |
| 17 | // RUN: llvm-profdata merge --keep-vtable-symbols test.profraw -o test.profdata |
| 18 | // RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED |
| 19 | |
| 20 | // Generate text profile from raw and indexed profiles respectively and show the data. |
| 21 | // RUN: llvm-profdata merge --keep-vtable-symbols --text test.profraw -o raw.proftext |
| 22 | // RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text raw.proftext | FileCheck %s --check-prefix=ICTEXT |
| 23 | // RUN: llvm-profdata merge --keep-vtable-symbols --text test.profdata -o indexed.proftext |
| 24 | // RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text indexed.proftext | FileCheck %s --check-prefix=ICTEXT |
| 25 | |
| 26 | // Generate indexed profile from text profiles and show the data |
| 27 | // RUN: llvm-profdata merge --keep-vtable-symbols --binary raw.proftext -o text.profraw |
| 28 | // RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED |
| 29 | // RUN: llvm-profdata merge --keep-vtable-symbols --binary indexed.proftext -o text.profdata |
| 30 | // RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED |
| 31 | |
| 32 | // COMMON: Counters: |
| 33 | // COMMON-NEXT: main: |
| 34 | // COMMON-NEXT: Hash: 0x068617320ec408a0 |
| 35 | // COMMON-NEXT: Counters: 4 |
| 36 | // COMMON-NEXT: Indirect Call Site Count: 2 |
| 37 | // COMMON-NEXT: Number of instrumented vtables: 2 |
| 38 | // RAW: Indirect Target Results: |
| 39 | // RAW-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%) |
| 40 | // RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%) |
| 41 | // RAW-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%) |
| 42 | // RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%) |
| 43 | // RAW-NEXT: VTable Results: |
| 44 | // RAW-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%) |
| 45 | // RAW-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%) |
| 46 | // RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) |
| 47 | // RAW-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) |
| 48 | // INDEXED: Indirect Target Results: |
| 49 | // INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii, 150 ] (75.00%) |
| 50 | // INDEXED-NEXT: [ 0, _ZN8Derived14funcEii, 50 ] (25.00%) |
| 51 | // INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev, 750 ] (75.00%) |
| 52 | // INDEXED-NEXT: [ 1, _ZN8Derived1D0Ev, 250 ] (25.00%) |
| 53 | // INDEXED-NEXT: VTable Results: |
| 54 | // INDEXED-NEXT: [ 0, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 150 ] (75.00%) |
| 55 | // INDEXED-NEXT: [ 0, _ZTV8Derived1, 50 ] (25.00%) |
| 56 | // INDEXED-NEXT: [ 1, {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) |
| 57 | // INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) |
| 58 | // COMMON: Instrumentation level: IR entry_first = 0 |
| 59 | // COMMON-NEXT: Functions shown: 1 |
| 60 | // COMMON-NEXT: Total functions: 7 |
| 61 | // COMMON-NEXT: Maximum function count: 1000 |
| 62 | // COMMON-NEXT: Maximum internal block count: 1000 |
| 63 | // COMMON-NEXT: Statistics for indirect call sites profile: |
| 64 | // COMMON-NEXT: Total number of sites: 2 |
| 65 | // COMMON-NEXT: Total number of sites with values: 2 |
| 66 | // COMMON-NEXT: Total number of profiled values: 4 |
| 67 | // COMMON-NEXT: Value sites histogram: |
| 68 | // COMMON-NEXT: NumTargets, SiteCount |
| 69 | // COMMON-NEXT: 2, 2 |
| 70 | // COMMON-NEXT: Statistics for vtable profile: |
| 71 | // COMMON-NEXT: Total number of sites: 2 |
| 72 | // COMMON-NEXT: Total number of sites with values: 2 |
| 73 | // COMMON-NEXT: Total number of profiled values: 4 |
| 74 | // COMMON-NEXT: Value sites histogram: |
| 75 | // COMMON-NEXT: NumTargets, SiteCount |
| 76 | // COMMON-NEXT: 2, 2 |
| 77 | |
| 78 | // ICTEXT: :ir |
| 79 | // ICTEXT: main |
| 80 | // ICTEXT: # Func Hash: |
| 81 | // ICTEXT: 470088714870327456 |
| 82 | // ICTEXT: # Num Counters: |
| 83 | // ICTEXT: 4 |
| 84 | // ICTEXT: # Counter Values: |
| 85 | // ICTEXT: 1000 |
| 86 | // ICTEXT: 1000 |
| 87 | // ICTEXT: 200 |
| 88 | // ICTEXT: 1 |
| 89 | // ICTEXT: # Num Value Kinds: |
| 90 | // ICTEXT: 2 |
| 91 | // ICTEXT: # ValueKind = IPVK_IndirectCallTarget: |
| 92 | // ICTEXT: 0 |
| 93 | // ICTEXT: # NumValueSites: |
| 94 | // ICTEXT: 2 |
| 95 | // ICTEXT: 2 |
| 96 | // ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150 |
| 97 | // ICTEXT: _ZN8Derived14funcEii:50 |
| 98 | // ICTEXT: 2 |
| 99 | // ICTEXT: {{.*}}lib.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750 |
| 100 | // ICTEXT: _ZN8Derived1D0Ev:250 |
| 101 | // ICTEXT: # ValueKind = IPVK_VTableTarget: |
| 102 | // ICTEXT: 2 |
| 103 | // ICTEXT: # NumValueSites: |
| 104 | // ICTEXT: 2 |
| 105 | // ICTEXT: 2 |
| 106 | // ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150 |
| 107 | // ICTEXT: _ZTV8Derived1:50 |
| 108 | // ICTEXT: 2 |
| 109 | // ICTEXT: {{.*}}lib.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 |
| 110 | // ICTEXT: _ZTV8Derived1:250 |
| 111 | |
| 112 | // When vtable value profiles exist, pgo-instr-use pass should annotate them |
| 113 | // even if `-enable-vtable-value-profiling` is not explicitly on. |
| 114 | // RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \ |
| 115 | // RUN: -mllvm -print-after=pgo-instr-use -mllvm -filter-print-funcs=main \ |
| 116 | // RUN: -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | FileCheck %s --check-prefix=ANNOTATE |
| 117 | |
| 118 | // ANNOTATE-NOT: Inconsistent number of value sites |
| 119 | // ANNOTATE: !{!"VP", i32 2 |
| 120 | |
| 121 | // When vtable value profiles exist, pgo-instr-use pass will not annotate them |
| 122 | // if `-icp-max-num-vtables` is set to zero. |
| 123 | // RUN: %clangxx -m64 -fprofile-use=test.profdata -fuse-ld=lld -O2 \ |
| 124 | // RUN: -mllvm -icp-max-num-vtables=0 -mllvm -print-after=pgo-instr-use \ |
| 125 | // RUN: -mllvm -filter-print-funcs=main -mllvm -print-module-scope lib.cpp main.cpp 2>&1 | \ |
| 126 | // RUN: FileCheck %s --check-prefix=OMIT |
| 127 | |
| 128 | // OMIT: Inconsistent number of value sites |
| 129 | // OMIT-NOT: !{!"VP", i32 2 |
| 130 | |
// Test indirect call promotion transformation using vtable profiles.
// - Build with `-g` to enable debug information.
// - In real-world settings, the ICP pass is disabled in the prelink pipeline.
//   In the postlink pipeline, ICP is enabled after the
//   whole-program-devirtualization pass. Do the same thing in this test.
// - Enable `-fwhole-program-vtables` to generate type metadata and intrinsics.
// - Enable `-fno-split-lto-unit` and `-Wl,--lto-whole-program-visibility` to
//   preserve type intrinsics for the ICP pass.
| 139 | // RUN: %clangxx -m64 -fprofile-use=test.profdata -Wl,--lto-whole-program-visibility \ |
| 140 | // RUN: -mllvm -disable-icp=true -Wl,-mllvm,-disable-icp=false -fuse-ld=lld \ |
| 141 | // RUN: -g -flto=thin -fwhole-program-vtables -fno-split-lto-unit -O2 \ |
| 142 | // RUN: -mllvm -enable-vtable-value-profiling -Wl,-mllvm,-enable-vtable-value-profiling \ |
| 143 | // RUN: -mllvm -enable-vtable-profile-use \ |
| 144 | // RUN: -Wl,-mllvm,-enable-vtable-profile-use -Rpass=pgo-icall-prom \ |
| 145 | // RUN: -Wl,-mllvm,-print-after=pgo-icall-prom \ |
| 146 | // RUN: -Wl,-mllvm,-filter-print-funcs=main lib.cpp main.cpp 2>&1 \ |
| 147 | // RUN: | FileCheck %s --check-prefixes=REMARK,IR --implicit-check-not="!VP" |
| 148 | |
| 149 | // For the indirect call site `ptr->func` |
| 150 | // REMARK: main.cpp:10:19: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}} with count 150 out of 200, sink 1 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}} |
| 151 | // REMARK: main.cpp:10:19: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, sink 1 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1} |
| 152 | // |
| 153 | // For the indirect call site `delete ptr` |
| 154 | // REMARK: main.cpp:12:5: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev.llvm.{{.*}} with count 750 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}} |
| 155 | // REMARK: main.cpp:12:5: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, sink 2 instruction(s) and compare 1 vtable(s): {_ZTV8Derived1} |
| 156 | |
| 157 | // The IR matchers for indirect callsite `ptr->func`. |
| 158 | // IR-LABEL: @main |
| 159 | // IR: [[OBJ:%.*]] = {{.*}}call {{.*}} @_Z10createTypei |
| 160 | // IR: [[VTABLE:%.*]] = load ptr, ptr [[OBJ]] |
| 161 | // IR: [[CMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTVN12_GLOBAL__N_18Derived2E.llvm.{{.*}}, i32 16) |
| 162 | // IR: br i1 [[CMP1]], label %[[BB1:.*]], label %[[BB2:[a-zA-Z0-9_.]+]], |
| 163 | // |
| 164 | // IR: [[BB1]]: |
| 165 | // IR: [[RESBB1:%.*]] = {{.*}}call {{.*}} @_ZN12_GLOBAL__N_18Derived24funcEii.llvm.{{.*}} |
| 166 | // IR: br label %[[MERGE0:[a-zA-Z0-9_.]+]] |
| 167 | // |
| 168 | // IR: [[BB2]]: |
| 169 | // IR: [[CMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @_ZTV8Derived1, i32 16) |
| 170 | // IR: br i1 [[CMP2]], label %[[BB3:.*]], label %[[BB4:[a-zA-Z0-9_.]+]], |
| 171 | // |
| 172 | // IR: [[BB3]]: |
| 173 | // IR: [[RESBB3:%.*]] = {{.*}}call {{.*}} @_ZN8Derived14funcEii |
| 174 | // IR: br label %[[MERGE1:[a-zA-Z0-9_.]+]], |
| 175 | // |
| 176 | // IR: [[BB4]]: |
| 177 | // IR: [[FUNCPTR:%.*]] = load ptr, ptr [[VTABLE]] |
| 178 | // IR: [[RESBB4:%.*]] = {{.*}}call {{.*}} [[FUNCPTR]] |
| 179 | // IR: br label %[[MERGE1]] |
| 180 | // |
| 181 | // IR: [[MERGE1]]: |
| 182 | // IR: [[RES1:%.*]] = phi i32 [ [[RESBB4]], %[[BB4]] ], [ [[RESBB3]], %[[BB3]] ] |
| 183 | // IR: br label %[[MERGE0]] |
| 184 | // |
| 185 | // IR: [[MERGE0]]: |
| 186 | // IR: [[RES2:%.*]] = phi i32 [ [[RES1]], %[[MERGE1]] ], [ [[RESBB1]], %[[BB1]] ] |
| 187 | |
| 188 | //--- lib.h |
| 189 | #include <stdio.h> |
| 190 | #include <stdlib.h> |
// Abstract interface implemented by the concrete types in this test. Both
// virtual members (`func` and the destructor) are invoked through a `Base *`
// in main.cpp.
class Base {
public:
  // Pure virtual; each derived class supplies its own arithmetic.
  virtual int func(int a, int b) = 0;

  // Virtual so that `delete` through a `Base *` dispatches to the derived
  // destructor.
  virtual ~Base() {}
};
| 197 | |
| 198 | class Derived1 : public Base { |
| 199 | public: |
| 200 | int func(int a, int b) override; |
| 201 | |
| 202 | ~Derived1() {} |
| 203 | }; |
| 204 | |
| 205 | __attribute__((noinline)) Base *createType(int a); |
| 206 | |
| 207 | //--- lib.cpp |
| 208 | #include "lib.h" |
| 209 | |
| 210 | namespace { |
| 211 | class Derived2 : public Base { |
| 212 | public: |
| 213 | int func(int a, int b) override { return a * (a - b); } |
| 214 | |
| 215 | ~Derived2() {} |
| 216 | }; |
| 217 | } // namespace |
| 218 | |
| 219 | int Derived1::func(int a, int b) { return a * b; } |
| 220 | |
| 221 | Base *createType(int a) { |
| 222 | Base *base = nullptr; |
| 223 | if (a % 4 == 0) |
| 224 | base = new Derived1(); |
| 225 | else |
| 226 | base = new Derived2(); |
| 227 | return base; |
| 228 | } |
| 229 | |
| 230 | //--- main.cpp |
| 231 | #include "lib.h" |
| 232 | |
| 233 | int main(int argc, char **argv) { |
| 234 | int sum = 0; |
| 235 | for (int i = 0; i < 1000; i++) { |
| 236 | int a = rand(); |
| 237 | int b = rand(); |
| 238 | Base *ptr = createType(a: i); |
| 239 | if (i % 5 == 0) |
| 240 | sum += ptr->func(a: b, b: a); |
| 241 | |
| 242 | delete ptr; |
| 243 | } |
| 244 | printf(format: "sum is %d\n" , sum); |
| 245 | return 0; |
| 246 | } |
| 247 | |