1 | //===- bolt/Passes/ReorderSection.cpp - Reordering of section data --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements ReorderData class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | // TODO: |
14 | // - make sure writable data isn't put on same cache line unless temporally |
15 | // local |
16 | // - estimate temporal locality by looking at CFG? |
17 | |
18 | #include "bolt/Passes/ReorderData.h" |
19 | #include "llvm/ADT/MapVector.h" |
20 | #include <algorithm> |
21 | |
22 | #undef DEBUG_TYPE |
23 | #define DEBUG_TYPE "reorder-data" |
24 | |
25 | using namespace llvm; |
26 | using namespace bolt; |
27 | |
28 | namespace opts { |
29 | extern cl::OptionCategory BoltCategory; |
30 | extern cl::OptionCategory BoltOptCategory; |
31 | extern cl::opt<JumpTableSupportLevel> JumpTables; |
32 | |
33 | static cl::opt<bool> |
34 | PrintReorderedData("print-reordered-data" , |
35 | cl::desc("print section contents after reordering" ), |
36 | cl::Hidden, cl::cat(BoltCategory)); |
37 | |
38 | cl::list<std::string> |
39 | ReorderData("reorder-data" , |
40 | cl::CommaSeparated, |
41 | cl::desc("list of sections to reorder" ), |
42 | cl::value_desc("section1,section2,section3,..." ), |
43 | cl::cat(BoltOptCategory)); |
44 | |
45 | enum ReorderAlgo : char { |
46 | REORDER_COUNT = 0, |
47 | REORDER_FUNCS = 1 |
48 | }; |
49 | |
50 | static cl::opt<ReorderAlgo> |
51 | ReorderAlgorithm("reorder-data-algo" , |
52 | cl::desc("algorithm used to reorder data sections" ), |
53 | cl::init(Val: REORDER_COUNT), |
54 | cl::values( |
55 | clEnumValN(REORDER_COUNT, |
56 | "count" , |
57 | "sort hot data by read counts" ), |
58 | clEnumValN(REORDER_FUNCS, |
59 | "funcs" , |
60 | "sort hot data by hot function usage and count" )), |
61 | cl::ZeroOrMore, |
62 | cl::cat(BoltOptCategory)); |
63 | |
64 | static cl::opt<unsigned> |
65 | ReorderDataMaxSymbols("reorder-data-max-symbols" , |
66 | cl::desc("maximum number of symbols to reorder" ), |
67 | cl::init(Val: std::numeric_limits<unsigned>::max()), |
68 | cl::cat(BoltOptCategory)); |
69 | |
70 | static cl::opt<unsigned> ReorderDataMaxBytes( |
71 | "reorder-data-max-bytes" , cl::desc("maximum number of bytes to reorder" ), |
72 | cl::init(Val: std::numeric_limits<unsigned>::max()), cl::cat(BoltOptCategory)); |
73 | |
74 | static cl::list<std::string> |
75 | ReorderSymbols("reorder-symbols" , |
76 | cl::CommaSeparated, |
77 | cl::desc("list of symbol names that can be reordered" ), |
78 | cl::value_desc("symbol1,symbol2,symbol3,..." ), |
79 | cl::Hidden, |
80 | cl::cat(BoltCategory)); |
81 | |
82 | static cl::list<std::string> |
83 | SkipSymbols("reorder-skip-symbols" , |
84 | cl::CommaSeparated, |
85 | cl::desc("list of symbol names that cannot be reordered" ), |
86 | cl::value_desc("symbol1,symbol2,symbol3,..." ), |
87 | cl::Hidden, |
88 | cl::cat(BoltCategory)); |
89 | |
90 | static cl::opt<bool> ReorderInplace("reorder-data-inplace" , |
91 | cl::desc("reorder data sections in place" ), |
92 | |
93 | cl::cat(BoltOptCategory)); |
94 | } |
95 | |
96 | namespace llvm { |
97 | namespace bolt { |
98 | |
99 | namespace { |
100 | |
101 | static constexpr uint16_t MinAlignment = 16; |
102 | |
103 | bool isSupported(const BinarySection &BS) { return BS.isData() && !BS.isTLS(); } |
104 | |
105 | bool filterSymbol(const BinaryData *BD) { |
106 | if (!BD->isAtomic() || BD->isJumpTable() || !BD->isMoveable()) |
107 | return false; |
108 | |
109 | bool IsValid = true; |
110 | |
111 | if (!opts::ReorderSymbols.empty()) { |
112 | IsValid = llvm::any_of(Range&: opts::ReorderSymbols, P: [&](const std::string &Name) { |
113 | return BD->hasName(Name); |
114 | }); |
115 | } |
116 | |
117 | if (!IsValid) |
118 | return false; |
119 | |
120 | if (!opts::SkipSymbols.empty()) { |
121 | for (const std::string &Name : opts::SkipSymbols) { |
122 | if (BD->hasName(Name)) { |
123 | IsValid = false; |
124 | break; |
125 | } |
126 | } |
127 | } |
128 | |
129 | return IsValid; |
130 | } |
131 | |
132 | } // namespace |
133 | |
134 | using DataOrder = ReorderData::DataOrder; |
135 | |
136 | void ReorderData::printOrder(BinaryContext &BC, const BinarySection &Section, |
137 | DataOrder::const_iterator Begin, |
138 | DataOrder::const_iterator End) const { |
139 | uint64_t TotalSize = 0; |
140 | bool = false; |
141 | while (Begin != End) { |
142 | const BinaryData *BD = Begin->first; |
143 | |
144 | if (!PrintHeader) { |
145 | BC.outs() << "BOLT-INFO: Hot global symbols for " << Section.getName() |
146 | << ":\n" ; |
147 | PrintHeader = true; |
148 | } |
149 | |
150 | BC.outs() << "BOLT-INFO: " << *BD << ", moveable=" << BD->isMoveable() |
151 | << format(Fmt: ", weight=%.5f\n" , |
152 | Vals: double(Begin->second) / BD->getSize()); |
153 | |
154 | TotalSize += BD->getSize(); |
155 | ++Begin; |
156 | } |
157 | if (TotalSize) |
158 | BC.outs() << "BOLT-INFO: Total hot symbol size = " << TotalSize << "\n" ; |
159 | } |
160 | |
161 | DataOrder ReorderData::baseOrder(BinaryContext &BC, |
162 | const BinarySection &Section) const { |
163 | DataOrder Order; |
164 | for (auto &Entry : BC.getBinaryDataForSection(Section)) { |
165 | BinaryData *BD = Entry.second; |
166 | if (!BD->isAtomic()) // skip sub-symbols |
167 | continue; |
168 | auto BDCI = BinaryDataCounts.find(x: BD); |
169 | uint64_t BDCount = BDCI == BinaryDataCounts.end() ? 0 : BDCI->second; |
170 | Order.emplace_back(args&: BD, args&: BDCount); |
171 | } |
172 | return Order; |
173 | } |
174 | |
175 | void ReorderData::assignMemData(BinaryContext &BC) { |
176 | // Map of sections (or heap/stack) to count/size. |
177 | MapVector<StringRef, uint64_t> Counts; |
178 | MapVector<StringRef, uint64_t> JumpTableCounts; |
179 | uint64_t TotalCount = 0; |
180 | for (auto &BFI : BC.getBinaryFunctions()) { |
181 | const BinaryFunction &BF = BFI.second; |
182 | if (!BF.hasMemoryProfile()) |
183 | continue; |
184 | |
185 | for (const BinaryBasicBlock &BB : BF) { |
186 | for (const MCInst &Inst : BB) { |
187 | auto ErrorOrMemAccessProfile = |
188 | BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>( |
189 | Inst, Name: "MemoryAccessProfile" ); |
190 | if (!ErrorOrMemAccessProfile) |
191 | continue; |
192 | |
193 | const MemoryAccessProfile &MemAccessProfile = |
194 | ErrorOrMemAccessProfile.get(); |
195 | for (const AddressAccess &AccessInfo : |
196 | MemAccessProfile.AddressAccessInfo) { |
197 | if (BinaryData *BD = AccessInfo.MemoryObject) { |
198 | BinaryDataCounts[BD->getAtomicRoot()] += AccessInfo.Count; |
199 | Counts[BD->getSectionName()] += AccessInfo.Count; |
200 | if (BD->getAtomicRoot()->isJumpTable()) |
201 | JumpTableCounts[BD->getSectionName()] += AccessInfo.Count; |
202 | } else { |
203 | Counts["Heap/stack" ] += AccessInfo.Count; |
204 | } |
205 | TotalCount += AccessInfo.Count; |
206 | } |
207 | } |
208 | } |
209 | } |
210 | |
211 | if (!Counts.empty()) { |
212 | BC.outs() << "BOLT-INFO: Memory stats breakdown:\n" ; |
213 | for (const auto &KV : Counts) { |
214 | StringRef Section = KV.first; |
215 | const uint64_t Count = KV.second; |
216 | BC.outs() << "BOLT-INFO: " << Section << " = " << Count |
217 | << format(Fmt: " (%.1f%%)\n" , Vals: 100.0 * Count / TotalCount); |
218 | if (JumpTableCounts.count(Key: Section) != 0) { |
219 | const uint64_t JTCount = JumpTableCounts[Section]; |
220 | BC.outs() << "BOLT-INFO: jump tables = " << JTCount |
221 | << format(Fmt: " (%.1f%%)\n" , Vals: 100.0 * JTCount / Count); |
222 | } |
223 | } |
224 | BC.outs() << "BOLT-INFO: Total memory events: " << TotalCount << "\n" ; |
225 | } |
226 | } |
227 | |
228 | /// Only consider moving data that is used by the hottest functions with |
229 | /// valid profiles. |
230 | std::pair<DataOrder, unsigned> |
231 | ReorderData::sortedByFunc(BinaryContext &BC, const BinarySection &Section, |
232 | std::map<uint64_t, BinaryFunction> &BFs) const { |
233 | std::map<BinaryData *, std::set<BinaryFunction *>> BDtoFunc; |
234 | std::map<BinaryData *, uint64_t> BDtoFuncCount; |
235 | |
236 | auto dataUses = [&BC](const BinaryFunction &BF, bool OnlyHot) { |
237 | std::set<BinaryData *> Uses; |
238 | for (const BinaryBasicBlock &BB : BF) { |
239 | if (OnlyHot && BB.isCold()) |
240 | continue; |
241 | |
242 | for (const MCInst &Inst : BB) { |
243 | auto ErrorOrMemAccessProfile = |
244 | BC.MIB->tryGetAnnotationAs<MemoryAccessProfile>( |
245 | Inst, Name: "MemoryAccessProfile" ); |
246 | if (!ErrorOrMemAccessProfile) |
247 | continue; |
248 | |
249 | const MemoryAccessProfile &MemAccessProfile = |
250 | ErrorOrMemAccessProfile.get(); |
251 | for (const AddressAccess &AccessInfo : |
252 | MemAccessProfile.AddressAccessInfo) { |
253 | if (AccessInfo.MemoryObject) |
254 | Uses.insert(x: AccessInfo.MemoryObject); |
255 | } |
256 | } |
257 | } |
258 | return Uses; |
259 | }; |
260 | |
261 | for (auto &Entry : BFs) { |
262 | BinaryFunction &BF = Entry.second; |
263 | if (BF.hasValidProfile()) { |
264 | for (BinaryData *BD : dataUses(BF, true)) { |
265 | if (!BC.getFunctionForSymbol(Symbol: BD->getSymbol())) { |
266 | BDtoFunc[BD->getAtomicRoot()].insert(x: &BF); |
267 | BDtoFuncCount[BD->getAtomicRoot()] += BF.getKnownExecutionCount(); |
268 | } |
269 | } |
270 | } |
271 | } |
272 | |
273 | DataOrder Order = baseOrder(BC, Section); |
274 | unsigned SplitPoint = Order.size(); |
275 | |
276 | llvm::sort( |
277 | C&: Order, |
278 | Comp: [&](const DataOrder::value_type &A, const DataOrder::value_type &B) { |
279 | // Total execution counts of functions referencing BD. |
280 | const uint64_t ACount = BDtoFuncCount[A.first]; |
281 | const uint64_t BCount = BDtoFuncCount[B.first]; |
282 | // Weight by number of loads/data size. |
283 | const double AWeight = double(A.second) / A.first->getSize(); |
284 | const double BWeight = double(B.second) / B.first->getSize(); |
285 | return (ACount > BCount || |
286 | (ACount == BCount && |
287 | (AWeight > BWeight || |
288 | (AWeight == BWeight && |
289 | A.first->getAddress() < B.first->getAddress())))); |
290 | }); |
291 | |
292 | for (unsigned Idx = 0; Idx < Order.size(); ++Idx) { |
293 | if (!BDtoFuncCount[Order[Idx].first]) { |
294 | SplitPoint = Idx; |
295 | break; |
296 | } |
297 | } |
298 | |
299 | return std::make_pair(x&: Order, y&: SplitPoint); |
300 | } |
301 | |
302 | std::pair<DataOrder, unsigned> |
303 | ReorderData::sortedByCount(BinaryContext &BC, |
304 | const BinarySection &Section) const { |
305 | DataOrder Order = baseOrder(BC, Section); |
306 | unsigned SplitPoint = Order.size(); |
307 | |
308 | llvm::sort(C&: Order, Comp: [](const DataOrder::value_type &A, |
309 | const DataOrder::value_type &B) { |
310 | // Weight by number of loads/data size. |
311 | const double AWeight = double(A.second) / A.first->getSize(); |
312 | const double BWeight = double(B.second) / B.first->getSize(); |
313 | return (AWeight > BWeight || |
314 | (AWeight == BWeight && |
315 | (A.first->getSize() < B.first->getSize() || |
316 | (A.first->getSize() == B.first->getSize() && |
317 | A.first->getAddress() < B.first->getAddress())))); |
318 | }); |
319 | |
320 | for (unsigned Idx = 0; Idx < Order.size(); ++Idx) { |
321 | if (!Order[Idx].second) { |
322 | SplitPoint = Idx; |
323 | break; |
324 | } |
325 | } |
326 | |
327 | return std::make_pair(x&: Order, y&: SplitPoint); |
328 | } |
329 | |
330 | // TODO |
331 | // add option for cache-line alignment (or just use cache-line when section |
332 | // is writable)? |
333 | void ReorderData::setSectionOrder(BinaryContext &BC, |
334 | BinarySection &OutputSection, |
335 | DataOrder::iterator Begin, |
336 | DataOrder::iterator End) { |
337 | std::vector<BinaryData *> NewOrder; |
338 | unsigned NumReordered = 0; |
339 | uint64_t Offset = 0; |
340 | uint64_t Count = 0; |
341 | |
342 | // Get the total count just for stats |
343 | uint64_t TotalCount = 0; |
344 | for (auto Itr = Begin; Itr != End; ++Itr) |
345 | TotalCount += Itr->second; |
346 | |
347 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: setSectionOrder for " |
348 | << OutputSection.getName() << "\n" ); |
349 | |
350 | for (; Begin != End; ++Begin) { |
351 | BinaryData *BD = Begin->first; |
352 | |
353 | // We can't move certain symbols. |
354 | if (!filterSymbol(BD)) |
355 | continue; |
356 | |
357 | ++NumReordered; |
358 | if (NumReordered > opts::ReorderDataMaxSymbols) { |
359 | if (!NewOrder.empty()) |
360 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol " |
361 | << *NewOrder.back() << "\n" ); |
362 | break; |
363 | } |
364 | |
365 | uint16_t Alignment = std::max(a: BD->getAlignment(), b: MinAlignment); |
366 | Offset = alignTo(Value: Offset, Align: Alignment); |
367 | |
368 | if ((Offset + BD->getSize()) > opts::ReorderDataMaxBytes) { |
369 | if (!NewOrder.empty()) |
370 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: processing ending on symbol " |
371 | << *NewOrder.back() << "\n" ); |
372 | break; |
373 | } |
374 | |
375 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: " << BD->getName() << " @ 0x" |
376 | << Twine::utohexstr(Offset) << "\n" ); |
377 | |
378 | BD->setOutputLocation(NewSection&: OutputSection, NewOffset: Offset); |
379 | |
380 | // reorder sub-symbols |
381 | for (std::pair<const uint64_t, BinaryData *> &SubBD : |
382 | BC.getSubBinaryData(BD)) { |
383 | if (!SubBD.second->isJumpTable()) { |
384 | uint64_t SubOffset = |
385 | Offset + SubBD.second->getAddress() - BD->getAddress(); |
386 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: SubBD " << SubBD.second->getName() |
387 | << " @ " << SubOffset << "\n" ); |
388 | SubBD.second->setOutputLocation(NewSection&: OutputSection, NewOffset: SubOffset); |
389 | } |
390 | } |
391 | |
392 | Offset += BD->getSize(); |
393 | Count += Begin->second; |
394 | NewOrder.push_back(x: BD); |
395 | } |
396 | |
397 | OutputSection.reorderContents(Order: NewOrder, Inplace: opts::ReorderInplace); |
398 | |
399 | BC.outs() << "BOLT-INFO: reorder-data: " << Count << "/" << TotalCount |
400 | << format(Fmt: " (%.1f%%)" , Vals: 100.0 * Count / TotalCount) << " events, " |
401 | << Offset << " hot bytes\n" ; |
402 | } |
403 | |
404 | bool ReorderData::markUnmoveableSymbols(BinaryContext &BC, |
405 | BinarySection &Section) const { |
406 | // Private symbols currently can't be moved because data can "leak" across |
407 | // the boundary of one symbol to the next, e.g. a string that has a common |
408 | // suffix might start in one private symbol and end with the common |
409 | // suffix in another. |
410 | auto isPrivate = [&](const BinaryData *BD) { |
411 | auto Prefix = std::string("PG" ) + BC.AsmInfo->getPrivateGlobalPrefix(); |
412 | return BD->getName().starts_with(Prefix: Prefix.str()); |
413 | }; |
414 | auto Range = BC.getBinaryDataForSection(Section); |
415 | bool FoundUnmoveable = false; |
416 | for (auto Itr = Range.begin(); Itr != Range.end(); ++Itr) { |
417 | BinaryData *Next = |
418 | std::next(x: Itr) != Range.end() ? std::next(x: Itr)->second : nullptr; |
419 | if (Itr->second->getName().starts_with(Prefix: "PG." )) { |
420 | BinaryData *Prev = |
421 | Itr != Range.begin() ? std::prev(x: Itr)->second : nullptr; |
422 | bool PrevIsPrivate = Prev && isPrivate(Prev); |
423 | bool NextIsPrivate = Next && isPrivate(Next); |
424 | if (isPrivate(Itr->second) && (PrevIsPrivate || NextIsPrivate)) |
425 | Itr->second->setIsMoveable(false); |
426 | } else { |
427 | // check for overlapping symbols. |
428 | if (Next && Itr->second->getEndAddress() != Next->getAddress() && |
429 | Next->containsAddress(Address: Itr->second->getEndAddress())) { |
430 | Itr->second->setIsMoveable(false); |
431 | Next->setIsMoveable(false); |
432 | } |
433 | } |
434 | FoundUnmoveable |= !Itr->second->isMoveable(); |
435 | } |
436 | return FoundUnmoveable; |
437 | } |
438 | |
439 | Error ReorderData::runOnFunctions(BinaryContext &BC) { |
440 | static const char *DefaultSections[] = {".rodata" , ".data" , ".bss" , nullptr}; |
441 | |
442 | if (!BC.HasRelocations || opts::ReorderData.empty()) |
443 | return Error::success(); |
444 | |
445 | // For now |
446 | if (opts::JumpTables > JTS_BASIC) { |
447 | BC.outs() << "BOLT-WARNING: jump table support must be basic for " |
448 | << "data reordering to work.\n" ; |
449 | return Error::success(); |
450 | } |
451 | |
452 | assignMemData(BC); |
453 | |
454 | std::vector<BinarySection *> Sections; |
455 | |
456 | for (const std::string &SectionName : opts::ReorderData) { |
457 | if (SectionName == "default" ) { |
458 | for (unsigned I = 0; DefaultSections[I]; ++I) |
459 | if (ErrorOr<BinarySection &> Section = |
460 | BC.getUniqueSectionByName(SectionName: DefaultSections[I])) |
461 | Sections.push_back(x: &*Section); |
462 | continue; |
463 | } |
464 | |
465 | ErrorOr<BinarySection &> Section = BC.getUniqueSectionByName(SectionName); |
466 | if (!Section) { |
467 | BC.outs() << "BOLT-WARNING: Section " << SectionName |
468 | << " not found, skipping.\n" ; |
469 | continue; |
470 | } |
471 | |
472 | if (!isSupported(BS: *Section)) { |
473 | BC.errs() << "BOLT-ERROR: Section " << SectionName << " not supported.\n" ; |
474 | return createFatalBOLTError(S: "" ); |
475 | } |
476 | |
477 | Sections.push_back(x: &*Section); |
478 | } |
479 | |
480 | for (BinarySection *Section : Sections) { |
481 | const bool FoundUnmoveable = markUnmoveableSymbols(BC, Section&: *Section); |
482 | |
483 | DataOrder Order; |
484 | unsigned SplitPointIdx; |
485 | |
486 | if (opts::ReorderAlgorithm == opts::ReorderAlgo::REORDER_COUNT) { |
487 | BC.outs() << "BOLT-INFO: reorder-sections: ordering data by count\n" ; |
488 | std::tie(args&: Order, args&: SplitPointIdx) = sortedByCount(BC, Section: *Section); |
489 | } else { |
490 | BC.outs() << "BOLT-INFO: reorder-sections: ordering data by funcs\n" ; |
491 | std::tie(args&: Order, args&: SplitPointIdx) = |
492 | sortedByFunc(BC, Section: *Section, BFs&: BC.getBinaryFunctions()); |
493 | } |
494 | auto SplitPoint = Order.begin() + SplitPointIdx; |
495 | |
496 | if (opts::PrintReorderedData) |
497 | printOrder(BC, Section: *Section, Begin: Order.begin(), End: SplitPoint); |
498 | |
499 | if (!opts::ReorderInplace || FoundUnmoveable) { |
500 | if (opts::ReorderInplace && FoundUnmoveable) |
501 | BC.outs() << "BOLT-INFO: Found unmoveable symbols in " |
502 | << Section->getName() << " falling back to splitting " |
503 | << "instead of in-place reordering.\n" ; |
504 | |
505 | // Rename sections. |
506 | BinarySection &Hot = |
507 | BC.registerSection(SectionName: Section->getName() + ".hot" , OriginalSection: *Section); |
508 | Hot.setOutputName(Section->getName()); |
509 | Section->setOutputName(".bolt.org" + Section->getName()); |
510 | |
511 | // Reorder contents of original section. |
512 | setSectionOrder(BC, OutputSection&: Hot, Begin: Order.begin(), End: SplitPoint); |
513 | |
514 | // This keeps the original data from thinking it has been moved. |
515 | for (std::pair<const uint64_t, BinaryData *> &Entry : |
516 | BC.getBinaryDataForSection(Section&: *Section)) { |
517 | if (!Entry.second->isMoved()) { |
518 | Entry.second->setSection(*Section); |
519 | Entry.second->setOutputSection(*Section); |
520 | } |
521 | } |
522 | } else { |
523 | BC.outs() |
524 | << "BOLT-WARNING: Inplace section reordering not supported yet.\n" ; |
525 | setSectionOrder(BC, OutputSection&: *Section, Begin: Order.begin(), End: Order.end()); |
526 | } |
527 | } |
528 | return Error::success(); |
529 | } |
530 | |
531 | } // namespace bolt |
532 | } // namespace llvm |
533 | |