1 | //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // \file |
10 | // Uses profile information to split out cold blocks. |
11 | // |
12 | // This pass splits out cold machine basic blocks from the parent function. This |
13 | // implementation leverages the basic block section framework. Blocks marked |
14 | // cold by this pass are grouped together in a separate section prefixed with |
15 | // ".text.unlikely.*". The linker can then group these together as a cold |
16 | // section. The split part of the function is a contiguous region identified by |
17 | // the symbol "foo.cold". Grouping all cold blocks across functions together |
18 | // decreases fragmentation and improves icache and itlb utilization. Note that |
19 | // the overall changes to the binary size are negligible; only a small number of |
20 | // additional jump instructions may be introduced. |
21 | // |
22 | // For the original RFC of this pass please see |
23 | // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ |
24 | //===----------------------------------------------------------------------===// |
25 | |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
28 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
29 | #include "llvm/Analysis/EHUtils.h" |
30 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
31 | #include "llvm/CodeGen/BasicBlockSectionUtils.h" |
32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
33 | #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" |
34 | #include "llvm/CodeGen/MachineFunction.h" |
35 | #include "llvm/CodeGen/MachineFunctionPass.h" |
36 | #include "llvm/CodeGen/MachineModuleInfo.h" |
37 | #include "llvm/CodeGen/Passes.h" |
38 | #include "llvm/CodeGen/TargetInstrInfo.h" |
39 | #include "llvm/IR/Function.h" |
40 | #include "llvm/InitializePasses.h" |
41 | #include "llvm/Support/CommandLine.h" |
42 | #include <optional> |
43 | |
44 | using namespace llvm; |
45 | |
46 | // FIXME: This cutoff value is CPU dependent and should be moved to |
47 | // TargetTransformInfo once we consider enabling this on other platforms. |
48 | // The value is expressed as a ProfileSummaryInfo integer percentile cutoff. |
49 | // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split. |
50 | // The default was empirically determined to be optimal when considering cutoff |
51 | // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on |
52 | // Intel CPUs. |
53 | static cl::opt<unsigned> |
54 | PercentileCutoff("mfs-psi-cutoff" , |
55 | cl::desc("Percentile profile summary cutoff used to " |
56 | "determine cold blocks. Unused if set to zero." ), |
57 | cl::init(Val: 999950), cl::Hidden); |
58 | |
59 | static cl::opt<unsigned> ColdCountThreshold( |
60 | "mfs-count-threshold" , |
61 | cl::desc( |
62 | "Minimum number of times a block must be executed to be retained." ), |
63 | cl::init(Val: 1), cl::Hidden); |
64 | |
65 | static cl::opt<bool> SplitAllEHCode( |
66 | "mfs-split-ehcode" , |
67 | cl::desc("Splits all EH code and it's descendants by default." ), |
68 | cl::init(Val: false), cl::Hidden); |
69 | |
70 | namespace { |
71 | |
72 | class MachineFunctionSplitter : public MachineFunctionPass { |
73 | public: |
74 | static char ID; |
75 | MachineFunctionSplitter() : MachineFunctionPass(ID) { |
76 | initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); |
77 | } |
78 | |
79 | StringRef getPassName() const override { |
80 | return "Machine Function Splitter Transformation" ; |
81 | } |
82 | |
83 | void getAnalysisUsage(AnalysisUsage &AU) const override; |
84 | |
85 | bool runOnMachineFunction(MachineFunction &F) override; |
86 | }; |
87 | } // end anonymous namespace |
88 | |
89 | /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable |
90 | /// only by EH pad as cold. This will help mark EH pads statically cold |
91 | /// instead of relying on profile data. |
92 | static void setDescendantEHBlocksCold(MachineFunction &MF) { |
93 | DenseSet<MachineBasicBlock *> EHBlocks; |
94 | computeEHOnlyBlocks(F&: MF, EHBlocks); |
95 | for (auto Block : EHBlocks) { |
96 | Block->setSectionID(MBBSectionID::ColdSectionID); |
97 | } |
98 | } |
99 | |
100 | static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) { |
101 | auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { |
102 | return X.getSectionID().Type < Y.getSectionID().Type; |
103 | }; |
104 | llvm::sortBasicBlocksAndUpdateBranches(MF, MBBCmp: Comparator); |
105 | llvm::avoidZeroOffsetLandingPad(MF); |
106 | } |
107 | |
108 | static bool isColdBlock(const MachineBasicBlock &MBB, |
109 | const MachineBlockFrequencyInfo *MBFI, |
110 | ProfileSummaryInfo *PSI) { |
111 | std::optional<uint64_t> Count = MBFI->getBlockProfileCount(MBB: &MBB); |
112 | // For instrumentation profiles and sample profiles, we use different ways |
113 | // to judge whether a block is cold and should be split. |
114 | if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) { |
115 | // If using instrument profile, which is deemed "accurate", no count means |
116 | // cold. |
117 | if (!Count) |
118 | return true; |
119 | if (PercentileCutoff > 0) |
120 | return PSI->isColdCountNthPercentile(PercentileCutoff, C: *Count); |
121 | // Fallthrough to end of function. |
122 | } else if (PSI->hasSampleProfile()) { |
123 | // For sample profile, no count means "do not judege coldness". |
124 | if (!Count) |
125 | return false; |
126 | } |
127 | |
128 | return (*Count < ColdCountThreshold); |
129 | } |
130 | |
131 | bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { |
132 | // We target functions with profile data. Static information in the form |
133 | // of exception handling code may be split to cold if user passes the |
134 | // mfs-split-ehcode flag. |
135 | bool UseProfileData = MF.getFunction().hasProfileData(); |
136 | if (!UseProfileData && !SplitAllEHCode) |
137 | return false; |
138 | |
139 | const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); |
140 | if (!TII.isFunctionSafeToSplit(MF)) |
141 | return false; |
142 | |
143 | // Renumbering blocks here preserves the order of the blocks as |
144 | // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort |
145 | // blocks. Preserving the order of blocks is essential to retaining decisions |
146 | // made by prior passes such as MachineBlockPlacement. |
147 | MF.RenumberBlocks(); |
148 | MF.setBBSectionsType(BasicBlockSection::Preset); |
149 | |
150 | MachineBlockFrequencyInfo *MBFI = nullptr; |
151 | ProfileSummaryInfo *PSI = nullptr; |
152 | if (UseProfileData) { |
153 | MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); |
154 | PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
155 | // If we don't have a good profile (sample profile is not deemed |
156 | // as a "good profile") and the function is not hot, then early |
157 | // return. (Because we can only trust hot functions when profile |
158 | // quality is not good.) |
159 | if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(F: &MF, BFI&: *MBFI)) { |
160 | // Split all EH code and it's descendant statically by default. |
161 | if (SplitAllEHCode) |
162 | setDescendantEHBlocksCold(MF); |
163 | finishAdjustingBasicBlocksAndLandingPads(MF); |
164 | return true; |
165 | } |
166 | } |
167 | |
168 | SmallVector<MachineBasicBlock *, 2> LandingPads; |
169 | for (auto &MBB : MF) { |
170 | if (MBB.isEntryBlock()) |
171 | continue; |
172 | |
173 | if (MBB.isEHPad()) |
174 | LandingPads.push_back(Elt: &MBB); |
175 | else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && |
176 | TII.isMBBSafeToSplitToCold(MBB) && !SplitAllEHCode) |
177 | MBB.setSectionID(MBBSectionID::ColdSectionID); |
178 | } |
179 | |
180 | // Split all EH code and it's descendant statically by default. |
181 | if (SplitAllEHCode) |
182 | setDescendantEHBlocksCold(MF); |
183 | // We only split out eh pads if all of them are cold. |
184 | else { |
185 | // Here we have UseProfileData == true. |
186 | bool HasHotLandingPads = false; |
187 | for (const MachineBasicBlock *LP : LandingPads) { |
188 | if (!isColdBlock(MBB: *LP, MBFI, PSI) || !TII.isMBBSafeToSplitToCold(MBB: *LP)) |
189 | HasHotLandingPads = true; |
190 | } |
191 | if (!HasHotLandingPads) { |
192 | for (MachineBasicBlock *LP : LandingPads) |
193 | LP->setSectionID(MBBSectionID::ColdSectionID); |
194 | } |
195 | } |
196 | |
197 | finishAdjustingBasicBlocksAndLandingPads(MF); |
198 | return true; |
199 | } |
200 | |
201 | void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { |
202 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
203 | AU.addRequired<MachineBlockFrequencyInfo>(); |
204 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
205 | } |
206 | |
207 | char MachineFunctionSplitter::ID = 0; |
208 | INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter" , |
209 | "Split machine functions using profile information" , false, |
210 | false) |
211 | |
212 | MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { |
213 | return new MachineFunctionSplitter(); |
214 | } |
215 | |