1 | //===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interface to a pass that merges duplicate global |
10 | // constants together into a single constant that is shared. This is useful |
11 | // because some passes (ie TraceValues) insert a lot of string constants into |
12 | // the program, regardless of whether or not an existing string is available. |
13 | // |
14 | // Algorithm: ConstantMerge is designed to build up a map of available constants |
15 | // and eliminate duplicates when it is initialized. |
16 | // |
17 | //===----------------------------------------------------------------------===// |
18 | |
19 | #include "llvm/Transforms/IPO/ConstantMerge.h" |
20 | #include "llvm/ADT/DenseMap.h" |
21 | #include "llvm/ADT/SmallPtrSet.h" |
22 | #include "llvm/ADT/SmallVector.h" |
23 | #include "llvm/ADT/Statistic.h" |
24 | #include "llvm/IR/Constants.h" |
25 | #include "llvm/IR/DataLayout.h" |
26 | #include "llvm/IR/DerivedTypes.h" |
27 | #include "llvm/IR/GlobalValue.h" |
28 | #include "llvm/IR/GlobalVariable.h" |
29 | #include "llvm/IR/LLVMContext.h" |
30 | #include "llvm/IR/Module.h" |
31 | #include "llvm/Support/Casting.h" |
32 | #include "llvm/Transforms/IPO.h" |
33 | #include <algorithm> |
34 | #include <cassert> |
35 | #include <utility> |
36 | |
37 | using namespace llvm; |
38 | |
39 | #define DEBUG_TYPE "constmerge" |
40 | |
41 | STATISTIC(NumIdenticalMerged, "Number of identical global constants merged" ); |
42 | |
43 | /// Find values that are marked as llvm.used. |
44 | static void FindUsedValues(GlobalVariable *LLVMUsed, |
45 | SmallPtrSetImpl<const GlobalValue*> &UsedValues) { |
46 | if (!LLVMUsed) return; |
47 | ConstantArray *Inits = cast<ConstantArray>(Val: LLVMUsed->getInitializer()); |
48 | |
49 | for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { |
50 | Value *Operand = Inits->getOperand(i_nocapture: i)->stripPointerCasts(); |
51 | GlobalValue *GV = cast<GlobalValue>(Val: Operand); |
52 | UsedValues.insert(Ptr: GV); |
53 | } |
54 | } |
55 | |
56 | // True if A is better than B. |
57 | static bool IsBetterCanonical(const GlobalVariable &A, |
58 | const GlobalVariable &B) { |
59 | if (!A.hasLocalLinkage() && B.hasLocalLinkage()) |
60 | return true; |
61 | |
62 | if (A.hasLocalLinkage() && !B.hasLocalLinkage()) |
63 | return false; |
64 | |
65 | return A.hasGlobalUnnamedAddr(); |
66 | } |
67 | |
68 | static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) { |
69 | SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; |
70 | GV->getAllMetadata(MDs); |
71 | for (const auto &V : MDs) |
72 | if (V.first != LLVMContext::MD_dbg) |
73 | return true; |
74 | return false; |
75 | } |
76 | |
77 | static void copyDebugLocMetadata(const GlobalVariable *From, |
78 | GlobalVariable *To) { |
79 | SmallVector<DIGlobalVariableExpression *, 1> MDs; |
80 | From->getDebugInfo(GVs&: MDs); |
81 | for (auto *MD : MDs) |
82 | To->addDebugInfo(GV: MD); |
83 | } |
84 | |
85 | static Align getAlign(GlobalVariable *GV) { |
86 | return GV->getAlign().value_or( |
87 | u: GV->getParent()->getDataLayout().getPreferredAlign(GV)); |
88 | } |
89 | |
90 | static bool |
91 | isUnmergeableGlobal(GlobalVariable *GV, |
92 | const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) { |
93 | // Only process constants with initializers in the default address space. |
94 | return !GV->isConstant() || !GV->hasDefinitiveInitializer() || |
95 | GV->getType()->getAddressSpace() != 0 || GV->hasSection() || |
96 | // Don't touch thread-local variables. |
97 | GV->isThreadLocal() || |
98 | // Don't touch values marked with attribute(used). |
99 | UsedGlobals.count(Ptr: GV); |
100 | } |
101 | |
102 | enum class CanMerge { No, Yes }; |
103 | static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) { |
104 | if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr()) |
105 | return CanMerge::No; |
106 | if (hasMetadataOtherThanDebugLoc(GV: Old)) |
107 | return CanMerge::No; |
108 | assert(!hasMetadataOtherThanDebugLoc(New)); |
109 | if (!Old->hasGlobalUnnamedAddr()) |
110 | New->setUnnamedAddr(GlobalValue::UnnamedAddr::None); |
111 | return CanMerge::Yes; |
112 | } |
113 | |
114 | static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { |
115 | Constant *NewConstant = New; |
116 | |
117 | LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @" |
118 | << New->getName() << "\n" ); |
119 | |
120 | // Bump the alignment if necessary. |
121 | if (Old->getAlign() || New->getAlign()) |
122 | New->setAlignment(std::max(a: getAlign(GV: Old), b: getAlign(GV: New))); |
123 | |
124 | copyDebugLocMetadata(From: Old, To: New); |
125 | Old->replaceAllUsesWith(V: NewConstant); |
126 | |
127 | // Delete the global value from the module. |
128 | assert(Old->hasLocalLinkage() && |
129 | "Refusing to delete an externally visible global variable." ); |
130 | Old->eraseFromParent(); |
131 | } |
132 | |
133 | static bool mergeConstants(Module &M) { |
134 | // Find all the globals that are marked "used". These cannot be merged. |
135 | SmallPtrSet<const GlobalValue*, 8> UsedGlobals; |
136 | FindUsedValues(LLVMUsed: M.getGlobalVariable(Name: "llvm.used" ), UsedValues&: UsedGlobals); |
137 | FindUsedValues(LLVMUsed: M.getGlobalVariable(Name: "llvm.compiler.used" ), UsedValues&: UsedGlobals); |
138 | |
139 | // Map unique constants to globals. |
140 | DenseMap<Constant *, GlobalVariable *> CMap; |
141 | |
142 | SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32> |
143 | SameContentReplacements; |
144 | |
145 | size_t ChangesMade = 0; |
146 | size_t OldChangesMade = 0; |
147 | |
148 | // Iterate constant merging while we are still making progress. Merging two |
149 | // constants together may allow us to merge other constants together if the |
150 | // second level constants have initializers which point to the globals that |
151 | // were just merged. |
152 | while (true) { |
153 | // Find the canonical constants others will be merged with. |
154 | for (GlobalVariable &GV : llvm::make_early_inc_range(Range: M.globals())) { |
155 | // If this GV is dead, remove it. |
156 | GV.removeDeadConstantUsers(); |
157 | if (GV.use_empty() && GV.hasLocalLinkage()) { |
158 | GV.eraseFromParent(); |
159 | ++ChangesMade; |
160 | continue; |
161 | } |
162 | |
163 | if (isUnmergeableGlobal(GV: &GV, UsedGlobals)) |
164 | continue; |
165 | |
166 | // This transformation is legal for weak ODR globals in the sense it |
167 | // doesn't change semantics, but we really don't want to perform it |
168 | // anyway; it's likely to pessimize code generation, and some tools |
169 | // (like the Darwin linker in cases involving CFString) don't expect it. |
170 | if (GV.isWeakForLinker()) |
171 | continue; |
172 | |
173 | // Don't touch globals with metadata other then !dbg. |
174 | if (hasMetadataOtherThanDebugLoc(GV: &GV)) |
175 | continue; |
176 | |
177 | Constant *Init = GV.getInitializer(); |
178 | |
179 | // Check to see if the initializer is already known. |
180 | GlobalVariable *&Slot = CMap[Init]; |
181 | |
182 | // If this is the first constant we find or if the old one is local, |
183 | // replace with the current one. If the current is externally visible |
184 | // it cannot be replace, but can be the canonical constant we merge with. |
185 | bool FirstConstantFound = !Slot; |
186 | if (FirstConstantFound || IsBetterCanonical(A: GV, B: *Slot)) { |
187 | Slot = &GV; |
188 | LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName() |
189 | << (FirstConstantFound ? "\n" : " (updated)\n" )); |
190 | } |
191 | } |
192 | |
193 | // Identify all globals that can be merged together, filling in the |
194 | // SameContentReplacements vector. We cannot do the replacement in this pass |
195 | // because doing so may cause initializers of other globals to be rewritten, |
196 | // invalidating the Constant* pointers in CMap. |
197 | for (GlobalVariable &GV : llvm::make_early_inc_range(Range: M.globals())) { |
198 | if (isUnmergeableGlobal(GV: &GV, UsedGlobals)) |
199 | continue; |
200 | |
201 | // We can only replace constant with local linkage. |
202 | if (!GV.hasLocalLinkage()) |
203 | continue; |
204 | |
205 | Constant *Init = GV.getInitializer(); |
206 | |
207 | // Check to see if the initializer is already known. |
208 | auto Found = CMap.find(Val: Init); |
209 | if (Found == CMap.end()) |
210 | continue; |
211 | |
212 | GlobalVariable *Slot = Found->second; |
213 | if (Slot == &GV) |
214 | continue; |
215 | |
216 | if (makeMergeable(Old: &GV, New: Slot) == CanMerge::No) |
217 | continue; |
218 | |
219 | // Make all uses of the duplicate constant use the canonical version. |
220 | LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @" |
221 | << Slot->getName() << "\n" ); |
222 | SameContentReplacements.push_back(Elt: std::make_pair(x: &GV, y&: Slot)); |
223 | } |
224 | |
225 | // Now that we have figured out which replacements must be made, do them all |
226 | // now. This avoid invalidating the pointers in CMap, which are unneeded |
227 | // now. |
228 | for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) { |
229 | GlobalVariable *Old = SameContentReplacements[i].first; |
230 | GlobalVariable *New = SameContentReplacements[i].second; |
231 | replace(M, Old, New); |
232 | ++ChangesMade; |
233 | ++NumIdenticalMerged; |
234 | } |
235 | |
236 | if (ChangesMade == OldChangesMade) |
237 | break; |
238 | OldChangesMade = ChangesMade; |
239 | |
240 | SameContentReplacements.clear(); |
241 | CMap.clear(); |
242 | } |
243 | |
244 | return ChangesMade; |
245 | } |
246 | |
247 | PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) { |
248 | if (!mergeConstants(M)) |
249 | return PreservedAnalyses::all(); |
250 | return PreservedAnalyses::none(); |
251 | } |
252 | |