1 | //===- ModuleFile.h - Module file description -------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file defines the ModuleFile class, which describes a module that has
// been loaded from an AST file.
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H |
15 | #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H |
16 | |
17 | #include "clang/Basic/FileManager.h" |
18 | #include "clang/Basic/Module.h" |
19 | #include "clang/Basic/SourceLocation.h" |
20 | #include "clang/Serialization/ASTBitCodes.h" |
21 | #include "clang/Serialization/ContinuousRangeMap.h" |
22 | #include "clang/Serialization/ModuleFileExtension.h" |
23 | #include "llvm/ADT/BitVector.h" |
24 | #include "llvm/ADT/DenseMap.h" |
25 | #include "llvm/ADT/PointerIntPair.h" |
26 | #include "llvm/ADT/SetVector.h" |
27 | #include "llvm/ADT/SmallVector.h" |
28 | #include "llvm/ADT/StringRef.h" |
29 | #include "llvm/Bitstream/BitstreamReader.h" |
30 | #include "llvm/Support/Endian.h" |
31 | #include <cassert> |
32 | #include <cstdint> |
33 | #include <memory> |
34 | #include <string> |
35 | #include <vector> |
36 | |
37 | namespace clang { |
38 | |
39 | namespace serialization { |
40 | |
/// Classifies the way in which a loaded AST file is being used.
enum ModuleKind {
  /// A module file that was loaded implicitly.
  MK_ImplicitModule,

  /// A module file that was loaded explicitly.
  MK_ExplicitModule,

  /// A PCH file that is handled as an ordinary PCH.
  MK_PCH,

  /// A PCH file that is handled as the preamble.
  MK_Preamble,

  /// A PCH file that is handled as the actual main file.
  MK_MainFile,

  /// A file that comes from a prebuilt module path.
  MK_PrebuiltModule,
};
61 | |
/// The input file info that has been loaded from an AST file.
///
/// Every scalar member carries a default initializer, so a
/// default-constructed object starts in a well-defined state (zero / false)
/// instead of holding indeterminate values. The struct remains an aggregate.
struct InputFileInfo {
  /// NOTE(review): presumably the file name spelled the way it was
  /// requested -- confirm against the code that fills this struct in.
  std::string FilenameAsRequested;

  /// NOTE(review): presumably the file name that was actually used --
  /// confirm how it differs from FilenameAsRequested.
  std::string Filename;

  /// NOTE(review): presumably a hash of the file contents -- confirm.
  uint64_t ContentHash = 0;

  /// NOTE(review): presumably the file size recorded in the AST file --
  /// confirm.
  off_t StoredSize = 0;

  /// NOTE(review): presumably the modification time recorded in the AST
  /// file -- confirm.
  time_t StoredTime = 0;

  // Per-file flags. Their exact meaning is defined by the code that fills
  // them in, which is not visible from this header.
  bool Overridden = false;
  bool Transient = false;
  bool TopLevel = false;
  bool ModuleMap = false;
};
74 | |
75 | /// The input file that has been loaded from this AST file, along with |
76 | /// bools indicating whether this was an overridden buffer or if it was |
77 | /// out-of-date or not-found. |
78 | class InputFile { |
79 | enum { |
80 | Overridden = 1, |
81 | OutOfDate = 2, |
82 | NotFound = 3 |
83 | }; |
84 | llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val; |
85 | |
86 | public: |
87 | InputFile() = default; |
88 | |
89 | InputFile(FileEntryRef File, bool isOverridden = false, |
90 | bool isOutOfDate = false) { |
91 | assert(!(isOverridden && isOutOfDate) && |
92 | "an overridden cannot be out-of-date" ); |
93 | unsigned intVal = 0; |
94 | if (isOverridden) |
95 | intVal = Overridden; |
96 | else if (isOutOfDate) |
97 | intVal = OutOfDate; |
98 | Val.setPointerAndInt(PtrVal: &File.getMapEntry(), IntVal: intVal); |
99 | } |
100 | |
101 | static InputFile getNotFound() { |
102 | InputFile File; |
103 | File.Val.setInt(NotFound); |
104 | return File; |
105 | } |
106 | |
107 | OptionalFileEntryRef getFile() const { |
108 | if (auto *P = Val.getPointer()) |
109 | return FileEntryRef(*P); |
110 | return std::nullopt; |
111 | } |
112 | bool isOverridden() const { return Val.getInt() == Overridden; } |
113 | bool isOutOfDate() const { return Val.getInt() == OutOfDate; } |
114 | bool isNotFound() const { return Val.getInt() == NotFound; } |
115 | }; |
116 | |
117 | /// Information about a module that has been loaded by the ASTReader. |
118 | /// |
119 | /// Each instance of the Module class corresponds to a single AST file, which |
120 | /// may be a precompiled header, precompiled preamble, a module, or an AST file |
121 | /// of some sort loaded as the main file, all of which are specific formulations |
122 | /// of the general notion of a "module". A module may depend on any number of |
123 | /// other modules. |
124 | class ModuleFile { |
125 | public: |
126 | ModuleFile(ModuleKind Kind, FileEntryRef File, unsigned Generation) |
127 | : Kind(Kind), File(File), Generation(Generation) {} |
128 | ~ModuleFile(); |
129 | |
130 | // === General information === |
131 | |
132 | /// The index of this module in the list of modules. |
133 | unsigned Index = 0; |
134 | |
135 | /// The type of this module. |
136 | ModuleKind Kind; |
137 | |
138 | /// The file name of the module file. |
139 | std::string FileName; |
140 | |
141 | /// The name of the module. |
142 | std::string ModuleName; |
143 | |
144 | /// The base directory of the module. |
145 | std::string BaseDirectory; |
146 | |
147 | std::string getTimestampFilename() const { |
148 | return FileName + ".timestamp" ; |
149 | } |
150 | |
151 | /// The original source file name that was used to build the |
152 | /// primary AST file, which may have been modified for |
153 | /// relocatable-pch support. |
154 | std::string OriginalSourceFileName; |
155 | |
156 | /// The actual original source file name that was used to |
157 | /// build this AST file. |
158 | std::string ActualOriginalSourceFileName; |
159 | |
160 | /// The file ID for the original source file that was used to |
161 | /// build this AST file. |
162 | FileID OriginalSourceFileID; |
163 | |
164 | std::string ModuleMapPath; |
165 | |
166 | /// Whether this precompiled header is a relocatable PCH file. |
167 | bool RelocatablePCH = false; |
168 | |
169 | /// Whether this module file is a standard C++ module. |
170 | bool StandardCXXModule = false; |
171 | |
172 | /// Whether timestamps are included in this module file. |
173 | bool HasTimestamps = false; |
174 | |
175 | /// Whether the top-level module has been read from the AST file. |
176 | bool DidReadTopLevelSubmodule = false; |
177 | |
178 | /// The file entry for the module file. |
179 | FileEntryRef File; |
180 | |
181 | /// The signature of the module file, which may be used instead of the size |
182 | /// and modification time to identify this particular file. |
183 | ASTFileSignature Signature; |
184 | |
185 | /// The signature of the AST block of the module file, this can be used to |
186 | /// unique module files based on AST contents. |
187 | ASTFileSignature ASTBlockHash; |
188 | |
189 | /// The bit vector denoting usage of each header search entry (true = used). |
190 | llvm::BitVector SearchPathUsage; |
191 | |
192 | /// The bit vector denoting usage of each VFS entry (true = used). |
193 | llvm::BitVector VFSUsage; |
194 | |
195 | /// Whether this module has been directly imported by the |
196 | /// user. |
197 | bool DirectlyImported = false; |
198 | |
199 | /// The generation of which this module file is a part. |
200 | unsigned Generation; |
201 | |
202 | /// The memory buffer that stores the data associated with |
203 | /// this AST file, owned by the InMemoryModuleCache. |
204 | llvm::MemoryBuffer *Buffer = nullptr; |
205 | |
206 | /// The size of this file, in bits. |
207 | uint64_t SizeInBits = 0; |
208 | |
209 | /// The global bit offset (or base) of this module |
210 | uint64_t GlobalBitOffset = 0; |
211 | |
212 | /// The bit offset of the AST block of this module. |
213 | uint64_t ASTBlockStartOffset = 0; |
214 | |
215 | /// The serialized bitstream data for this file. |
216 | StringRef Data; |
217 | |
218 | /// The main bitstream cursor for the main block. |
219 | llvm::BitstreamCursor Stream; |
220 | |
221 | /// The source location where the module was explicitly or implicitly |
222 | /// imported in the local translation unit. |
223 | /// |
224 | /// If module A depends on and imports module B, both modules will have the |
225 | /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a |
226 | /// source location inside module A). |
227 | /// |
228 | /// WARNING: This is largely useless. It doesn't tell you when a module was |
229 | /// made visible, just when the first submodule of that module was imported. |
230 | SourceLocation DirectImportLoc; |
231 | |
232 | /// The source location where this module was first imported. |
233 | SourceLocation ImportLoc; |
234 | |
235 | /// The first source location in this module. |
236 | SourceLocation FirstLoc; |
237 | |
238 | /// The list of extension readers that are attached to this module |
239 | /// file. |
240 | std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders; |
241 | |
242 | /// The module offset map data for this file. If non-empty, the various |
243 | /// ContinuousRangeMaps described below have not yet been populated. |
244 | StringRef ModuleOffsetMap; |
245 | |
246 | // === Input Files === |
247 | |
248 | /// The cursor to the start of the input-files block. |
249 | llvm::BitstreamCursor InputFilesCursor; |
250 | |
251 | /// Absolute offset of the start of the input-files block. |
252 | uint64_t InputFilesOffsetBase = 0; |
253 | |
254 | /// Relative offsets for all of the input file entries in the AST file. |
255 | const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr; |
256 | |
257 | /// The input files that have been loaded from this AST file. |
258 | std::vector<InputFile> InputFilesLoaded; |
259 | |
260 | /// The input file infos that have been loaded from this AST file. |
261 | std::vector<InputFileInfo> InputFileInfosLoaded; |
262 | |
263 | // All user input files reside at the index range [0, NumUserInputFiles), and |
264 | // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()). |
265 | unsigned NumUserInputFiles = 0; |
266 | |
267 | /// If non-zero, specifies the time when we last validated input |
268 | /// files. Zero means we never validated them. |
269 | /// |
270 | /// The time is specified in seconds since the start of the Epoch. |
271 | uint64_t InputFilesValidationTimestamp = 0; |
272 | |
273 | // === Source Locations === |
274 | |
275 | /// Cursor used to read source location entries. |
276 | llvm::BitstreamCursor SLocEntryCursor; |
277 | |
278 | /// The bit offset to the start of the SOURCE_MANAGER_BLOCK. |
279 | uint64_t SourceManagerBlockStartOffset = 0; |
280 | |
281 | /// The number of source location entries in this AST file. |
282 | unsigned LocalNumSLocEntries = 0; |
283 | |
284 | /// The base ID in the source manager's view of this module. |
285 | int SLocEntryBaseID = 0; |
286 | |
287 | /// The base offset in the source manager's view of this module. |
288 | SourceLocation::UIntTy SLocEntryBaseOffset = 0; |
289 | |
290 | /// Base file offset for the offsets in SLocEntryOffsets. Real file offset |
291 | /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. |
292 | uint64_t SLocEntryOffsetsBase = 0; |
293 | |
294 | /// Offsets for all of the source location entries in the |
295 | /// AST file. |
296 | const uint32_t *SLocEntryOffsets = nullptr; |
297 | |
298 | /// Remapping table for source locations in this module. |
299 | ContinuousRangeMap<SourceLocation::UIntTy, SourceLocation::IntTy, 2> |
300 | SLocRemap; |
301 | |
302 | // === Identifiers === |
303 | |
304 | /// The number of identifiers in this AST file. |
305 | unsigned LocalNumIdentifiers = 0; |
306 | |
307 | /// Offsets into the identifier table data. |
308 | /// |
309 | /// This array is indexed by the identifier ID (-1), and provides |
310 | /// the offset into IdentifierTableData where the string data is |
311 | /// stored. |
312 | const uint32_t *IdentifierOffsets = nullptr; |
313 | |
314 | /// Base identifier ID for identifiers local to this module. |
315 | serialization::IdentID BaseIdentifierID = 0; |
316 | |
317 | /// Remapping table for identifier IDs in this module. |
318 | ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap; |
319 | |
320 | /// Actual data for the on-disk hash table of identifiers. |
321 | /// |
322 | /// This pointer points into a memory buffer, where the on-disk hash |
323 | /// table for identifiers actually lives. |
324 | const unsigned char *IdentifierTableData = nullptr; |
325 | |
326 | /// A pointer to an on-disk hash table of opaque type |
327 | /// IdentifierHashTable. |
328 | void *IdentifierLookupTable = nullptr; |
329 | |
330 | /// Offsets of identifiers that we're going to preload within |
331 | /// IdentifierTableData. |
332 | std::vector<unsigned> PreloadIdentifierOffsets; |
333 | |
334 | // === Macros === |
335 | |
336 | /// The cursor to the start of the preprocessor block, which stores |
337 | /// all of the macro definitions. |
338 | llvm::BitstreamCursor MacroCursor; |
339 | |
340 | /// The number of macros in this AST file. |
341 | unsigned LocalNumMacros = 0; |
342 | |
343 | /// Base file offset for the offsets in MacroOffsets. Real file offset for |
344 | /// the entry is MacroOffsetsBase + MacroOffsets[i]. |
345 | uint64_t MacroOffsetsBase = 0; |
346 | |
347 | /// Offsets of macros in the preprocessor block. |
348 | /// |
349 | /// This array is indexed by the macro ID (-1), and provides |
350 | /// the offset into the preprocessor block where macro definitions are |
351 | /// stored. |
352 | const uint32_t *MacroOffsets = nullptr; |
353 | |
354 | /// Base macro ID for macros local to this module. |
355 | serialization::MacroID BaseMacroID = 0; |
356 | |
357 | /// Remapping table for macro IDs in this module. |
358 | ContinuousRangeMap<uint32_t, int, 2> MacroRemap; |
359 | |
360 | /// The offset of the start of the set of defined macros. |
361 | uint64_t MacroStartOffset = 0; |
362 | |
363 | // === Detailed PreprocessingRecord === |
364 | |
365 | /// The cursor to the start of the (optional) detailed preprocessing |
366 | /// record block. |
367 | llvm::BitstreamCursor PreprocessorDetailCursor; |
368 | |
369 | /// The offset of the start of the preprocessor detail cursor. |
370 | uint64_t PreprocessorDetailStartOffset = 0; |
371 | |
372 | /// Base preprocessed entity ID for preprocessed entities local to |
373 | /// this module. |
374 | serialization::PreprocessedEntityID BasePreprocessedEntityID = 0; |
375 | |
376 | /// Remapping table for preprocessed entity IDs in this module. |
377 | ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap; |
378 | |
379 | const PPEntityOffset *PreprocessedEntityOffsets = nullptr; |
380 | unsigned NumPreprocessedEntities = 0; |
381 | |
382 | /// Base ID for preprocessed skipped ranges local to this module. |
383 | unsigned BasePreprocessedSkippedRangeID = 0; |
384 | |
385 | const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr; |
386 | unsigned NumPreprocessedSkippedRanges = 0; |
387 | |
388 | // === Header search information === |
389 | |
390 | /// The number of local HeaderFileInfo structures. |
391 | unsigned = 0; |
392 | |
393 | /// Actual data for the on-disk hash table of header file |
394 | /// information. |
395 | /// |
396 | /// This pointer points into a memory buffer, where the on-disk hash |
397 | /// table for header file information actually lives. |
398 | const char * = nullptr; |
399 | |
400 | /// The on-disk hash table that contains information about each of |
401 | /// the header files. |
402 | void * = nullptr; |
403 | |
404 | // === Submodule information === |
405 | |
406 | /// The number of submodules in this module. |
407 | unsigned LocalNumSubmodules = 0; |
408 | |
409 | /// Base submodule ID for submodules local to this module. |
410 | serialization::SubmoduleID BaseSubmoduleID = 0; |
411 | |
412 | /// Remapping table for submodule IDs in this module. |
413 | ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; |
414 | |
415 | // === Selectors === |
416 | |
417 | /// The number of selectors new to this file. |
418 | /// |
419 | /// This is the number of entries in SelectorOffsets. |
420 | unsigned LocalNumSelectors = 0; |
421 | |
422 | /// Offsets into the selector lookup table's data array |
423 | /// where each selector resides. |
424 | const uint32_t *SelectorOffsets = nullptr; |
425 | |
426 | /// Base selector ID for selectors local to this module. |
427 | serialization::SelectorID BaseSelectorID = 0; |
428 | |
429 | /// Remapping table for selector IDs in this module. |
430 | ContinuousRangeMap<uint32_t, int, 2> SelectorRemap; |
431 | |
432 | /// A pointer to the character data that comprises the selector table |
433 | /// |
434 | /// The SelectorOffsets table refers into this memory. |
435 | const unsigned char *SelectorLookupTableData = nullptr; |
436 | |
437 | /// A pointer to an on-disk hash table of opaque type |
438 | /// ASTSelectorLookupTable. |
439 | /// |
440 | /// This hash table provides the IDs of all selectors, and the associated |
441 | /// instance and factory methods. |
442 | void *SelectorLookupTable = nullptr; |
443 | |
444 | // === Declarations === |
445 | |
446 | /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block. |
447 | /// It has read all the abbreviations at the start of the block and is ready |
448 | /// to jump around with these in context. |
449 | llvm::BitstreamCursor DeclsCursor; |
450 | |
451 | /// The offset to the start of the DECLTYPES_BLOCK block. |
452 | uint64_t DeclsBlockStartOffset = 0; |
453 | |
454 | /// The number of declarations in this AST file. |
455 | unsigned LocalNumDecls = 0; |
456 | |
457 | /// Offset of each declaration within the bitstream, indexed |
458 | /// by the declaration ID (-1). |
459 | const DeclOffset *DeclOffsets = nullptr; |
460 | |
461 | /// Base declaration ID for declarations local to this module. |
462 | serialization::DeclID BaseDeclID = 0; |
463 | |
464 | /// Remapping table for declaration IDs in this module. |
465 | ContinuousRangeMap<serialization::DeclID, int, 2> DeclRemap; |
466 | |
467 | /// Mapping from the module files that this module file depends on |
468 | /// to the base declaration ID for that module as it is understood within this |
469 | /// module. |
470 | /// |
471 | /// This is effectively a reverse global-to-local mapping for declaration |
472 | /// IDs, so that we can interpret a true global ID (for this translation unit) |
473 | /// as a local ID (for this module file). |
474 | llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs; |
475 | |
476 | /// Array of file-level DeclIDs sorted by file. |
477 | const serialization::LocalDeclID *FileSortedDecls = nullptr; |
478 | unsigned NumFileSortedDecls = 0; |
479 | |
480 | /// Array of category list location information within this |
481 | /// module file, sorted by the definition ID. |
482 | const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr; |
483 | |
484 | /// The number of redeclaration info entries in ObjCCategoriesMap. |
485 | unsigned LocalNumObjCCategoriesInMap = 0; |
486 | |
487 | /// The Objective-C category lists for categories known to this |
488 | /// module. |
489 | SmallVector<uint64_t, 1> ObjCCategories; |
490 | |
491 | // === Types === |
492 | |
493 | /// The number of types in this AST file. |
494 | unsigned LocalNumTypes = 0; |
495 | |
496 | /// Offset of each type within the bitstream, indexed by the |
497 | /// type ID, or the representation of a Type*. |
498 | const UnderalignedInt64 *TypeOffsets = nullptr; |
499 | |
500 | /// Base type ID for types local to this module as represented in |
501 | /// the global type ID space. |
502 | serialization::TypeID BaseTypeIndex = 0; |
503 | |
504 | /// Remapping table for type IDs in this module. |
505 | ContinuousRangeMap<uint32_t, int, 2> TypeRemap; |
506 | |
507 | // === Miscellaneous === |
508 | |
509 | /// Diagnostic IDs and their mappings that the user changed. |
510 | SmallVector<uint64_t, 8> PragmaDiagMappings; |
511 | |
512 | /// List of modules which depend on this module |
513 | llvm::SetVector<ModuleFile *> ImportedBy; |
514 | |
515 | /// List of modules which this module depends on |
516 | llvm::SetVector<ModuleFile *> Imports; |
517 | |
518 | /// Determine whether this module was directly imported at |
519 | /// any point during translation. |
520 | bool isDirectlyImported() const { return DirectlyImported; } |
521 | |
522 | /// Is this a module file for a module (rather than a PCH or similar). |
523 | bool isModule() const { |
524 | return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule || |
525 | Kind == MK_PrebuiltModule; |
526 | } |
527 | |
528 | /// Dump debugging output for this module. |
529 | void dump(); |
530 | }; |
531 | |
532 | } // namespace serialization |
533 | |
534 | } // namespace clang |
535 | |
536 | #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H |
537 | |