1//===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class for loading and validating a module map or
10// header list by checking that all headers in the corresponding directories
11// are accounted for.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/SourceManager.h"
16#include "clang/Driver/Options.h"
17#include "clang/Frontend/CompilerInstance.h"
18#include "clang/Frontend/FrontendActions.h"
19#include "CoverageChecker.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/Support/FileUtilities.h"
22#include "llvm/Support/MemoryBuffer.h"
23#include "llvm/Support/Path.h"
24#include "llvm/Support/raw_ostream.h"
25#include "ModularizeUtilities.h"
26
27using namespace clang;
28using namespace llvm;
29using namespace Modularize;
30
31namespace {
32// Subclass TargetOptions so we can construct it inline with
33// the minimal option, the triple.
34class ModuleMapTargetOptions : public clang::TargetOptions {
35public:
36 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
37};
38} // namespace
39
40// ModularizeUtilities class implementation.
41
42// Constructor.
43ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
44 llvm::StringRef Prefix,
45 llvm::StringRef ProblemFilesListPath)
46 : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
47 ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
48 MissingHeaderCount(0),
49 // Init clang stuff needed for loading the module map and preprocessing.
50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51 DC(llvm::errs(), DiagnosticOpts),
52 Diagnostics(new DiagnosticsEngine(DiagIDs, DiagnosticOpts, &DC, false)),
53 TargetOpts(new ModuleMapTargetOptions()),
54 Target(TargetInfo::CreateTargetInfo(Diags&: *Diagnostics, Opts&: *TargetOpts)),
55 FileMgr(new FileManager(FileSystemOpts)),
56 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)), HSOpts(),
57 HeaderInfo(new HeaderSearch(HSOpts, *SourceMgr, *Diagnostics, *LangOpts,
58 Target.get())) {}
59
60// Create instance of ModularizeUtilities, to simplify setting up
61// subordinate objects.
62ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
63 std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
64 llvm::StringRef ProblemFilesListPath) {
65
66 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
67}
68
69// Load all header lists and dependencies.
70std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
71 // For each input file.
72 for (llvm::StringRef InputPath : InputFilePaths) {
73 // If it's a module map.
74 if (InputPath.ends_with(Suffix: ".modulemap")) {
75 // Load the module map.
76 if (std::error_code EC = loadModuleMap(InputPath))
77 return EC;
78 } else {
79 // Else we assume it's a header list and load it.
80 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
81 errs() << "modularize: error: Unable to get header list '" << InputPath
82 << "': " << EC.message() << '\n';
83 return EC;
84 }
85 }
86 }
87 // If we have a problem files list.
88 if (ProblemFilesPath.size() != 0) {
89 // Load problem files list.
90 if (std::error_code EC = loadProblemHeaderList(InputPath: ProblemFilesPath)) {
91 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
92 << "': " << EC.message() << '\n';
93 return EC;
94 }
95 }
96 return std::error_code();
97}
98
99// Do coverage checks.
100// For each loaded module map, do header coverage check.
101// Starting from the directory of the module.modulemap file,
102// Find all header files, optionally looking only at files
103// covered by the include path options, and compare against
104// the headers referenced by the module.modulemap file.
105// Display warnings for unaccounted-for header files.
106// Returns 0 if there were no errors or warnings, 1 if there
107// were warnings, 2 if any other problem, such as a bad
108// module map path argument was specified.
109std::error_code ModularizeUtilities::doCoverageCheck(
110 std::vector<std::string> &IncludePaths,
111 llvm::ArrayRef<std::string> CommandLine) {
112 int ModuleMapCount = ModuleMaps.size();
113 int ModuleMapIndex;
114 std::error_code EC;
115 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
116 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
117 auto Checker = CoverageChecker::createCoverageChecker(
118 ModuleMapPath: InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
119 ModuleMap: ModMap.get());
120 std::error_code LocalEC = Checker->doChecks();
121 if (LocalEC.value() > 0)
122 EC = LocalEC;
123 }
124 return EC;
125}
126
127// Load single header list and dependencies.
128std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
129 llvm::StringRef InputPath) {
130
131 // By default, use the path component of the list file name.
132 SmallString<256> HeaderDirectory(InputPath);
133 llvm::sys::path::remove_filename(path&: HeaderDirectory);
134 SmallString<256> CurrentDirectory;
135 llvm::sys::fs::current_path(result&: CurrentDirectory);
136
137 // Get the prefix if we have one.
138 if (HeaderPrefix.size() != 0)
139 HeaderDirectory = HeaderPrefix;
140
141 // Read the header list file into a buffer.
142 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
143 MemoryBuffer::getFile(Filename: InputPath);
144 if (std::error_code EC = listBuffer.getError())
145 return EC;
146
147 // Parse the header list into strings.
148 SmallVector<StringRef, 32> Strings;
149 listBuffer.get()->getBuffer().split(A&: Strings, Separator: "\n", MaxSplit: -1, KeepEmpty: false);
150
151 // Collect the header file names from the string list.
152 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
153 E = Strings.end();
154 I != E; ++I) {
155 StringRef Line = I->trim();
156 // Ignore comments and empty lines.
157 if (Line.empty() || (Line[0] == '#'))
158 continue;
159 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(Separator: ':');
160 SmallString<256> HeaderFileName;
161 // Prepend header file name prefix if it's not absolute.
162 if (llvm::sys::path::is_absolute(path: TargetAndDependents.first))
163 llvm::sys::path::native(path: TargetAndDependents.first, result&: HeaderFileName);
164 else {
165 if (HeaderDirectory.size() != 0)
166 HeaderFileName = HeaderDirectory;
167 else
168 HeaderFileName = CurrentDirectory;
169 llvm::sys::path::append(path&: HeaderFileName, a: TargetAndDependents.first);
170 llvm::sys::path::native(path&: HeaderFileName);
171 }
172 // Handle optional dependencies.
173 DependentsVector Dependents;
174 SmallVector<StringRef, 4> DependentsList;
175 TargetAndDependents.second.split(A&: DependentsList, Separator: " ", MaxSplit: -1, KeepEmpty: false);
176 int Count = DependentsList.size();
177 for (int Index = 0; Index < Count; ++Index) {
178 SmallString<256> Dependent;
179 if (llvm::sys::path::is_absolute(path: DependentsList[Index]))
180 Dependent = DependentsList[Index];
181 else {
182 if (HeaderDirectory.size() != 0)
183 Dependent = HeaderDirectory;
184 else
185 Dependent = CurrentDirectory;
186 llvm::sys::path::append(path&: Dependent, a: DependentsList[Index]);
187 }
188 llvm::sys::path::native(path&: Dependent);
189 Dependents.push_back(Elt: getCanonicalPath(FilePath: Dependent.str()));
190 }
191 // Get canonical form.
192 HeaderFileName = getCanonicalPath(FilePath: HeaderFileName);
193 // Save the resulting header file path and dependencies.
194 HeaderFileNames.push_back(Elt: std::string(HeaderFileName));
195 Dependencies[HeaderFileName.str()] = Dependents;
196 }
197 return std::error_code();
198}
199
200// Load problem header list.
201std::error_code ModularizeUtilities::loadProblemHeaderList(
202 llvm::StringRef InputPath) {
203
204 // By default, use the path component of the list file name.
205 SmallString<256> HeaderDirectory(InputPath);
206 llvm::sys::path::remove_filename(path&: HeaderDirectory);
207 SmallString<256> CurrentDirectory;
208 llvm::sys::fs::current_path(result&: CurrentDirectory);
209
210 // Get the prefix if we have one.
211 if (HeaderPrefix.size() != 0)
212 HeaderDirectory = HeaderPrefix;
213
214 // Read the header list file into a buffer.
215 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
216 MemoryBuffer::getFile(Filename: InputPath);
217 if (std::error_code EC = listBuffer.getError())
218 return EC;
219
220 // Parse the header list into strings.
221 SmallVector<StringRef, 32> Strings;
222 listBuffer.get()->getBuffer().split(A&: Strings, Separator: "\n", MaxSplit: -1, KeepEmpty: false);
223
224 // Collect the header file names from the string list.
225 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
226 E = Strings.end();
227 I != E; ++I) {
228 StringRef Line = I->trim();
229 // Ignore comments and empty lines.
230 if (Line.empty() || (Line[0] == '#'))
231 continue;
232 SmallString<256> HeaderFileName;
233 // Prepend header file name prefix if it's not absolute.
234 if (llvm::sys::path::is_absolute(path: Line))
235 llvm::sys::path::native(path: Line, result&: HeaderFileName);
236 else {
237 if (HeaderDirectory.size() != 0)
238 HeaderFileName = HeaderDirectory;
239 else
240 HeaderFileName = CurrentDirectory;
241 llvm::sys::path::append(path&: HeaderFileName, a: Line);
242 llvm::sys::path::native(path&: HeaderFileName);
243 }
244 // Get canonical form.
245 HeaderFileName = getCanonicalPath(FilePath: HeaderFileName);
246 // Save the resulting header file path.
247 ProblemFileNames.push_back(Elt: std::string(HeaderFileName));
248 }
249 return std::error_code();
250}
251
252// Load single module map and extract header file list.
253std::error_code ModularizeUtilities::loadModuleMap(
254 llvm::StringRef InputPath) {
255 // Get file entry for module.modulemap file.
256 auto ModuleMapEntryOrErr = SourceMgr->getFileManager().getFileRef(Filename: InputPath);
257
258 // return error if not found.
259 if (!ModuleMapEntryOrErr) {
260 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
261 return errorToErrorCode(Err: ModuleMapEntryOrErr.takeError());
262 }
263 FileEntryRef ModuleMapEntry = *ModuleMapEntryOrErr;
264
265 // Because the module map parser uses a ForwardingDiagnosticConsumer,
266 // which doesn't forward the BeginSourceFile call, we do it explicitly here.
267 DC.BeginSourceFile(LO: *LangOpts, PP: nullptr);
268
269 // Figure out the home directory for the module map file.
270 DirectoryEntryRef Dir = ModuleMapEntry.getDir();
271 StringRef DirName(Dir.getName());
272 if (llvm::sys::path::filename(path: DirName) == "Modules") {
273 DirName = llvm::sys::path::parent_path(path: DirName);
274 if (DirName.ends_with(Suffix: ".framework")) {
275 auto FrameworkDirOrErr = FileMgr->getDirectoryRef(DirName);
276 if (!FrameworkDirOrErr) {
277 // This can happen if there's a race between the above check and the
278 // removal of the directory.
279 return errorToErrorCode(Err: FrameworkDirOrErr.takeError());
280 }
281 Dir = *FrameworkDirOrErr;
282 }
283 }
284
285 std::unique_ptr<ModuleMap> ModMap;
286 ModMap.reset(p: new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
287 Target.get(), *HeaderInfo));
288
289 // Parse module.modulemap file into module map.
290 if (ModMap->parseAndLoadModuleMapFile(File: ModuleMapEntry, IsSystem: false, HomeDir: Dir)) {
291 return std::error_code(1, std::generic_category());
292 }
293
294 // Do matching end call.
295 DC.EndSourceFile();
296
297 // Reset missing header count.
298 MissingHeaderCount = 0;
299
300 if (!collectModuleMapHeaders(ModMap: ModMap.get()))
301 return std::error_code(1, std::generic_category());
302
303 // Save module map.
304 ModuleMaps.push_back(x: std::move(ModMap));
305
306 // Indicate we are using module maps.
307 HasModuleMap = true;
308
309 // Return code of 1 for missing headers.
310 if (MissingHeaderCount)
311 return std::error_code(1, std::generic_category());
312
313 return std::error_code();
314}
315
316// Collect module map headers.
317// Walks the modules and collects referenced headers into
318// HeaderFileNames.
319bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
320 SmallVector<std::pair<StringRef, const clang::Module *>, 0> Vec;
321 for (auto &M : ModMap->modules())
322 Vec.emplace_back(Args: M.first(), Args: M.second);
323 llvm::sort(C&: Vec, Comp: llvm::less_first());
324 for (auto &I : Vec)
325 if (!collectModuleHeaders(Mod: *I.second))
326 return false;
327 return true;
328}
329
330// Collect referenced headers from one module.
331// Collects the headers referenced in the given module into
332// HeaderFileNames.
333bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
334
335 // Ignore explicit modules because they often have dependencies
336 // we can't know.
337 if (Mod.IsExplicit)
338 return true;
339
340 // Treat headers in umbrella directory as dependencies.
341 DependentsVector UmbrellaDependents;
342
343 // Recursively do submodules.
344 for (auto *Submodule : Mod.submodules())
345 collectModuleHeaders(Mod: *Submodule);
346
347 if (std::optional<clang::Module::Header> UmbrellaHeader =
348 Mod.getUmbrellaHeaderAsWritten()) {
349 std::string HeaderPath = getCanonicalPath(FilePath: UmbrellaHeader->Entry.getName());
350 // Collect umbrella header.
351 HeaderFileNames.push_back(Elt: HeaderPath);
352
353 // FUTURE: When needed, umbrella header header collection goes here.
354 } else if (std::optional<clang::Module::DirectoryName> UmbrellaDir =
355 Mod.getUmbrellaDirAsWritten()) {
356 // If there normal headers, assume these are umbrellas and skip collection.
357 if (Mod.getHeaders(HK: Module::HK_Normal).empty()) {
358 // Collect headers in umbrella directory.
359 if (!collectUmbrellaHeaders(UmbrellaDirName: UmbrellaDir->Entry.getName(),
360 Dependents&: UmbrellaDependents))
361 return false;
362 }
363 }
364
365 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
366 // assuming they are marked as such either because of unsuitability for
367 // modules or because they are meant to be included by another header,
368 // and thus should be ignored by modularize.
369
370 for (const auto &Header : Mod.getHeaders(HK: clang::Module::HK_Normal))
371 HeaderFileNames.push_back(Elt: getCanonicalPath(FilePath: Header.Entry.getName()));
372
373 int MissingCountThisModule = Mod.MissingHeaders.size();
374
375 for (int Index = 0; Index < MissingCountThisModule; ++Index) {
376 std::string MissingFile = Mod.MissingHeaders[Index].FileName;
377 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
378 errs() << Loc.printToString(SM: *SourceMgr)
379 << ": error : Header not found: " << MissingFile << "\n";
380 }
381
382 MissingHeaderCount += MissingCountThisModule;
383
384 return true;
385}
386
387// Collect headers from an umbrella directory.
388bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
389 DependentsVector &Dependents) {
390 // Initialize directory name.
391 SmallString<256> Directory(UmbrellaDirName);
392 // Walk the directory.
393 std::error_code EC;
394 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
395 I.increment(ec&: EC)) {
396 if (EC)
397 return false;
398 std::string File(I->path());
399 llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
400 if (!Status)
401 return false;
402 llvm::sys::fs::file_type Type = Status->type();
403 // If the file is a directory, ignore the name and recurse.
404 if (Type == llvm::sys::fs::file_type::directory_file) {
405 if (!collectUmbrellaHeaders(UmbrellaDirName: File, Dependents))
406 return false;
407 continue;
408 }
409 // If the file does not have a common header extension, ignore it.
410 if (!isHeader(FileName: File))
411 continue;
412 // Save header name.
413 std::string HeaderPath = getCanonicalPath(FilePath: File);
414 Dependents.push_back(Elt: HeaderPath);
415 }
416 return true;
417}
418
419// Replace .. embedded in path for purposes of having
420// a canonical path.
421static std::string replaceDotDot(StringRef Path) {
422 SmallString<128> Buffer;
423 llvm::sys::path::const_iterator B = llvm::sys::path::begin(path: Path),
424 E = llvm::sys::path::end(path: Path);
425 while (B != E) {
426 if (*B == "..")
427 llvm::sys::path::remove_filename(path&: Buffer);
428 else if (*B != ".")
429 llvm::sys::path::append(path&: Buffer, a: *B);
430 ++B;
431 }
432 if (Path.ends_with(Suffix: "/") || Path.ends_with(Suffix: "\\"))
433 Buffer.append(NumInputs: 1, Elt: Path.back());
434 return Buffer.c_str();
435}
436
437// Convert header path to canonical form.
438// The canonical form is basically just use forward slashes, and remove "./".
439// \param FilePath The file path, relative to the module map directory.
440// \returns The file path in canonical form.
441std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
442 std::string Tmp(replaceDotDot(Path: FilePath));
443 llvm::replace(Range&: Tmp, OldValue: '\\', NewValue: '/');
444 StringRef Tmp2(Tmp);
445 if (Tmp2.starts_with(Prefix: "./"))
446 Tmp = std::string(Tmp2.substr(Start: 2));
447 return Tmp;
448}
449
450// Check for header file extension.
451// If the file extension is .h, .inc, or missing, it's
452// assumed to be a header.
453// \param FileName The file name. Must not be a directory.
454// \returns true if it has a header extension or no extension.
455bool ModularizeUtilities::isHeader(StringRef FileName) {
456 StringRef Extension = llvm::sys::path::extension(path: FileName);
457 if (Extension.size() == 0)
458 return true;
459 if (Extension.equals_insensitive(RHS: ".h"))
460 return true;
461 if (Extension.equals_insensitive(RHS: ".inc"))
462 return true;
463 return false;
464}
465
466// Get directory path component from file path.
467// \returns the component of the given path, which will be
468// relative if the given path is relative, absolute if the
469// given path is absolute, or "." if the path has no leading
470// path component.
471std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
472 SmallString<256> Directory(Path);
473 sys::path::remove_filename(path&: Directory);
474 if (Directory.size() == 0)
475 return ".";
476 return std::string(Directory);
477}
478
479// Add unique problem file.
480// Also standardizes the path.
481void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
482 FilePath = getCanonicalPath(FilePath);
483 // Don't add if already present.
484 for(auto &TestFilePath : ProblemFileNames) {
485 if (TestFilePath == FilePath)
486 return;
487 }
488 ProblemFileNames.push_back(Elt: FilePath);
489}
490
491// Add file with no compile errors.
492// Also standardizes the path.
493void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
494 FilePath = getCanonicalPath(FilePath);
495 GoodFileNames.push_back(Elt: FilePath);
496}
497
498// List problem files.
499void ModularizeUtilities::displayProblemFiles() {
500 errs() << "\nThese are the files with possible errors:\n\n";
501 for (auto &ProblemFile : ProblemFileNames) {
502 errs() << ProblemFile << "\n";
503 }
504}
505
506// List files with no problems.
507void ModularizeUtilities::displayGoodFiles() {
508 errs() << "\nThese are the files with no detected errors:\n\n";
509 for (auto &GoodFile : HeaderFileNames) {
510 bool Good = true;
511 for (auto &ProblemFile : ProblemFileNames) {
512 if (ProblemFile == GoodFile) {
513 Good = false;
514 break;
515 }
516 }
517 if (Good)
518 errs() << GoodFile << "\n";
519 }
520}
521
522// List files with problem files commented out.
523void ModularizeUtilities::displayCombinedFiles() {
524 errs() <<
525 "\nThese are the combined files, with problem files preceded by #:\n\n";
526 for (auto &File : HeaderFileNames) {
527 bool Good = true;
528 for (auto &ProblemFile : ProblemFileNames) {
529 if (ProblemFile == File) {
530 Good = false;
531 break;
532 }
533 }
534 errs() << (Good ? "" : "#") << File << "\n";
535 }
536}
537

source code of clang-tools-extra/modularize/ModularizeUtilities.cpp