1//===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class for loading and validating a module map or
10// header list by checking that all headers in the corresponding directories
11// are accounted for.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/SourceManager.h"
16#include "clang/Driver/Options.h"
17#include "clang/Frontend/CompilerInstance.h"
18#include "clang/Frontend/FrontendActions.h"
19#include "CoverageChecker.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/Support/FileUtilities.h"
22#include "llvm/Support/MemoryBuffer.h"
23#include "llvm/Support/Path.h"
24#include "llvm/Support/raw_ostream.h"
25#include "ModularizeUtilities.h"
26
27using namespace clang;
28using namespace llvm;
29using namespace Modularize;
30
31namespace {
32// Subclass TargetOptions so we can construct it inline with
33// the minimal option, the triple.
34class ModuleMapTargetOptions : public clang::TargetOptions {
35public:
36 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
37};
38} // namespace
39
40// ModularizeUtilities class implementation.
41
42// Constructor.
43ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
44 llvm::StringRef Prefix,
45 llvm::StringRef ProblemFilesListPath)
46 : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
47 ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
48 MissingHeaderCount(0),
49 // Init clang stuff needed for loading the module map and preprocessing.
50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51 DiagnosticOpts(new DiagnosticOptions()),
52 DC(llvm::errs(), DiagnosticOpts.get()),
53 Diagnostics(
54 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
55 TargetOpts(new ModuleMapTargetOptions()),
56 Target(TargetInfo::CreateTargetInfo(Diags&: *Diagnostics, Opts: TargetOpts)),
57 FileMgr(new FileManager(FileSystemOpts)),
58 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
59 HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
60 *SourceMgr, *Diagnostics, *LangOpts,
61 Target.get())) {}
62
63// Create instance of ModularizeUtilities, to simplify setting up
64// subordinate objects.
65ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
66 std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
67 llvm::StringRef ProblemFilesListPath) {
68
69 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
70}
71
72// Load all header lists and dependencies.
73std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
74 // For each input file.
75 for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
76 llvm::StringRef InputPath = *I;
77 // If it's a module map.
78 if (InputPath.ends_with(Suffix: ".modulemap")) {
79 // Load the module map.
80 if (std::error_code EC = loadModuleMap(InputPath))
81 return EC;
82 } else {
83 // Else we assume it's a header list and load it.
84 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
85 errs() << "modularize: error: Unable to get header list '" << InputPath
86 << "': " << EC.message() << '\n';
87 return EC;
88 }
89 }
90 }
91 // If we have a problem files list.
92 if (ProblemFilesPath.size() != 0) {
93 // Load problem files list.
94 if (std::error_code EC = loadProblemHeaderList(InputPath: ProblemFilesPath)) {
95 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
96 << "': " << EC.message() << '\n';
97 return EC;
98 }
99 }
100 return std::error_code();
101}
102
103// Do coverage checks.
104// For each loaded module map, do header coverage check.
105// Starting from the directory of the module.modulemap file,
106// Find all header files, optionally looking only at files
107// covered by the include path options, and compare against
108// the headers referenced by the module.modulemap file.
109// Display warnings for unaccounted-for header files.
110// Returns 0 if there were no errors or warnings, 1 if there
111// were warnings, 2 if any other problem, such as a bad
112// module map path argument was specified.
113std::error_code ModularizeUtilities::doCoverageCheck(
114 std::vector<std::string> &IncludePaths,
115 llvm::ArrayRef<std::string> CommandLine) {
116 int ModuleMapCount = ModuleMaps.size();
117 int ModuleMapIndex;
118 std::error_code EC;
119 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
120 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
121 auto Checker = CoverageChecker::createCoverageChecker(
122 ModuleMapPath: InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
123 ModuleMap: ModMap.get());
124 std::error_code LocalEC = Checker->doChecks();
125 if (LocalEC.value() > 0)
126 EC = LocalEC;
127 }
128 return EC;
129}
130
131// Load single header list and dependencies.
132std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
133 llvm::StringRef InputPath) {
134
135 // By default, use the path component of the list file name.
136 SmallString<256> HeaderDirectory(InputPath);
137 llvm::sys::path::remove_filename(path&: HeaderDirectory);
138 SmallString<256> CurrentDirectory;
139 llvm::sys::fs::current_path(result&: CurrentDirectory);
140
141 // Get the prefix if we have one.
142 if (HeaderPrefix.size() != 0)
143 HeaderDirectory = HeaderPrefix;
144
145 // Read the header list file into a buffer.
146 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
147 MemoryBuffer::getFile(Filename: InputPath);
148 if (std::error_code EC = listBuffer.getError())
149 return EC;
150
151 // Parse the header list into strings.
152 SmallVector<StringRef, 32> Strings;
153 listBuffer.get()->getBuffer().split(A&: Strings, Separator: "\n", MaxSplit: -1, KeepEmpty: false);
154
155 // Collect the header file names from the string list.
156 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
157 E = Strings.end();
158 I != E; ++I) {
159 StringRef Line = I->trim();
160 // Ignore comments and empty lines.
161 if (Line.empty() || (Line[0] == '#'))
162 continue;
163 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(Separator: ':');
164 SmallString<256> HeaderFileName;
165 // Prepend header file name prefix if it's not absolute.
166 if (llvm::sys::path::is_absolute(path: TargetAndDependents.first))
167 llvm::sys::path::native(path: TargetAndDependents.first, result&: HeaderFileName);
168 else {
169 if (HeaderDirectory.size() != 0)
170 HeaderFileName = HeaderDirectory;
171 else
172 HeaderFileName = CurrentDirectory;
173 llvm::sys::path::append(path&: HeaderFileName, a: TargetAndDependents.first);
174 llvm::sys::path::native(path&: HeaderFileName);
175 }
176 // Handle optional dependencies.
177 DependentsVector Dependents;
178 SmallVector<StringRef, 4> DependentsList;
179 TargetAndDependents.second.split(A&: DependentsList, Separator: " ", MaxSplit: -1, KeepEmpty: false);
180 int Count = DependentsList.size();
181 for (int Index = 0; Index < Count; ++Index) {
182 SmallString<256> Dependent;
183 if (llvm::sys::path::is_absolute(path: DependentsList[Index]))
184 Dependent = DependentsList[Index];
185 else {
186 if (HeaderDirectory.size() != 0)
187 Dependent = HeaderDirectory;
188 else
189 Dependent = CurrentDirectory;
190 llvm::sys::path::append(path&: Dependent, a: DependentsList[Index]);
191 }
192 llvm::sys::path::native(path&: Dependent);
193 Dependents.push_back(Elt: getCanonicalPath(FilePath: Dependent.str()));
194 }
195 // Get canonical form.
196 HeaderFileName = getCanonicalPath(FilePath: HeaderFileName);
197 // Save the resulting header file path and dependencies.
198 HeaderFileNames.push_back(Elt: std::string(HeaderFileName));
199 Dependencies[HeaderFileName.str()] = Dependents;
200 }
201 return std::error_code();
202}
203
204// Load problem header list.
205std::error_code ModularizeUtilities::loadProblemHeaderList(
206 llvm::StringRef InputPath) {
207
208 // By default, use the path component of the list file name.
209 SmallString<256> HeaderDirectory(InputPath);
210 llvm::sys::path::remove_filename(path&: HeaderDirectory);
211 SmallString<256> CurrentDirectory;
212 llvm::sys::fs::current_path(result&: CurrentDirectory);
213
214 // Get the prefix if we have one.
215 if (HeaderPrefix.size() != 0)
216 HeaderDirectory = HeaderPrefix;
217
218 // Read the header list file into a buffer.
219 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
220 MemoryBuffer::getFile(Filename: InputPath);
221 if (std::error_code EC = listBuffer.getError())
222 return EC;
223
224 // Parse the header list into strings.
225 SmallVector<StringRef, 32> Strings;
226 listBuffer.get()->getBuffer().split(A&: Strings, Separator: "\n", MaxSplit: -1, KeepEmpty: false);
227
228 // Collect the header file names from the string list.
229 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
230 E = Strings.end();
231 I != E; ++I) {
232 StringRef Line = I->trim();
233 // Ignore comments and empty lines.
234 if (Line.empty() || (Line[0] == '#'))
235 continue;
236 SmallString<256> HeaderFileName;
237 // Prepend header file name prefix if it's not absolute.
238 if (llvm::sys::path::is_absolute(path: Line))
239 llvm::sys::path::native(path: Line, result&: HeaderFileName);
240 else {
241 if (HeaderDirectory.size() != 0)
242 HeaderFileName = HeaderDirectory;
243 else
244 HeaderFileName = CurrentDirectory;
245 llvm::sys::path::append(path&: HeaderFileName, a: Line);
246 llvm::sys::path::native(path&: HeaderFileName);
247 }
248 // Get canonical form.
249 HeaderFileName = getCanonicalPath(FilePath: HeaderFileName);
250 // Save the resulting header file path.
251 ProblemFileNames.push_back(Elt: std::string(HeaderFileName));
252 }
253 return std::error_code();
254}
255
256// Load single module map and extract header file list.
257std::error_code ModularizeUtilities::loadModuleMap(
258 llvm::StringRef InputPath) {
259 // Get file entry for module.modulemap file.
260 auto ModuleMapEntryOrErr = SourceMgr->getFileManager().getFileRef(Filename: InputPath);
261
262 // return error if not found.
263 if (!ModuleMapEntryOrErr) {
264 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
265 return errorToErrorCode(Err: ModuleMapEntryOrErr.takeError());
266 }
267 FileEntryRef ModuleMapEntry = *ModuleMapEntryOrErr;
268
269 // Because the module map parser uses a ForwardingDiagnosticConsumer,
270 // which doesn't forward the BeginSourceFile call, we do it explicitly here.
271 DC.BeginSourceFile(LO: *LangOpts, PP: nullptr);
272
273 // Figure out the home directory for the module map file.
274 DirectoryEntryRef Dir = ModuleMapEntry.getDir();
275 StringRef DirName(Dir.getName());
276 if (llvm::sys::path::filename(path: DirName) == "Modules") {
277 DirName = llvm::sys::path::parent_path(path: DirName);
278 if (DirName.ends_with(Suffix: ".framework")) {
279 auto FrameworkDirOrErr = FileMgr->getDirectoryRef(DirName);
280 if (!FrameworkDirOrErr) {
281 // This can happen if there's a race between the above check and the
282 // removal of the directory.
283 return errorToErrorCode(Err: FrameworkDirOrErr.takeError());
284 }
285 Dir = *FrameworkDirOrErr;
286 }
287 }
288
289 std::unique_ptr<ModuleMap> ModMap;
290 ModMap.reset(p: new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
291 Target.get(), *HeaderInfo));
292
293 // Parse module.modulemap file into module map.
294 if (ModMap->parseModuleMapFile(File: ModuleMapEntry, IsSystem: false, HomeDir: Dir)) {
295 return std::error_code(1, std::generic_category());
296 }
297
298 // Do matching end call.
299 DC.EndSourceFile();
300
301 // Reset missing header count.
302 MissingHeaderCount = 0;
303
304 if (!collectModuleMapHeaders(ModMap: ModMap.get()))
305 return std::error_code(1, std::generic_category());
306
307 // Save module map.
308 ModuleMaps.push_back(x: std::move(ModMap));
309
310 // Indicate we are using module maps.
311 HasModuleMap = true;
312
313 // Return code of 1 for missing headers.
314 if (MissingHeaderCount)
315 return std::error_code(1, std::generic_category());
316
317 return std::error_code();
318}
319
320// Collect module map headers.
321// Walks the modules and collects referenced headers into
322// HeaderFileNames.
323bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
324 SmallVector<std::pair<StringRef, const clang::Module *>, 0> Vec;
325 for (auto &M : ModMap->modules())
326 Vec.emplace_back(Args: M.first(), Args: M.second);
327 llvm::sort(C&: Vec, Comp: llvm::less_first());
328 for (auto &I : Vec)
329 if (!collectModuleHeaders(Mod: *I.second))
330 return false;
331 return true;
332}
333
334// Collect referenced headers from one module.
335// Collects the headers referenced in the given module into
336// HeaderFileNames.
337bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
338
339 // Ignore explicit modules because they often have dependencies
340 // we can't know.
341 if (Mod.IsExplicit)
342 return true;
343
344 // Treat headers in umbrella directory as dependencies.
345 DependentsVector UmbrellaDependents;
346
347 // Recursively do submodules.
348 for (auto *Submodule : Mod.submodules())
349 collectModuleHeaders(Mod: *Submodule);
350
351 if (std::optional<clang::Module::Header> UmbrellaHeader =
352 Mod.getUmbrellaHeaderAsWritten()) {
353 std::string HeaderPath = getCanonicalPath(FilePath: UmbrellaHeader->Entry.getName());
354 // Collect umbrella header.
355 HeaderFileNames.push_back(Elt: HeaderPath);
356
357 // FUTURE: When needed, umbrella header header collection goes here.
358 } else if (std::optional<clang::Module::DirectoryName> UmbrellaDir =
359 Mod.getUmbrellaDirAsWritten()) {
360 // If there normal headers, assume these are umbrellas and skip collection.
361 if (Mod.Headers->size() == 0) {
362 // Collect headers in umbrella directory.
363 if (!collectUmbrellaHeaders(UmbrellaDirName: UmbrellaDir->Entry.getName(),
364 Dependents&: UmbrellaDependents))
365 return false;
366 }
367 }
368
369 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
370 // assuming they are marked as such either because of unsuitability for
371 // modules or because they are meant to be included by another header,
372 // and thus should be ignored by modularize.
373
374 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
375
376 for (int Index = 0; Index < NormalHeaderCount; ++Index) {
377 DependentsVector NormalDependents;
378 // Collect normal header.
379 const clang::Module::Header &Header(
380 Mod.Headers[clang::Module::HK_Normal][Index]);
381 std::string HeaderPath = getCanonicalPath(FilePath: Header.Entry.getName());
382 HeaderFileNames.push_back(Elt: HeaderPath);
383 }
384
385 int MissingCountThisModule = Mod.MissingHeaders.size();
386
387 for (int Index = 0; Index < MissingCountThisModule; ++Index) {
388 std::string MissingFile = Mod.MissingHeaders[Index].FileName;
389 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
390 errs() << Loc.printToString(SM: *SourceMgr)
391 << ": error : Header not found: " << MissingFile << "\n";
392 }
393
394 MissingHeaderCount += MissingCountThisModule;
395
396 return true;
397}
398
399// Collect headers from an umbrella directory.
400bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
401 DependentsVector &Dependents) {
402 // Initialize directory name.
403 SmallString<256> Directory(UmbrellaDirName);
404 // Walk the directory.
405 std::error_code EC;
406 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
407 I.increment(ec&: EC)) {
408 if (EC)
409 return false;
410 std::string File(I->path());
411 llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
412 if (!Status)
413 return false;
414 llvm::sys::fs::file_type Type = Status->type();
415 // If the file is a directory, ignore the name and recurse.
416 if (Type == llvm::sys::fs::file_type::directory_file) {
417 if (!collectUmbrellaHeaders(UmbrellaDirName: File, Dependents))
418 return false;
419 continue;
420 }
421 // If the file does not have a common header extension, ignore it.
422 if (!isHeader(FileName: File))
423 continue;
424 // Save header name.
425 std::string HeaderPath = getCanonicalPath(FilePath: File);
426 Dependents.push_back(Elt: HeaderPath);
427 }
428 return true;
429}
430
431// Replace .. embedded in path for purposes of having
432// a canonical path.
433static std::string replaceDotDot(StringRef Path) {
434 SmallString<128> Buffer;
435 llvm::sys::path::const_iterator B = llvm::sys::path::begin(path: Path),
436 E = llvm::sys::path::end(path: Path);
437 while (B != E) {
438 if (B->compare(RHS: ".") == 0) {
439 }
440 else if (B->compare(RHS: "..") == 0)
441 llvm::sys::path::remove_filename(path&: Buffer);
442 else
443 llvm::sys::path::append(path&: Buffer, a: *B);
444 ++B;
445 }
446 if (Path.ends_with(Suffix: "/") || Path.ends_with(Suffix: "\\"))
447 Buffer.append(NumInputs: 1, Elt: Path.back());
448 return Buffer.c_str();
449}
450
451// Convert header path to canonical form.
452// The canonical form is basically just use forward slashes, and remove "./".
453// \param FilePath The file path, relative to the module map directory.
454// \returns The file path in canonical form.
455std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
456 std::string Tmp(replaceDotDot(Path: FilePath));
457 std::replace(first: Tmp.begin(), last: Tmp.end(), old_value: '\\', new_value: '/');
458 StringRef Tmp2(Tmp);
459 if (Tmp2.starts_with(Prefix: "./"))
460 Tmp = std::string(Tmp2.substr(Start: 2));
461 return Tmp;
462}
463
464// Check for header file extension.
465// If the file extension is .h, .inc, or missing, it's
466// assumed to be a header.
467// \param FileName The file name. Must not be a directory.
468// \returns true if it has a header extension or no extension.
469bool ModularizeUtilities::isHeader(StringRef FileName) {
470 StringRef Extension = llvm::sys::path::extension(path: FileName);
471 if (Extension.size() == 0)
472 return true;
473 if (Extension.equals_insensitive(RHS: ".h"))
474 return true;
475 if (Extension.equals_insensitive(RHS: ".inc"))
476 return true;
477 return false;
478}
479
480// Get directory path component from file path.
481// \returns the component of the given path, which will be
482// relative if the given path is relative, absolute if the
483// given path is absolute, or "." if the path has no leading
484// path component.
485std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
486 SmallString<256> Directory(Path);
487 sys::path::remove_filename(path&: Directory);
488 if (Directory.size() == 0)
489 return ".";
490 return std::string(Directory);
491}
492
493// Add unique problem file.
494// Also standardizes the path.
495void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
496 FilePath = getCanonicalPath(FilePath);
497 // Don't add if already present.
498 for(auto &TestFilePath : ProblemFileNames) {
499 if (TestFilePath == FilePath)
500 return;
501 }
502 ProblemFileNames.push_back(Elt: FilePath);
503}
504
505// Add file with no compile errors.
506// Also standardizes the path.
507void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
508 FilePath = getCanonicalPath(FilePath);
509 GoodFileNames.push_back(Elt: FilePath);
510}
511
512// List problem files.
513void ModularizeUtilities::displayProblemFiles() {
514 errs() << "\nThese are the files with possible errors:\n\n";
515 for (auto &ProblemFile : ProblemFileNames) {
516 errs() << ProblemFile << "\n";
517 }
518}
519
520// List files with no problems.
521void ModularizeUtilities::displayGoodFiles() {
522 errs() << "\nThese are the files with no detected errors:\n\n";
523 for (auto &GoodFile : HeaderFileNames) {
524 bool Good = true;
525 for (auto &ProblemFile : ProblemFileNames) {
526 if (ProblemFile == GoodFile) {
527 Good = false;
528 break;
529 }
530 }
531 if (Good)
532 errs() << GoodFile << "\n";
533 }
534}
535
536// List files with problem files commented out.
537void ModularizeUtilities::displayCombinedFiles() {
538 errs() <<
539 "\nThese are the combined files, with problem files preceded by #:\n\n";
540 for (auto &File : HeaderFileNames) {
541 bool Good = true;
542 for (auto &ProblemFile : ProblemFileNames) {
543 if (ProblemFile == File) {
544 Good = false;
545 break;
546 }
547 }
548 errs() << (Good ? "" : "#") << File << "\n";
549 }
550}
551

// Source: clang-tools-extra/modularize/ModularizeUtilities.cpp