1//===--- BuildConfusableTable.cpp - clang-tidy---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/STLExtras.h"
10#include "llvm/ADT/StringExtras.h"
11#include "llvm/Support/ConvertUTF.h"
12#include "llvm/Support/MemoryBuffer.h"
13#include "llvm/Support/raw_ostream.h"
14
15using namespace llvm;
16
17int main(int argc, char *argv[]) {
18 auto ErrorOrBuffer = MemoryBuffer::getFile(Filename: argv[1], IsText: true);
19 if (!ErrorOrBuffer)
20 return 1;
21 std::unique_ptr<MemoryBuffer> Buffer = std::move(ErrorOrBuffer.get());
22 StringRef Content = Buffer->getBuffer();
23 Content = Content.drop_until(F: [](char C) { return C == '#'; });
24 SmallVector<StringRef> Lines;
25 SplitString(Source: Content, OutFragments&: Lines, Delimiters: "\r\n");
26
27 std::vector<std::pair<llvm::UTF32, SmallVector<llvm::UTF32>>> Entries;
28 SmallVector<StringRef> Values;
29 for (StringRef Line : Lines) {
30 if (Line.starts_with(Prefix: "#"))
31 continue;
32
33 Values.clear();
34 Line.split(A&: Values, Separator: ';');
35 if (Values.size() < 2) {
36 errs() << "Failed to parse: " << Line << "\n";
37 return 2;
38 }
39
40 llvm::StringRef From = Values[0].trim();
41 llvm::UTF32 CodePoint = 0;
42 From.getAsInteger(Radix: 16, Result&: CodePoint);
43
44 SmallVector<llvm::UTF32> To;
45 SmallVector<StringRef> ToN;
46 Values[1].split(A&: ToN, Separator: ' ', MaxSplit: -1, KeepEmpty: false);
47 for (StringRef ToI : ToN) {
48 llvm::UTF32 ToCodePoint = 0;
49 ToI.trim().getAsInteger(Radix: 16, Result&: ToCodePoint);
50 To.push_back(Elt: ToCodePoint);
51 }
52 // Sentinel
53 To.push_back(Elt: 0);
54
55 Entries.emplace_back(args&: CodePoint, args&: To);
56 }
57 llvm::sort(C&: Entries);
58
59 unsigned LargestValue =
60 llvm::max_element(Range&: Entries, C: [](const auto &Entry0, const auto &Entry1) {
61 return Entry0.second.size() < Entry1.second.size();
62 })->second.size();
63
64 std::error_code Ec;
65 llvm::raw_fd_ostream Os(argv[2], Ec);
66
67 // FIXME: If memory consumption and/or lookup time becomes a constraint, it
68 // maybe worth using a more elaborate data structure.
69 Os << "struct {llvm::UTF32 codepoint; llvm::UTF32 values[" << LargestValue
70 << "];} "
71 "ConfusableEntries[] = {\n";
72 for (const auto &Values : Entries) {
73 Os << " { ";
74 Os << Values.first;
75 Os << ", {";
76 for (auto CP : Values.second)
77 Os << CP << ", ";
78
79 Os << "}},\n";
80 }
81 Os << "};\n";
82 return 0;
83}
84

// Source: clang-tools-extra/clang-tidy/misc/ConfusableTable/BuildConfusableTable.cpp