IdentifierTable.cpp source code [clang/lib/Basic/IdentifierTable.cpp]

1	//===- IdentifierTable.cpp - Hash table for identifier lookup -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the IdentifierInfo, IdentifierVisitor, and
10	// IdentifierTable interfaces.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "clang/Basic/IdentifierTable.h"
15	#include "clang/Basic/CharInfo.h"
16	#include "clang/Basic/DiagnosticLex.h"
17	#include "clang/Basic/LangOptions.h"
18	#include "clang/Basic/OperatorKinds.h"
19	#include "clang/Basic/Specifiers.h"
20	#include "clang/Basic/TargetBuiltins.h"
21	#include "clang/Basic/TokenKinds.h"
22	#include "llvm/ADT/DenseMapInfo.h"
23	#include "llvm/ADT/FoldingSet.h"
24	#include "llvm/ADT/StringMap.h"
25	#include "llvm/ADT/StringRef.h"
26	#include "llvm/Support/Allocator.h"
27	#include "llvm/Support/raw_ostream.h"
28	#include <cassert>
29	#include <cstdio>
30	#include <cstring>
31	#include <string>
32
33	using namespace clang;
34
35	// A check to make sure the ObjCOrBuiltinID has sufficient room to store the
36	// largest possible target/aux-target combination. If we exceed this, we likely
37	// need to just change the ObjCOrBuiltinIDBits value in IdentifierTable.h.
38	static_assert(`2` * LargestBuiltinID < (`2` << (InterestingIdentifierBits - `1`)),
39	"Insufficient ObjCOrBuiltinID Bits");
40
41	//===----------------------------------------------------------------------===//
42	// IdentifierTable Implementation
43	//===----------------------------------------------------------------------===//
44
// Out-of-line definition of the IdentifierIterator destructor; the
// compiler-generated body is used.
45	IdentifierIterator::~IdentifierIterator() = default;
46
// Out-of-line definition of the IdentifierInfoLookup destructor; the
// compiler-generated body is used.
47	IdentifierInfoLookup::~IdentifierInfoLookup() = default;
48
49	namespace {
50
51	/// A simple identifier lookup iterator that represents an
52	/// empty sequence of identifiers.
53	class EmptyLookupIterator : public IdentifierIterator {
54	public:
55	StringRef Next() override { return StringRef(); }
56	};
57
58	} // namespace
59
60	IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
61	return new EmptyLookupIterator ();
62	}
63
64	IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
65	: HashTable(`8192`), // Start with space for 8K identifiers.
66	ExternalLookup(ExternalLookup) {}
67
68	IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
69	IdentifierInfoLookup *ExternalLookup)
70	: IdentifierTable (ExternalLookup) {
71	// Populate the identifier table with info about keywords for the current
72	// language.
73	AddKeywords(LangOpts);
74	}
75
76	//===----------------------------------------------------------------------===//
77	// Language Keyword Implementation
78	//===----------------------------------------------------------------------===//
79
// Constants for TokenKinds.def
namespace {

/// Bit flags describing in which language modes a keyword is available.
/// Each plain enumerator occupies exactly one bit; KEYALLCXX and KEYALL are
/// combined masks.
enum TokenKey : unsigned {
  KEYC99 = 1u << 0,
  KEYCXX = 1u << 1,
  KEYCXX11 = 1u << 2,
  KEYGNU = 1u << 3,
  KEYMS = 1u << 4,
  BOOLSUPPORT = 1u << 5,
  KEYALTIVEC = 1u << 6,
  KEYNOCXX = 1u << 7,
  KEYBORLAND = 1u << 8,
  KEYOPENCLC = 1u << 9,
  KEYC23 = 1u << 10,
  KEYNOMS18 = 1u << 11,
  KEYNOOPENCL = 1u << 12,
  WCHARSUPPORT = 1u << 13,
  HALFSUPPORT = 1u << 14,
  CHAR8SUPPORT = 1u << 15,
  KEYOBJC = 1u << 16,
  KEYZVECTOR = 1u << 17,
  KEYCOROUTINES = 1u << 18,
  KEYMODULES = 1u << 19,
  KEYCXX20 = 1u << 20,
  KEYOPENCLCXX = 1u << 21,
  KEYMSCOMPAT = 1u << 22,
  KEYSYCL = 1u << 23,
  KEYCUDA = 1u << 24,
  KEYZOS = 1u << 25,
  KEYNOZOS = 1u << 26,
  KEYHLSL = 1u << 27,
  KEYFIXEDPOINT = 1u << 28,
  KEYMAX = KEYFIXEDPOINT, // The maximum key
  KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,
  // Every bit up to and including KEYMAX, except the three "disable" flags
  // KEYNOMS18, KEYNOOPENCL and KEYNOZOS.
  KEYALL = (KEYMAX | (KEYMAX - 1)) & ~KEYNOMS18 & ~KEYNOOPENCL & ~KEYNOZOS
};

/// How a keyword is treated in the selected standard. The enumerators are
/// deliberately ordered from least to most permissive so that the larger of
/// two values is the one that 'wins' when statuses are merged.
enum KeywordStatus {
  KS_Unknown,   // Not yet calculated. Used when figuring out the status.
  KS_Disabled,  // Disabled
  KS_Future,    // Is a keyword in future standard
  KS_Extension, // Is an extension
  KS_Enabled,   // Enabled
};

} // namespace
130
131	// This works on a single TokenKey flag and checks the LangOpts to get the
132	// KeywordStatus based exclusively on this flag, so that it can be merged in
133	// getKeywordStatus. Most should be enabled/disabled, but some might imply
134	// 'future' versions, or extensions. Returns 'unknown' unless this is KNOWN to
135	// be disabled, and the calling function makes it 'disabled' if no other flag
136	// changes it. This is necessary for the KEYNOCXX and KEYNOOPENCL flags.
137	static KeywordStatus getKeywordStatusHelper(const LangOptions &LangOpts,
138	TokenKey Flag) {
139	// Flag is a single bit version of TokenKey (that is, not
140	// KEYALL/KEYALLCXX/etc), so we can check with == throughout this function.
141	assert((Flag & ~(Flag - `1`)) == Flag && "Multiple bits set?");
142
143	switch (Flag) {
144	case KEYC99:
145	if (LangOpts.C99)
146	return KS_Enabled;
147	return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
148	case KEYC23:
149	if (LangOpts.C23)
150	return KS_Enabled;
151	return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
152	case KEYCXX:
153	return LangOpts.CPlusPlus ? KS_Enabled : KS_Unknown;
154	case KEYCXX11:
155	if (LangOpts.CPlusPlus11)
156	return KS_Enabled;
157	return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
158	case KEYCXX20:
159	if (LangOpts.CPlusPlus20)
160	return KS_Enabled;
161	return LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
162	case KEYGNU:
163	return LangOpts.GNUKeywords ? KS_Extension : KS_Unknown;
164	case KEYMS:
165	return LangOpts.MicrosoftExt ? KS_Extension : KS_Unknown;
166	case BOOLSUPPORT:
167	if (LangOpts.Bool) return KS_Enabled;
168	return !LangOpts.CPlusPlus ? KS_Future : KS_Unknown;
169	case KEYALTIVEC:
170	return LangOpts.AltiVec ? KS_Enabled : KS_Unknown;
171	case KEYBORLAND:
172	return LangOpts.Borland ? KS_Extension : KS_Unknown;
173	case KEYOPENCLC:
174	return LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus ? KS_Enabled
175	: KS_Unknown;
176	case WCHARSUPPORT:
177	return LangOpts.WChar ? KS_Enabled : KS_Unknown;
178	case HALFSUPPORT:
179	return LangOpts.Half ? KS_Enabled : KS_Unknown;
180	case CHAR8SUPPORT:
181	if (LangOpts.Char8) return KS_Enabled;
182	if (LangOpts.CPlusPlus20) return KS_Unknown;
183	if (LangOpts.CPlusPlus) return KS_Future;
184	return KS_Unknown;
185	case KEYOBJC:
186	// We treat bridge casts as objective-C keywords so we can warn on them
187	// in non-arc mode.
188	return LangOpts.ObjC ? KS_Enabled : KS_Unknown;
189	case KEYZVECTOR:
190	return LangOpts.ZVector ? KS_Enabled : KS_Unknown;
191	case KEYCOROUTINES:
192	return LangOpts.Coroutines ? KS_Enabled : KS_Unknown;
193	case KEYMODULES:
194	return KS_Unknown;
195	case KEYOPENCLCXX:
196	return LangOpts.OpenCLCPlusPlus ? KS_Enabled : KS_Unknown;
197	case KEYMSCOMPAT:
198	return LangOpts.MSVCCompat ? KS_Enabled : KS_Unknown;
199	case KEYSYCL:
200	return LangOpts.isSYCL() ? KS_Enabled : KS_Unknown;
201	case KEYCUDA:
202	return LangOpts.CUDA ? KS_Enabled : KS_Unknown;
203	case KEYZOS:
204	return LangOpts.ZOSExt ? KS_Enabled : KS_Unknown;
205	case KEYHLSL:
206	return LangOpts.HLSL ? KS_Enabled : KS_Unknown;
207	case KEYNOCXX:
208	// This is enabled in all non-C++ modes, but might be enabled for other
209	// reasons as well.
210	return LangOpts.CPlusPlus ? KS_Unknown : KS_Enabled;
211	case KEYNOOPENCL:
212	case KEYNOMS18:
213	case KEYNOZOS:
214	// The disable behavior for this is handled in getKeywordStatus.
215	return KS_Unknown;
216	case KEYFIXEDPOINT:
217	return LangOpts.FixedPoint ? KS_Enabled : KS_Disabled;
218	default:
219	llvm_unreachable("Unknown KeywordStatus flag");
220	}
221	}
222
223	/// Translates flags as specified in TokenKinds.def into keyword status
224	/// in the given language standard.
225	static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
226	unsigned Flags) {
227	// KEYALL means always enabled, so special case this one.
228	if (Flags == KEYALL) return KS_Enabled;
229	// These are tests that need to 'always win', as they are special in that they
230	// disable based on certain conditions.
231	if (LangOpts.OpenCL && (Flags & KEYNOOPENCL)) return KS_Disabled;
232	if (LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
233	!LangOpts.isCompatibleWithMSVC(MajorVersion: LangOptions::MSVC2015))
234	return KS_Disabled;
235	if (LangOpts.ZOSExt && (Flags & KEYNOZOS))
236	return KS_Disabled;
237	KeywordStatus CurStatus = KS_Unknown;
238
239	while (Flags != `0`) {
240	unsigned CurFlag = Flags & ~(Flags - `1`);
241	Flags = Flags & ~CurFlag;
242	CurStatus = std::max(
243	a: CurStatus,
244	b: getKeywordStatusHelper(LangOpts, Flag: static_cast<TokenKey>(CurFlag)));
245	}
246
247	if (CurStatus == KS_Unknown)
248	return KS_Disabled;
249	return CurStatus;
250	}
251
252	static bool IsKeywordInCpp(unsigned Flags) {
253	return (Flags & (KEYCXX \| KEYCXX11 \| KEYCXX20 \| BOOLSUPPORT \| WCHARSUPPORT \|
254	CHAR8SUPPORT)) != `0`;
255	}
256
257	static void MarkIdentifierAsKeywordInCpp(IdentifierTable &Table,
258	StringRef Name) {
259	IdentifierInfo &II = Table.get(Name, TokenCode: tok::identifier);
260	II.setIsKeywordInCPlusPlus();
261	II.setHandleIdentifierCase();
262	}
263
264	/// AddKeyword - This method is used to associate a token ID with specific
265	/// identifiers because they are language keywords. This causes the lexer to
266	/// automatically map matching identifiers to specialized token codes.
267	static void AddKeyword(StringRef Keyword,
268	tok::TokenKind TokenCode, unsigned Flags,
269	const LangOptions &LangOpts, IdentifierTable &Table) {
270	KeywordStatus AddResult = getKeywordStatus(LangOpts, Flags);
271
272	// Don't add this keyword if disabled in this language and isn't otherwise
273	// special.
274	if (AddResult == KS_Disabled) {
275	// We do not consider any identifiers to be C++ keywords when in
276	// Objective-C because @ effectively introduces a custom grammar where C++
277	// keywords can be used (and similar for selectors). We could enable this
278	// for Objective-C, but it would require more logic to ensure we do not
279	// issue compatibility diagnostics in these cases.
280	if (!LangOpts.ObjC && IsKeywordInCpp(Flags))
281	MarkIdentifierAsKeywordInCpp(Table, Name: Keyword);
282	return;
283	}
284
285	IdentifierInfo &Info =
286	Table.get(Name: Keyword, TokenCode: AddResult == KS_Future ? tok::identifier : TokenCode);
287	Info.setIsExtensionToken(AddResult == KS_Extension);
288	Info.setIsFutureCompatKeyword(AddResult == KS_Future);
289	}
290
291	/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative
292	/// representations.
293	static void AddCXXOperatorKeyword(StringRef Keyword,
294	tok::TokenKind TokenCode,
295	IdentifierTable &Table) {
296	IdentifierInfo &Info = Table.get(Name: Keyword, TokenCode);
297	Info.setIsCPlusPlusOperatorKeyword();
298	}
299
300	/// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector"
301	/// or "property".
302	static void AddObjCKeyword(StringRef Name,
303	tok::ObjCKeywordKind ObjCID,
304	IdentifierTable &Table) {
305	Table.get(Name).setObjCKeywordID(ObjCID);
306	}
307
308	static void AddNotableIdentifier(StringRef Name,
309	tok::NotableIdentifierKind BTID,
310	IdentifierTable &Table) {
311	// Don't add 'not_notable' identifier.
312	if (BTID != tok::not_notable) {
313	IdentifierInfo &Info = Table.get(Name, TokenCode: tok::identifier);
314	Info.setNotableIdentifierID(BTID);
315	}
316	}
317
318	/// AddKeywords - Add all keywords to the symbol table.
319	///
320	void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
321	// Add keywords and tokens for the current language.
322	#define KEYWORD(NAME, FLAGS) \
323	AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \
324	FLAGS, LangOpts, *this);
325	#define ALIAS(NAME, TOK, FLAGS) \
326	AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \
327	FLAGS, LangOpts, *this);
328	#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
329	if (LangOpts.CXXOperatorNames) \
330	AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this); \
331	else \
332	MarkIdentifierAsKeywordInCpp(*this, StringRef(#NAME));
333	#define OBJC_AT_KEYWORD(NAME) \
334	if (LangOpts.ObjC) \
335	AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
336	#define NOTABLE_IDENTIFIER(NAME) \
337	AddNotableIdentifier(StringRef(#NAME), tok::NAME, *this);
338
339	#define TESTING_KEYWORD(NAME, FLAGS)
340	#include "clang/Basic/TokenKinds.def"
341
342	if (LangOpts.ParseUnknownAnytype)
343	AddKeyword(Keyword: "__unknown_anytype", TokenCode: tok::kw___unknown_anytype, Flags: KEYALL,
344	LangOpts, Table&: *this);
345
346	if (LangOpts.DeclSpecKeyword)
347	AddKeyword(Keyword: "__declspec", TokenCode: tok::kw___declspec, Flags: KEYALL, LangOpts, Table&: *this);
348
349	if (LangOpts.IEEE128)
350	AddKeyword(Keyword: "__ieee128", TokenCode: tok::kw___float128, Flags: KEYALL, LangOpts, Table&: *this);
351
352	// Add the 'import' contextual keyword.
353	get(Name: "import").setModulesImport(true);
354	}
355
356	/// Checks if the specified token kind represents a keyword in the
357	/// specified language.
358	/// \returns Status of the keyword in the language.
359	static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
360	tok::TokenKind K) {
361	switch (K) {
362	#define KEYWORD(NAME, FLAGS) \
363	case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
364	#include "clang/Basic/TokenKinds.def"
365	default: return KS_Disabled;
366	}
367	}
368
369	/// Returns true if the identifier represents a keyword in the
370	/// specified language.
371	bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
372	switch (getTokenKwStatus(LangOpts, K: getTokenID())) {
373	case KS_Enabled:
374	case KS_Extension:
375	return true;
376	default:
377	return false;
378	}
379	}
380
381	/// Returns true if the identifier represents a C++ keyword in the
382	/// specified language.
383	bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
384	if (!LangOpts.CPlusPlus \|\| !isKeyword(LangOpts))
385	return false;
386	// This is a C++ keyword if this identifier is not a keyword when checked
387	// using LangOptions without C++ support.
388	LangOptions LangOptsNoCPP = LangOpts;
389	LangOptsNoCPP.CPlusPlus = false;
390	LangOptsNoCPP.CPlusPlus11 = false;
391	LangOptsNoCPP.CPlusPlus20 = false;
392	return !isKeyword(LangOpts: LangOptsNoCPP);
393	}
394
395	ReservedIdentifierStatus
396	IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
397	StringRef Name = getName();
398
399	// '_' is a reserved identifier, but its use is so common (e.g. to store
400	// ignored values) that we don't warn on it.
401	if (Name.size() <= `1`)
402	return ReservedIdentifierStatus::NotReserved;
403
404	// [lex.name] p3
405	if (Name [`0`] == `'_'`) {
406
407	// Each name that begins with an underscore followed by an uppercase letter
408	// or another underscore is reserved.
409	if (Name [`1`] == `'_'`)
410	return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
411
412	if (`'A'` <= Name [`1`] && Name [`1`] <= `'Z'`)
413	return ReservedIdentifierStatus::
414	StartsWithUnderscoreFollowedByCapitalLetter;
415
416	// This is a bit misleading: it actually means it's only reserved if we're
417	// at global scope because it starts with an underscore.
418	return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
419	}
420
421	// Each name that contains a double underscore (__) is reserved.
422	if (LangOpts.CPlusPlus && Name.contains(Other: "__"))
423	return ReservedIdentifierStatus::ContainsDoubleUnderscore;
424
425	return ReservedIdentifierStatus::NotReserved;
426	}
427
428	ReservedLiteralSuffixIdStatus
429	IdentifierInfo::isReservedLiteralSuffixId() const {
430	StringRef Name = getName();
431
432	// Note: the diag::warn_deprecated_literal_operator_id diagnostic depends on
433	// this being the first check we do, so if this order changes, we have to fix
434	// that as well.
435	if (Name [`0`] != `'_'`)
436	return ReservedLiteralSuffixIdStatus::NotStartsWithUnderscore;
437
438	if (Name.contains(Other: "__"))
439	return ReservedLiteralSuffixIdStatus::ContainsDoubleUnderscore;
440
441	return ReservedLiteralSuffixIdStatus::NotReserved;
442	}
443
444	StringRef IdentifierInfo::deuglifiedName() const {
445	StringRef Name = getName();
446	if (Name.size() >= `2` && Name.front() == `'_'` &&
447	(Name [`1`] == `'_'` \|\| (Name [`1`] >= `'A'` && Name [`1`] <= `'Z'`)))
448	return Name.ltrim(Char: `'_'`);
449	return Name;
450	}
451
452	tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
453	// We use a perfect hash function here involving the length of the keyword,
454	// the first and third character. For preprocessor ID's there are no
455	// collisions (if there were, the switch below would complain about duplicate
456	// case values). Note that this depends on 'if' being null terminated.
457
458	#define HASH(LEN, FIRST, THIRD) \
459	(LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63)
460	#define CASE(LEN, FIRST, THIRD, NAME) \
461	case HASH(LEN, FIRST, THIRD): \
462	return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
463
464	unsigned Len = getLength();
465	if (Len < `2`) return tok::pp_not_keyword;
466	const char *Name = getNameStart();
467	switch (HASH(Len, Name[`0`], Name[`2`])) {
468	default: return tok::pp_not_keyword;
469	CASE( `2`, `'i'`, `'\0'`, if);
470	CASE( `4`, `'e'`, `'i'`, elif);
471	CASE( `4`, `'e'`, `'s'`, else);
472	CASE( `4`, `'l'`, `'n'`, line);
473	CASE( `4`, `'s'`, `'c'`, sccs);
474	CASE( `5`, `'e'`, `'b'`, embed);
475	CASE( `5`, `'e'`, `'d'`, endif);
476	CASE( `5`, `'e'`, `'r'`, error);
477	CASE( `5`, `'i'`, `'e'`, ident);
478	CASE( `5`, `'i'`, `'d'`, ifdef);
479	CASE( `5`, `'u'`, `'d'`, undef);
480
481	CASE( `6`, `'a'`, `'s'`, assert);
482	CASE( `6`, `'d'`, `'f'`, define);
483	CASE( `6`, `'i'`, `'n'`, ifndef);
484	CASE( `6`, `'i'`, `'p'`, import);
485	CASE( `6`, `'p'`, `'a'`, pragma);
486
487	CASE( `7`, `'d'`, `'f'`, defined);
488	CASE( `7`, `'e'`, `'i'`, elifdef);
489	CASE( `7`, `'i'`, `'c'`, include);
490	CASE( `7`, `'w'`, `'r'`, warning);
491
492	CASE( `8`, `'e'`, `'i'`, elifndef);
493	CASE( `8`, `'u'`, `'a'`, unassert);
494	CASE(`12`, `'i'`, `'c'`, include_next);
495
496	CASE(`14`, `'_'`, `'p'`, __public_macro);
497
498	CASE(`15`, `'_'`, `'p'`, __private_macro);
499
500	CASE(`16`, `'_'`, `'i'`, __include_macros);
501	#undef CASE
502	#undef HASH
503	}
504	}
505
506	//===----------------------------------------------------------------------===//
507	// Stats Implementation
508	//===----------------------------------------------------------------------===//
509
510	/// PrintStats - Print statistics about how well the identifier table is doing
511	/// at hashing identifiers.
512	void IdentifierTable::PrintStats() const {
513	unsigned NumBuckets = HashTable.getNumBuckets();
514	unsigned NumIdentifiers = HashTable.getNumItems();
515	unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
516	unsigned AverageIdentifierSize = `0`;
517	unsigned MaxIdentifierLength = `0`;
518
519	// TODO: Figure out maximum times an identifier had to probe for -stats.
520	for (llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator>::const_iterator
521	I = HashTable.begin(), E = HashTable.end(); I != E; ++I) {
522	unsigned IdLen = I ->getKeyLength();
523	AverageIdentifierSize += IdLen;
524	if (MaxIdentifierLength < IdLen)
525	MaxIdentifierLength = IdLen;
526	}
527
528	fprintf(stderr, format: "\n*** Identifier Table Stats:\n");
529	fprintf(stderr, format: "# Identifiers: %d\n", NumIdentifiers);
530	fprintf(stderr, format: "# Empty Buckets: %d\n", NumEmptyBuckets);
531	fprintf(stderr, format: "Hash density (#identifiers per bucket): %f\n",
532	NumIdentifiers/(double)NumBuckets);
533	fprintf(stderr, format: "Ave identifier length: %f\n",
534	(AverageIdentifierSize/(double)NumIdentifiers));
535	fprintf(stderr, format: "Max identifier length: %d\n", MaxIdentifierLength);
536
537	// Compute statistics about the memory allocated for identifiers.
538	HashTable.getAllocator().PrintStats();
539	}
540
541	//===----------------------------------------------------------------------===//
542	// SelectorTable Implementation
543	//===----------------------------------------------------------------------===//
544
545	unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
546	return DenseMapInfo<void*>::getHashValue(PtrVal: S.getAsOpaquePtr());
547	}
548
549	bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
550	assert(!Names.empty() && "must have >= 1 selector slots");
551	if (getNumArgs() != Names.size())
552	return false;
553	for (unsigned I = `0`, E = Names.size(); I != E; ++I) {
554	if (getNameForSlot(argIndex: I) != Names [I])
555	return false;
556	}
557	return true;
558	}
559
560	bool Selector::isUnarySelector(StringRef Name) const {
561	return isUnarySelector() && getNameForSlot(argIndex: `0`) == Name;
562	}
563
564	unsigned Selector::getNumArgs() const {
565	unsigned IIF = getIdentifierInfoFlag();
566	if (IIF <= ZeroArg)
567	return `0`;
568	if (IIF == OneArg)
569	return `1`;
570	// We point to a MultiKeywordSelector.
571	MultiKeywordSelector *SI = getMultiKeywordSelector();
572	return SI->getNumArgs();
573	}
574
575	const IdentifierInfo *
576	Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
577	if (getIdentifierInfoFlag() < MultiArg) {
578	assert(argIndex == `0` && "illegal keyword index");
579	return getAsIdentifierInfo();
580	}
581
582	// We point to a MultiKeywordSelector.
583	MultiKeywordSelector *SI = getMultiKeywordSelector();
584	return SI->getIdentifierInfoForSlot(i: argIndex);
585	}
586
587	StringRef Selector::getNameForSlot(unsigned int argIndex) const {
588	const IdentifierInfo *II = getIdentifierInfoForSlot(argIndex);
589	return II ? II->getName() : StringRef();
590	}
591
592	std::string MultiKeywordSelector::getName() const {
593	SmallString<`256`> Str;
594	llvm::raw_svector_ostream OS(Str);
595	for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
596	if (*I)
597	OS << (*I)->getName();
598	OS << `':'`;
599	}
600
601	return std::string (OS.str());
602	}
603
604	std::string Selector::getAsString() const {
605	if (isNull())
606	return "<null selector>";
607
608	if (getIdentifierInfoFlag() < MultiArg) {
609	const IdentifierInfo *II = getAsIdentifierInfo();
610
611	if (getNumArgs() == `0`) {
612	assert(II && "If the number of arguments is 0 then II is guaranteed to "
613	"not be null.");
614	return std::string (II->getName());
615	}
616
617	if (!II)
618	return ":";
619
620	return II->getName().str() + ":";
621	}
622
623	// We have a multiple keyword selector.
624	return getMultiKeywordSelector()->getName();
625	}
626
627	void Selector::print(llvm::raw_ostream &OS) const {
628	OS << getAsString();
629	}
630
631	LLVM_DUMP_METHOD void Selector::dump() const { print(OS&: llvm::errs()); }
632
633	/// Interpreting the given string using the normal CamelCase
634	/// conventions, determine whether the given string starts with the
635	/// given "word", which is assumed to end in a lowercase letter.
636	static bool startsWithWord(StringRef name, StringRef word) {
637	if (name.size() < word.size()) return false;
638	return ((name.size() == word.size() \|\| !isLowercase(c: name [word.size()])) &&
639	name.starts_with(Prefix: word));
640	}
641
642	ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
643	const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: `0`);
644	if (!first) return OMF_None;
645
646	StringRef name = first->getName();
647	if (sel.isUnarySelector()) {
648	if (name == "autorelease") return OMF_autorelease;
649	if (name == "dealloc") return OMF_dealloc;
650	if (name == "finalize") return OMF_finalize;
651	if (name == "release") return OMF_release;
652	if (name == "retain") return OMF_retain;
653	if (name == "retainCount") return OMF_retainCount;
654	if (name == "self") return OMF_self;
655	if (name == "initialize") return OMF_initialize;
656	}
657
658	if (name == "performSelector" \|\| name == "performSelectorInBackground" \|\|
659	name == "performSelectorOnMainThread")
660	return OMF_performSelector;
661
662	// The other method families may begin with a prefix of underscores.
663	name = name.ltrim(Char: `'_'`);
664
665	if (name.empty()) return OMF_None;
666	switch (name.front()) {
667	case `'a'`:
668	if (startsWithWord(name, word: "alloc")) return OMF_alloc;
669	break;
670	case `'c'`:
671	if (startsWithWord(name, word: "copy")) return OMF_copy;
672	break;
673	case `'i'`:
674	if (startsWithWord(name, word: "init")) return OMF_init;
675	break;
676	case `'m'`:
677	if (startsWithWord(name, word: "mutableCopy")) return OMF_mutableCopy;
678	break;
679	case `'n'`:
680	if (startsWithWord(name, word: "new")) return OMF_new;
681	break;
682	default:
683	break;
684	}
685
686	return OMF_None;
687	}
688
689	ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
690	const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: `0`);
691	if (!first) return OIT_None;
692
693	StringRef name = first->getName();
694
695	if (name.empty()) return OIT_None;
696	switch (name.front()) {
697	case `'a'`:
698	if (startsWithWord(name, word: "array")) return OIT_Array;
699	break;
700	case `'d'`:
701	if (startsWithWord(name, word: "default")) return OIT_ReturnsSelf;
702	if (startsWithWord(name, word: "dictionary")) return OIT_Dictionary;
703	break;
704	case `'s'`:
705	if (startsWithWord(name, word: "shared")) return OIT_ReturnsSelf;
706	if (startsWithWord(name, word: "standard")) return OIT_Singleton;
707	break;
708	case `'i'`:
709	if (startsWithWord(name, word: "init")) return OIT_Init;
710	break;
711	default:
712	break;
713	}
714	return OIT_None;
715	}
716
717	ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
718	const IdentifierInfo *first = sel.getIdentifierInfoForSlot(argIndex: `0`);
719	if (!first) return SFF_None;
720
721	StringRef name = first->getName();
722
723	switch (name.front()) {
724	case `'a'`:
725	if (name == "appendFormat") return SFF_NSString;
726	break;
727
728	case `'i'`:
729	if (name == "initWithFormat") return SFF_NSString;
730	break;
731
732	case `'l'`:
733	if (name == "localizedStringWithFormat") return SFF_NSString;
734	break;
735
736	case `'s'`:
737	if (name == "stringByAppendingFormat" \|\|
738	name == "stringWithFormat") return SFF_NSString;
739	break;
740	}
741	return SFF_None;
742	}
743
744	namespace {
745
// Private state behind SelectorTable's opaque Impl pointer (see
// getSelectorTableImpl below).
746	struct SelectorTableImpl {
// Set in which SelectorTable::getSelector looks up and inserts
// MultiKeywordSelector nodes.
747	llvm::FoldingSet<MultiKeywordSelector> Table;
// Allocator from which SelectorTable::getSelector obtains storage for new
// MultiKeywordSelector objects.
748	llvm::BumpPtrAllocator Allocator;
749	};
750
751	} // namespace
752
753	static SelectorTableImpl &getSelectorTableImpl(void *P) {
754	return *static_cast<SelectorTableImpl*>(P);
755	}
756
757	SmallString<`64`>
758	SelectorTable::constructSetterName(StringRef Name) {
759	SmallString<`64`> SetterName("set");
760	SetterName += Name;
761	SetterName [`3`] = toUppercase(c: SetterName [`3`]);
762	return SetterName;
763	}
764
765	Selector
766	SelectorTable::constructSetterSelector(IdentifierTable &Idents,
767	SelectorTable &SelTable,
768	const IdentifierInfo *Name) {
769	IdentifierInfo *SetterName =
770	&Idents.get(Name: constructSetterName(Name: Name->getName()));
771	return SelTable.getUnarySelector(ID: SetterName);
772	}
773
774	std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
775	StringRef Name = Sel.getNameForSlot(argIndex: `0`);
776	assert(Name.starts_with("set") && "invalid setter name");
777	return (Twine(toLowercase(c: Name [`3`])) + Name.drop_front(N: `4`)).str();
778	}
779
780	size_t SelectorTable::getTotalMemory() const {
781	SelectorTableImpl &SelTabImpl = getSelectorTableImpl(P: Impl);
782	return SelTabImpl.Allocator.getTotalMemory();
783	}
784
785	Selector SelectorTable::getSelector(unsigned nKeys,
786	const IdentifierInfo **IIV) {
787	if (nKeys < `2`)
788	return Selector (IIV[`0`], nKeys);
789
790	SelectorTableImpl &SelTabImpl = getSelectorTableImpl(P: Impl);
791
792	// Unique selector, to guarantee there is one per name.
793	llvm::FoldingSetNodeID ID;
794	MultiKeywordSelector::Profile(ID, ArgTys: IIV, NumArgs: nKeys);
795
796	void InsertPos = nullptr*;
797	if (MultiKeywordSelector *SI =
798	SelTabImpl.Table.FindNodeOrInsertPos(ID, InsertPos))
799	return Selector (SI);
800
801	// MultiKeywordSelector objects are not allocated with new because they have a
802	// variable size array (for parameter types) at the end of them.
803	unsigned Size = sizeof(MultiKeywordSelector) + nKeys*sizeof(IdentifierInfo *);
804	MultiKeywordSelector *SI =
805	(MultiKeywordSelector *)SelTabImpl.Allocator.Allocate(
806	Size, Alignment: alignof(MultiKeywordSelector));
807	new (SI) MultiKeywordSelector (nKeys, IIV);
808	SelTabImpl.Table.InsertNode(N: SI, InsertPos);
809	return Selector (SI);
810	}
811
812	SelectorTable::SelectorTable() {
813	Impl = new SelectorTableImpl ();
814	}
815
816	SelectorTable::~SelectorTable() {
817	delete &getSelectorTableImpl(P: Impl);
818	}
819
820	const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
821	switch (Operator) {
822	case OO_None:
823	case NUM_OVERLOADED_OPERATORS:
824	return nullptr;
825
826	#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
827	case OO_##Name: return Spelling;
828	#include "clang/Basic/OperatorKinds.def"
829	}
830
831	llvm_unreachable("Invalid OverloadedOperatorKind!");
832	}
833
834	StringRef clang::getNullabilitySpelling(NullabilityKind kind,
835	bool isContextSensitive) {
836	switch (kind) {
837	case NullabilityKind::NonNull:
838	return isContextSensitive ? "nonnull" : "_Nonnull";
839
840	case NullabilityKind::Nullable:
841	return isContextSensitive ? "nullable" : "_Nullable";
842
843	case NullabilityKind::NullableResult:
844	assert(!isContextSensitive &&
845	"_Nullable_result isn't supported as context-sensitive keyword");
846	return "_Nullable_result";
847
848	case NullabilityKind::Unspecified:
849	return isContextSensitive ? "null_unspecified" : "_Null_unspecified";
850	}
851	llvm_unreachable("Unknown nullability kind.");
852	}
853
854	llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS,
855	NullabilityKind NK) {
856	switch (NK) {
857	case NullabilityKind::NonNull:
858	return OS << "NonNull";
859	case NullabilityKind::Nullable:
860	return OS << "Nullable";
861	case NullabilityKind::NullableResult:
862	return OS << "NullableResult";
863	case NullabilityKind::Unspecified:
864	return OS << "Unspecified";
865	}
866	llvm_unreachable("Unknown nullability kind.");
867	}
868
869	diag::kind
870	IdentifierTable::getFutureCompatDiagKind(const IdentifierInfo &II,
871	const LangOptions &LangOpts) {
872	assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
873
874	unsigned Flags = llvm::StringSwitch<unsigned>(II.getName())
875	#define KEYWORD(NAME, FLAGS) .Case(#NAME, FLAGS)
876	#include "clang/Basic/TokenKinds.def"
877	#undef KEYWORD
878	;
879
880	if (LangOpts.CPlusPlus) {
881	if ((Flags & KEYCXX11) == KEYCXX11)
882	return diag::warn_cxx11_keyword;
883
884	// char8_t is not modeled as a CXX20_KEYWORD because it's not
885	// unconditionally enabled in C++20 mode. (It can be disabled
886	// by -fno-char8_t.)
887	if (((Flags & KEYCXX20) == KEYCXX20) \|\|
888	((Flags & CHAR8SUPPORT) == CHAR8SUPPORT))
889	return diag::warn_cxx20_keyword;
890	} else {
891	if ((Flags & KEYC99) == KEYC99)
892	return diag::warn_c99_keyword;
893	if ((Flags & KEYC23) == KEYC23)
894	return diag::warn_c23_keyword;
895	}
896
897	llvm_unreachable(
898	"Keyword not known to come from a newer Standard or proposed Standard");
899	}
900

source code of clang/lib/Basic/IdentifierTable.cpp