1/****************************************************************************
2 * Copyright (C) 2012-2016 Woboq GmbH
3 * Olivier Goffart <contact at woboq.com>
4 * https://woboq.com/codebrowser.html
5 *
6 * This file is part of the Woboq Code Browser.
7 *
8 * Commercial License Usage:
9 * Licensees holding valid commercial licenses provided by Woboq may use
10 * this file in accordance with the terms contained in a written agreement
11 * between the licensee and Woboq.
12 * For further information see https://woboq.com/codebrowser.html
13 *
14 * Alternatively, this work may be used under a Creative Commons
15 * Attribution-NonCommercial-ShareAlike 3.0 (CC-BY-NC-SA 3.0) License.
16 * http://creativecommons.org/licenses/by-nc-sa/3.0/deed.en_US
17 * This license does not allow you to use the code browser to assist the
18 * development of your commercial software. If you intent to do so, consider
19 * purchasing a commercial licence.
20 ****************************************************************************/
21
22#include "commenthandler.h"
23#include "annotator.h"
24#include "generator.h"
25#include "stringbuilder.h"
26#include <cctype>
27#include <clang/AST/ASTContext.h>
28#include <clang/AST/CommentParser.h>
29#include <clang/AST/CommentVisitor.h>
30#include <clang/AST/DeclTemplate.h>
31#include <clang/AST/RawCommentList.h>
32#include <clang/Basic/SourceManager.h>
33#include <clang/Basic/Version.h>
34#include <clang/Lex/Preprocessor.h>
35#include <clang/Sema/Lookup.h>
36#include <clang/Sema/Sema.h>
37#include <iostream>
38
39clang::NamedDecl *parseDeclarationReference(llvm::StringRef Text, clang::Sema &Sema,
40 bool isFunction)
41{
42
43 clang::Preprocessor &PP = Sema.getPreprocessor();
44
45 auto Buf = llvm::MemoryBuffer::getMemBufferCopy(InputData: Text);
46 llvm::MemoryBuffer *Buf2 = &*Buf;
47#if CLANG_VERSION_MAJOR == 3 && CLANG_VERSION_MINOR <= 4
48 auto FID = PP.getSourceManager().createFileIDForMemBuffer(Buf);
49#else
50 auto FID = PP.getSourceManager().createFileID(Buffer: std::move(Buf));
51#endif
52
53
54#if CLANG_VERSION_MAJOR >= 12
55 auto MemBufRef = Buf2->getMemBufferRef();
56 clang::Lexer Lex(FID, MemBufRef, PP.getSourceManager(), PP.getLangOpts());
57#else
58 clang::Lexer Lex(FID, Buf2, PP.getSourceManager(), PP.getLangOpts());
59#endif
60
61 auto TuDecl = Sema.getASTContext().getTranslationUnitDecl();
62 clang::CXXScopeSpec SS;
63 clang::Token Tok, Next;
64 Lex.LexFromRawLexer(Result&: Tok);
65
66 for (; !Tok.is(K: clang::tok::eof); Tok = Next) {
67 Lex.LexFromRawLexer(Result&: Next);
68 clang::IdentifierInfo *II = nullptr;
69 if (Tok.is(K: clang::tok::raw_identifier)) {
70 II = PP.LookUpIdentifierInfo(Identifier&: Tok);
71 }
72
73 if (Tok.is(K: clang::tok::coloncolon)) {
74 SS.MakeGlobal(Context&: Sema.getASTContext(), ColonColonLoc: Tok.getLocation());
75 continue;
76 } else if (Tok.is(K: clang::tok::identifier)) {
77
78 if (Next.is(K: clang::tok::coloncolon)) {
79
80 clang::Sema::TemplateTy Template;
81 clang::UnqualifiedId Name;
82 Name.setIdentifier(Id: II, IdLoc: Tok.getLocation());
83 bool dummy;
84 auto TemplateKind = Sema.isTemplateName(S: Sema.getScopeForContext(Ctx: TuDecl), SS, hasTemplateKeyword: false,
85 Name, ObjectType: {}, EnteringContext: false, Template, MemberOfUnknownSpecialization&: dummy);
86 if (TemplateKind == clang::TNK_Non_template) {
87#if CLANG_VERSION_MAJOR >= 4
88 clang::Sema::NestedNameSpecInfo nameInfo(II, Tok.getLocation(),
89 Next.getLocation());
90 if (Sema.ActOnCXXNestedNameSpecifier(S: Sema.getScopeForContext(Ctx: TuDecl), IdInfo&: nameInfo,
91 EnteringContext: false, SS))
92#else
93 if (Sema.ActOnCXXNestedNameSpecifier(Sema.getScopeForContext(TuDecl), *II,
94 Tok.getLocation(), Next.getLocation(), {},
95 false, SS))
96#endif
97 {
98 SS.SetInvalid(Tok.getLocation());
99 }
100 } else if (auto T = Template.get().getAsTemplateDecl()) {
101 // FIXME: For template, it is a bit tricky
102 // It is still a bit broken but works in some cases for most normal functions
103 auto T2 = llvm::dyn_cast_or_null<clang::CXXRecordDecl>(Val: T->getTemplatedDecl());
104 if (T2) {
105 Lex.LexFromRawLexer(Result&: Tok);
106 if (!Tok.is(K: clang::tok::raw_identifier))
107 return nullptr;
108 II = PP.LookUpIdentifierInfo(Identifier&: Tok);
109 Lex.LexFromRawLexer(Result&: Next);
110 if (!Next.is(K: clang::tok::eof) && !Next.is(K: clang::tok::l_paren))
111 return nullptr;
112 auto Result = T2->lookup(Name: II);
113#if CLANG_VERSION_MAJOR >= 13
114 if (Result.isSingleResult())
115 return nullptr;
116#else
117 if (Result.size() != 1)
118 return nullptr;
119#endif
120 auto D = Result.front();
121 if (isFunction
122 && (llvm::isa<clang::RecordDecl>(Val: D)
123 || llvm::isa<clang::ClassTemplateDecl>(Val: D))) {
124 // TODO constructor
125 return nullptr;
126 }
127 return D;
128 }
129 }
130 Lex.LexFromRawLexer(Result&: Next);
131 continue;
132 }
133
134 if (Next.is(K: clang::tok::eof) || Next.is(K: clang::tok::l_paren)) {
135 clang::LookupResult Found(Sema, II, Tok.getLocation(),
136 clang::Sema::LookupOrdinaryName);
137 Found.suppressDiagnostics();
138
139 if (SS.isEmpty())
140 Sema.LookupQualifiedName(R&: Found, LookupCtx: TuDecl);
141 else {
142 clang::DeclContext *DC = Sema.computeDeclContext(SS);
143 Sema.LookupQualifiedName(R&: Found, LookupCtx: DC ? DC : TuDecl);
144 }
145
146
147 if (Found.isSingleResult()) {
148 auto Decl = Found.getFoundDecl();
149 if (isFunction
150 && (llvm::isa<clang::RecordDecl>(Val: Decl)
151 || llvm::isa<clang::ClassTemplateDecl>(Val: Decl))) {
152 // TODO handle constructors.
153 return nullptr;
154 }
155 return Decl;
156 }
157
158 if (Found.isOverloadedResult() && Next.is(K: clang::tok::l_paren)) {
159 // TODO
160 }
161 return nullptr;
162 }
163 }
164 if (Tok.is(K: clang::tok::tilde) || Tok.is(K: clang::tok::kw_operator)) {
165 // TODO
166 return nullptr;
167 }
168
169 if (!isFunction)
170 return nullptr;
171 SS = {};
172 // Then it is probably the return type, just skip it.
173 }
174 return nullptr;
175}
176
177struct CommentHandler::CommentVisitor : clang::comments::ConstCommentVisitor<CommentVisitor>
178{
179 typedef clang::comments::ConstCommentVisitor<CommentVisitor> Base;
180 CommentVisitor(Annotator &annotator, Generator &generator,
181 const clang::comments::CommandTraits &traits, clang::Sema &Sema)
182 : annotator(annotator)
183 , generator(generator)
184 , traits(traits)
185 , Sema(Sema)
186 {
187 }
188 Annotator &annotator;
189 Generator &generator;
190 const clang::comments::CommandTraits &traits;
191 clang::Sema &Sema;
192
193 clang::NamedDecl *Decl = nullptr;
194 std::string DeclRef;
195 std::vector<std::pair<std::string, Doc>> SubDocs; // typically for enum values
196
197 void visit(const clang::comments::Comment *C)
198 {
199 Base::visit(C);
200 for (auto it = C->child_begin(); it != C->child_end(); ++it)
201 visit(C: *it);
202 }
203
204 // Inline content.
205 // void visitTextComment(const clang::comments::TextComment *C);
206 void visitInlineCommandComment(const clang::comments::InlineCommandComment *C)
207 {
208 tag(className: "command", range: C->getCommandNameRange());
209 for (unsigned int i = 0; i < C->getNumArgs(); ++i)
210 tag(className: "arg", range: C->getArgRange(Idx: i));
211 }
212 void visitHTMLStartTagComment(const clang::comments::HTMLStartTagComment *C)
213 {
214 tag(className: "tag", range: C->getSourceRange());
215 /*for (int i = 0; i < C->getNumAttrs(); ++i) {
216 auto attr = C->getAttr(i);
217 tag("attr", attr.getNameRange());
218 }*/
219 }
220 void visitHTMLEndTagComment(const clang::comments::HTMLEndTagComment *C)
221 {
222 tag(className: "tag", range: C->getSourceRange());
223 }
224
225 // Block content.
226 // void visitParagraphComment(const clang::comments::ParagraphComment *C);
227 void visitBlockCommandComment(const clang::comments::BlockCommandComment *C)
228 {
229 auto nameRange = C->getCommandNameRange(Traits: traits);
230 tag(className: "command", range: { C->getSourceRange().getBegin(), nameRange.getEnd().getLocWithOffset(Offset: -1) });
231 for (unsigned int i = 0; i < C->getNumArgs(); ++i)
232 tag(className: "arg", range: C->getArgRange(Idx: i));
233 if (C->getCommandName(Traits: traits) == "value")
234 parseEnumValue(C);
235 }
236 // void visitParamCommandComment(const clang::comments::ParamCommandComment *C);
237 // void visitTParamCommandComment(const clang::comments::TParamCommandComment *C);
238 /*void visitVerbatimBlockComment(const clang::comments::VerbatimBlockComment *C) {
239 Base::visitVerbatimBlockComment(C);
240 FIXME
241 // highlight the closing command
242 auto end = C->getLocEnd();
243 tag("arg", {end.getLocWithOffset(-C->getCloseName().size()) ,end});
244 }*/
245 void visitVerbatimBlockLineComment(const clang::comments::VerbatimBlockLineComment *C)
246 {
247 tag(className: "verb", range: C->getSourceRange());
248 }
249 void visitVerbatimLineComment(const clang::comments::VerbatimLineComment *C)
250 {
251 auto R = C->getTextRange();
252 // We need to adjust because the text starts right after the name, which overlap with the
253 // command. And also includes the end of line, which is useless.
254 Base::visitVerbatimLineComment(C);
255
256 std::string ref;
257 auto Info = traits.getCommandInfo(CommandID: C->getCommandID());
258 if (Info->IsDeclarationCommand) {
259 auto D = parseDeclarationReference(Text: C->getText(), Sema,
260 isFunction: Info->IsFunctionDeclarationCommand
261 || Info->getID()
262 == clang::comments::CommandTraits::KCI_fn);
263 if (D) {
264 Decl = D;
265 DeclRef = annotator.getVisibleRef(Decl);
266 ref = DeclRef;
267 }
268 }
269 tag(className: "verb", range: { R.getBegin().getLocWithOffset(Offset: +1), R.getEnd().getLocWithOffset(Offset: -1) }, ref);
270 }
271
272 // void visitFullComment(const clang::comments::FullComment *C);
273
274private:
275 void tag(llvm::StringRef className, clang::SourceRange range,
276 llvm::StringRef ref = llvm::StringRef())
277 {
278 int len = range.getEnd().getRawEncoding() - range.getBegin().getRawEncoding() + 1;
279 if (len > 0) {
280 std::string attr;
281 if (ref.empty()) {
282 attr = "class=\"" % className % "\"";
283 } else {
284 attr = "class=\"" % className % "\" data-ref=\"" % ref % "\"";
285 }
286 auto offset = annotator.getSourceMgr().getFileOffset(SpellingLoc: range.getBegin());
287 generator.addTag(name: "span", attributes: attr, pos: offset, len);
288 }
289 }
290
291 // Parse the \value command (for enum values)
292 void parseEnumValue(const clang::comments::BlockCommandComment *C)
293 {
294 auto ED = llvm::dyn_cast_or_null<clang::EnumDecl>(Val: Decl);
295 if (!ED)
296 return;
297 auto P = C->getParagraph();
298 if (!P)
299 return;
300 auto valueStartLoc = P->getSourceRange().getBegin();
301 const char *data = annotator.getSourceMgr().getCharacterData(SL: valueStartLoc);
302 auto begin = data;
303 while (clang::isWhitespace(c: *begin))
304 begin++;
305 auto end = begin;
306#if CLANG_VERSION_MAJOR >= 14
307 while (clang::isAsciiIdentifierContinue(c: *end))
308 end++;
309#else
310 while (clang::isIdentifierBody(*end))
311 end++;
312#endif
313 llvm::StringRef value(begin, end - begin);
314
315 auto it = std::find_if(
316 first: ED->enumerator_begin(), last: ED->enumerator_end(),
317 pred: [&value](const clang::EnumConstantDecl *EC) { return value == EC->getName(); });
318 if (it == ED->enumerator_end())
319 return;
320 auto ref = annotator.getVisibleRef(Decl: *it);
321
322 tag(className: "arg",
323 range: { valueStartLoc.getLocWithOffset(Offset: begin - data),
324 valueStartLoc.getLocWithOffset(Offset: end - data) },
325 ref);
326
327 auto range = C->getSourceRange();
328 auto len = range.getEnd().getRawEncoding() - range.getBegin().getRawEncoding() + 1;
329 auto ctn = std::string(annotator.getSourceMgr().getCharacterData(SL: range.getBegin()), len);
330 SubDocs.push_back(x: { std::move(ref), Doc { .content: std::move(ctn), .loc: range.getBegin() } });
331 }
332};
333
334static void handleUrlsInComment(Generator &generator, llvm::StringRef rawString, int commentStart)
335{
336 std::size_t pos = 0;
337 while ((pos = rawString.find(Str: "http", From: pos)) != llvm::StringRef::npos) {
338 int begin = pos;
339 pos += 4;
340 if (begin != 0
341 && llvm::StringRef(" \t/*[]()<>|:\"'{}").find(C: rawString[begin - 1])
342 == llvm::StringRef::npos) {
343 // the URL need to be the first character, or follow a space or one of the character
344 continue;
345 }
346 if (pos < rawString.size() && rawString[pos] == 's')
347 pos++;
348 if (!rawString.substr(Start: pos).startswith(Prefix: "://"))
349 continue;
350 pos += 3;
351 // We have an URL
352
353 llvm::StringRef urlChars = "-._~:/?#[]@!$&'()*+,;=%"; // chars valid in the URL
354 while (pos < rawString.size()
355 && (std::isalnum(rawString[pos])
356 || urlChars.find(C: rawString[pos]) != llvm::StringRef::npos))
357 pos++;
358
359 // don't end with a period
360 if (rawString[pos - 1] == '.')
361 pos--;
362
363 // Don't end with a closing parenthese or bracket unless the URL contains an opening one
364 // (e.g. wikipedia urls)
365 auto candidate = rawString.substr(Start: begin, N: pos - begin);
366 if (rawString[pos - 1] == ')' && candidate.find(C: '(') == llvm::StringRef::npos)
367 pos--;
368 if (rawString[pos - 1] == ']' && candidate.find(C: '[') == llvm::StringRef::npos)
369 pos--;
370
371 // don't end with a period
372 if (rawString[pos - 1] == '.')
373 pos--;
374
375 auto len = pos - begin;
376 generator.addTag(name: "a", attributes: "href=\"" % rawString.substr(Start: begin, N: len) % "\"", pos: commentStart + begin,
377 len);
378 }
379}
380
381void CommentHandler::handleComment(Annotator &A, Generator &generator, clang::Sema &Sema,
382 const char *bufferStart, int commentStart, int len,
383 clang::SourceLocation searchLocBegin,
384 clang::SourceLocation searchLocEnd,
385 clang::SourceLocation commentLoc)
386{
387 llvm::StringRef rawString(bufferStart + commentStart, len);
388
389 handleUrlsInComment(generator, rawString, commentStart);
390
391 std::string attributes;
392
393 if ((rawString.ltrim().startswith(Prefix: "/**") && !rawString.ltrim().startswith(Prefix: "/***"))
394 || rawString.ltrim().startswith(Prefix: "/*!") || rawString.ltrim().startswith(Prefix: "//!")
395 || (rawString.ltrim().startswith(Prefix: "///") && !rawString.ltrim().startswith(Prefix: "////")))
396#if CLANG_VERSION_MAJOR == 3 && CLANG_VERSION_MINOR <= 4
397 if (rawString.find("deprecated")
398 == rawString.npos) // workaround crash in comments::Sema::checkDeprecatedCommand
399#endif
400 {
401 attributes = "class=\"doc\"";
402
403 clang::Preprocessor &PP = Sema.getPreprocessor();
404 clang::comments::CommandTraits traits(PP.getPreprocessorAllocator(),
405 clang::CommentOptions());
406 traits.registerBlockCommand(CommandName: "value"); // enum value
407#if CLANG_VERSION_MAJOR == 3 && CLANG_VERSION_MINOR <= 4
408 traits.registerBlockCommand("deprecated"); // avoid typo correction leading to crash.
409#endif
410 clang::comments::Lexer lexer(PP.getPreprocessorAllocator(), PP.getDiagnostics(), traits,
411 commentLoc, bufferStart + commentStart,
412 bufferStart + commentStart + len);
413 clang::comments::Sema sema(PP.getPreprocessorAllocator(), PP.getSourceManager(),
414 PP.getDiagnostics(), traits, &PP);
415 clang::comments::Parser parser(lexer, sema, PP.getPreprocessorAllocator(),
416 PP.getSourceManager(), PP.getDiagnostics(), traits);
417 auto fullComment = parser.parseFullComment();
418 CommentVisitor visitor { A, generator, traits, Sema };
419 visitor.visit(C: fullComment);
420 if (!visitor.DeclRef.empty()) {
421 for (auto &p : visitor.SubDocs)
422 docs.insert(x: std::move(p));
423 docs.insert(x: { std::move(visitor.DeclRef), { .content: rawString.str(), .loc: commentLoc } });
424 generator.addTag(name: "i", attributes, pos: commentStart, len);
425 return;
426 }
427 }
428
429
430 // Try to find a matching declaration
431 const auto &dof = decl_offsets;
432 // is there one and one single decl in that range.
433 auto it_before = dof.lower_bound(x: searchLocBegin);
434 auto it_after = dof.upper_bound(x: searchLocEnd);
435 if (it_before != dof.end() && it_after != dof.begin() && it_before == (--it_after)) {
436 if (it_before->second.second) {
437 docs.insert(x: { it_before->second.first, { .content: rawString.str(), .loc: commentLoc } });
438 } else {
439 attributes %= " data-doc=\"" % it_before->second.first % "\"";
440 }
441 }
442
443 generator.addTag(name: "i", attributes, pos: commentStart, len);
444}
445

source code of codebrowser/generator/commenthandler.cpp