1 | //===---- ParseStmtAsm.cpp - Assembly Statement Parser --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements parsing for GCC and Microsoft inline assembly. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/AST/ASTContext.h" |
14 | #include "clang/Basic/Diagnostic.h" |
15 | #include "clang/Basic/TargetInfo.h" |
16 | #include "clang/Parse/Parser.h" |
17 | #include "clang/Parse/RAIIObjectsForParser.h" |
18 | #include "llvm/ADT/SmallString.h" |
19 | #include "llvm/ADT/StringExtras.h" |
20 | #include "llvm/MC/MCAsmInfo.h" |
21 | #include "llvm/MC/MCContext.h" |
22 | #include "llvm/MC/MCInstPrinter.h" |
23 | #include "llvm/MC/MCInstrInfo.h" |
24 | #include "llvm/MC/MCObjectFileInfo.h" |
25 | #include "llvm/MC/MCParser/MCAsmParser.h" |
26 | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
27 | #include "llvm/MC/MCRegisterInfo.h" |
28 | #include "llvm/MC/MCStreamer.h" |
29 | #include "llvm/MC/MCSubtargetInfo.h" |
30 | #include "llvm/MC/MCTargetOptions.h" |
31 | #include "llvm/MC/TargetRegistry.h" |
32 | #include "llvm/Support/SourceMgr.h" |
33 | #include "llvm/Support/TargetSelect.h" |
34 | using namespace clang; |
35 | |
36 | namespace { |
37 | class ClangAsmParserCallback : public llvm::MCAsmParserSemaCallback { |
38 | Parser &TheParser; |
39 | SourceLocation AsmLoc; |
40 | StringRef AsmString; |
41 | |
42 | /// The tokens we streamed into AsmString and handed off to MC. |
43 | ArrayRef<Token> AsmToks; |
44 | |
45 | /// The offset of each token in AsmToks within AsmString. |
46 | ArrayRef<unsigned> AsmTokOffsets; |
47 | |
48 | public: |
49 | ClangAsmParserCallback(Parser &P, SourceLocation Loc, StringRef AsmString, |
50 | ArrayRef<Token> Toks, ArrayRef<unsigned> Offsets) |
51 | : TheParser(P), AsmLoc(Loc), AsmString(AsmString), AsmToks(Toks), |
52 | AsmTokOffsets(Offsets) { |
53 | assert(AsmToks.size() == AsmTokOffsets.size()); |
54 | } |
55 | |
56 | void LookupInlineAsmIdentifier(StringRef &LineBuf, |
57 | llvm::InlineAsmIdentifierInfo &Info, |
58 | bool IsUnevaluatedContext) override; |
59 | |
60 | StringRef LookupInlineAsmLabel(StringRef Identifier, llvm::SourceMgr &LSM, |
61 | llvm::SMLoc Location, |
62 | bool Create) override; |
63 | |
64 | bool LookupInlineAsmField(StringRef Base, StringRef Member, |
65 | unsigned &Offset) override { |
66 | return TheParser.getActions().LookupInlineAsmField(Base, Member, Offset, |
67 | AsmLoc); |
68 | } |
69 | |
70 | static void DiagHandlerCallback(const llvm::SMDiagnostic &D, void *Context) { |
71 | ((ClangAsmParserCallback *)Context)->handleDiagnostic(D); |
72 | } |
73 | |
74 | private: |
75 | /// Collect the appropriate tokens for the given string. |
76 | void findTokensForString(StringRef Str, SmallVectorImpl<Token> &TempToks, |
77 | const Token *&FirstOrigToken) const; |
78 | |
79 | SourceLocation translateLocation(const llvm::SourceMgr &LSM, |
80 | llvm::SMLoc SMLoc); |
81 | |
82 | void handleDiagnostic(const llvm::SMDiagnostic &D); |
83 | }; |
84 | } |
85 | |
86 | void ClangAsmParserCallback::LookupInlineAsmIdentifier( |
87 | StringRef &LineBuf, llvm::InlineAsmIdentifierInfo &Info, |
88 | bool IsUnevaluatedContext) { |
89 | // Collect the desired tokens. |
90 | SmallVector<Token, 16> LineToks; |
91 | const Token *FirstOrigToken = nullptr; |
92 | findTokensForString(Str: LineBuf, TempToks&: LineToks, FirstOrigToken); |
93 | |
94 | unsigned NumConsumedToks; |
95 | ExprResult Result = TheParser.ParseMSAsmIdentifier(LineToks, NumLineToksConsumed&: NumConsumedToks, |
96 | IsUnevaluated: IsUnevaluatedContext); |
97 | |
98 | // If we consumed the entire line, tell MC that. |
99 | // Also do this if we consumed nothing as a way of reporting failure. |
100 | if (NumConsumedToks == 0 || NumConsumedToks == LineToks.size()) { |
101 | // By not modifying LineBuf, we're implicitly consuming it all. |
102 | |
103 | // Otherwise, consume up to the original tokens. |
104 | } else { |
105 | assert(FirstOrigToken && "not using original tokens?" ); |
106 | |
107 | // Since we're using original tokens, apply that offset. |
108 | assert(FirstOrigToken[NumConsumedToks].getLocation() == |
109 | LineToks[NumConsumedToks].getLocation()); |
110 | unsigned FirstIndex = FirstOrigToken - AsmToks.begin(); |
111 | unsigned LastIndex = FirstIndex + NumConsumedToks - 1; |
112 | |
113 | // The total length we've consumed is the relative offset |
114 | // of the last token we consumed plus its length. |
115 | unsigned TotalOffset = |
116 | (AsmTokOffsets[LastIndex] + AsmToks[LastIndex].getLength() - |
117 | AsmTokOffsets[FirstIndex]); |
118 | LineBuf = LineBuf.substr(Start: 0, N: TotalOffset); |
119 | } |
120 | |
121 | // Initialize Info with the lookup result. |
122 | if (!Result.isUsable()) |
123 | return; |
124 | TheParser.getActions().FillInlineAsmIdentifierInfo(Res: Result.get(), Info); |
125 | } |
126 | |
127 | StringRef ClangAsmParserCallback::LookupInlineAsmLabel(StringRef Identifier, |
128 | llvm::SourceMgr &LSM, |
129 | llvm::SMLoc Location, |
130 | bool Create) { |
131 | SourceLocation Loc = translateLocation(LSM, SMLoc: Location); |
132 | LabelDecl *Label = |
133 | TheParser.getActions().GetOrCreateMSAsmLabel(ExternalLabelName: Identifier, Location: Loc, AlwaysCreate: Create); |
134 | return Label->getMSAsmLabel(); |
135 | } |
136 | |
137 | void ClangAsmParserCallback::findTokensForString( |
138 | StringRef Str, SmallVectorImpl<Token> &TempToks, |
139 | const Token *&FirstOrigToken) const { |
140 | // For now, assert that the string we're working with is a substring |
141 | // of what we gave to MC. This lets us use the original tokens. |
142 | assert(!std::less<const char *>()(Str.begin(), AsmString.begin()) && |
143 | !std::less<const char *>()(AsmString.end(), Str.end())); |
144 | |
145 | // Try to find a token whose offset matches the first token. |
146 | unsigned FirstCharOffset = Str.begin() - AsmString.begin(); |
147 | const unsigned *FirstTokOffset = |
148 | llvm::lower_bound(Range: AsmTokOffsets, Value&: FirstCharOffset); |
149 | |
150 | // For now, assert that the start of the string exactly |
151 | // corresponds to the start of a token. |
152 | assert(*FirstTokOffset == FirstCharOffset); |
153 | |
154 | // Use all the original tokens for this line. (We assume the |
155 | // end of the line corresponds cleanly to a token break.) |
156 | unsigned FirstTokIndex = FirstTokOffset - AsmTokOffsets.begin(); |
157 | FirstOrigToken = &AsmToks[FirstTokIndex]; |
158 | unsigned LastCharOffset = Str.end() - AsmString.begin(); |
159 | for (unsigned i = FirstTokIndex, e = AsmTokOffsets.size(); i != e; ++i) { |
160 | if (AsmTokOffsets[i] >= LastCharOffset) |
161 | break; |
162 | TempToks.push_back(Elt: AsmToks[i]); |
163 | } |
164 | } |
165 | |
166 | SourceLocation |
167 | ClangAsmParserCallback::translateLocation(const llvm::SourceMgr &LSM, |
168 | llvm::SMLoc SMLoc) { |
169 | // Compute an offset into the inline asm buffer. |
170 | // FIXME: This isn't right if .macro is involved (but hopefully, no |
171 | // real-world code does that). |
172 | const llvm::MemoryBuffer *LBuf = |
173 | LSM.getMemoryBuffer(i: LSM.FindBufferContainingLoc(Loc: SMLoc)); |
174 | unsigned Offset = SMLoc.getPointer() - LBuf->getBufferStart(); |
175 | |
176 | // Figure out which token that offset points into. |
177 | const unsigned *TokOffsetPtr = llvm::lower_bound(Range&: AsmTokOffsets, Value&: Offset); |
178 | unsigned TokIndex = TokOffsetPtr - AsmTokOffsets.begin(); |
179 | unsigned TokOffset = *TokOffsetPtr; |
180 | |
181 | // If we come up with an answer which seems sane, use it; otherwise, |
182 | // just point at the __asm keyword. |
183 | // FIXME: Assert the answer is sane once we handle .macro correctly. |
184 | SourceLocation Loc = AsmLoc; |
185 | if (TokIndex < AsmToks.size()) { |
186 | const Token &Tok = AsmToks[TokIndex]; |
187 | Loc = Tok.getLocation(); |
188 | Loc = Loc.getLocWithOffset(Offset: Offset - TokOffset); |
189 | } |
190 | return Loc; |
191 | } |
192 | |
193 | void ClangAsmParserCallback::handleDiagnostic(const llvm::SMDiagnostic &D) { |
194 | const llvm::SourceMgr &LSM = *D.getSourceMgr(); |
195 | SourceLocation Loc = translateLocation(LSM, SMLoc: D.getLoc()); |
196 | TheParser.Diag(Loc, diag::err_inline_ms_asm_parsing) << D.getMessage(); |
197 | } |
198 | |
199 | /// Parse an identifier in an MS-style inline assembly block. |
200 | ExprResult Parser::ParseMSAsmIdentifier(llvm::SmallVectorImpl<Token> &LineToks, |
201 | unsigned &NumLineToksConsumed, |
202 | bool IsUnevaluatedContext) { |
203 | // Push a fake token on the end so that we don't overrun the token |
204 | // stream. We use ';' because it expression-parsing should never |
205 | // overrun it. |
206 | const tok::TokenKind EndOfStream = tok::semi; |
207 | Token EndOfStreamTok; |
208 | EndOfStreamTok.startToken(); |
209 | EndOfStreamTok.setKind(EndOfStream); |
210 | LineToks.push_back(Elt: EndOfStreamTok); |
211 | |
212 | // Also copy the current token over. |
213 | LineToks.push_back(Elt: Tok); |
214 | |
215 | PP.EnterTokenStream(Toks: LineToks, /*DisableMacroExpansions*/ DisableMacroExpansion: true, |
216 | /*IsReinject*/ true); |
217 | |
218 | // Clear the current token and advance to the first token in LineToks. |
219 | ConsumeAnyToken(); |
220 | |
221 | // Parse an optional scope-specifier if we're in C++. |
222 | CXXScopeSpec SS; |
223 | if (getLangOpts().CPlusPlus) |
224 | ParseOptionalCXXScopeSpecifier(SS, /*ObjectType=*/nullptr, |
225 | /*ObjectHasErrors=*/false, |
226 | /*EnteringContext=*/false); |
227 | |
228 | // Require an identifier here. |
229 | SourceLocation TemplateKWLoc; |
230 | UnqualifiedId Id; |
231 | bool Invalid = true; |
232 | ExprResult Result; |
233 | if (Tok.is(K: tok::kw_this)) { |
234 | Result = ParseCXXThis(); |
235 | Invalid = false; |
236 | } else { |
237 | Invalid = |
238 | ParseUnqualifiedId(SS, /*ObjectType=*/nullptr, |
239 | /*ObjectHadErrors=*/false, |
240 | /*EnteringContext=*/false, |
241 | /*AllowDestructorName=*/false, |
242 | /*AllowConstructorName=*/false, |
243 | /*AllowDeductionGuide=*/false, TemplateKWLoc: &TemplateKWLoc, Result&: Id); |
244 | // Perform the lookup. |
245 | Result = Actions.LookupInlineAsmIdentifier(SS, TemplateKWLoc, Id, |
246 | IsUnevaluatedContext); |
247 | } |
248 | // While the next two tokens are 'period' 'identifier', repeatedly parse it as |
249 | // a field access. We have to avoid consuming assembler directives that look |
250 | // like '.' 'else'. |
251 | while (Result.isUsable() && Tok.is(K: tok::period)) { |
252 | Token IdTok = PP.LookAhead(N: 0); |
253 | if (IdTok.isNot(K: tok::identifier)) |
254 | break; |
255 | ConsumeToken(); // Consume the period. |
256 | IdentifierInfo *Id = Tok.getIdentifierInfo(); |
257 | ConsumeToken(); // Consume the identifier. |
258 | Result = Actions.LookupInlineAsmVarDeclField(RefExpr: Result.get(), Member: Id->getName(), |
259 | AsmLoc: Tok.getLocation()); |
260 | } |
261 | |
262 | // Figure out how many tokens we are into LineToks. |
263 | unsigned LineIndex = 0; |
264 | if (Tok.is(K: EndOfStream)) { |
265 | LineIndex = LineToks.size() - 2; |
266 | } else { |
267 | while (LineToks[LineIndex].getLocation() != Tok.getLocation()) { |
268 | LineIndex++; |
269 | assert(LineIndex < LineToks.size() - 2); // we added two extra tokens |
270 | } |
271 | } |
272 | |
273 | // If we've run into the poison token we inserted before, or there |
274 | // was a parsing error, then claim the entire line. |
275 | if (Invalid || Tok.is(K: EndOfStream)) { |
276 | NumLineToksConsumed = LineToks.size() - 2; |
277 | } else { |
278 | // Otherwise, claim up to the start of the next token. |
279 | NumLineToksConsumed = LineIndex; |
280 | } |
281 | |
282 | // Finally, restore the old parsing state by consuming all the tokens we |
283 | // staged before, implicitly killing off the token-lexer we pushed. |
284 | for (unsigned i = 0, e = LineToks.size() - LineIndex - 2; i != e; ++i) { |
285 | ConsumeAnyToken(); |
286 | } |
287 | assert(Tok.is(EndOfStream)); |
288 | ConsumeToken(); |
289 | |
290 | // Leave LineToks in its original state. |
291 | LineToks.pop_back(); |
292 | LineToks.pop_back(); |
293 | |
294 | return Result; |
295 | } |
296 | |
297 | /// Turn a sequence of our tokens back into a string that we can hand |
298 | /// to the MC asm parser. |
299 | static bool buildMSAsmString(Preprocessor &PP, SourceLocation AsmLoc, |
300 | ArrayRef<Token> AsmToks, |
301 | SmallVectorImpl<unsigned> &TokOffsets, |
302 | SmallString<512> &Asm) { |
303 | assert(!AsmToks.empty() && "Didn't expect an empty AsmToks!" ); |
304 | |
305 | // Is this the start of a new assembly statement? |
306 | bool isNewStatement = true; |
307 | |
308 | for (unsigned i = 0, e = AsmToks.size(); i < e; ++i) { |
309 | const Token &Tok = AsmToks[i]; |
310 | |
311 | // Start each new statement with a newline and a tab. |
312 | if (!isNewStatement && (Tok.is(K: tok::kw_asm) || Tok.isAtStartOfLine())) { |
313 | Asm += "\n\t" ; |
314 | isNewStatement = true; |
315 | } |
316 | |
317 | // Preserve the existence of leading whitespace except at the |
318 | // start of a statement. |
319 | if (!isNewStatement && Tok.hasLeadingSpace()) |
320 | Asm += ' '; |
321 | |
322 | // Remember the offset of this token. |
323 | TokOffsets.push_back(Elt: Asm.size()); |
324 | |
325 | // Don't actually write '__asm' into the assembly stream. |
326 | if (Tok.is(K: tok::kw_asm)) { |
327 | // Complain about __asm at the end of the stream. |
328 | if (i + 1 == e) { |
329 | PP.Diag(AsmLoc, diag::err_asm_empty); |
330 | return true; |
331 | } |
332 | |
333 | continue; |
334 | } |
335 | |
336 | // Append the spelling of the token. |
337 | SmallString<32> SpellingBuffer; |
338 | bool SpellingInvalid = false; |
339 | Asm += PP.getSpelling(Tok, Buffer&: SpellingBuffer, Invalid: &SpellingInvalid); |
340 | assert(!SpellingInvalid && "spelling was invalid after correct parse?" ); |
341 | |
342 | // We are no longer at the start of a statement. |
343 | isNewStatement = false; |
344 | } |
345 | |
346 | // Ensure that the buffer is null-terminated. |
347 | Asm.push_back(Elt: '\0'); |
348 | Asm.pop_back(); |
349 | |
350 | assert(TokOffsets.size() == AsmToks.size()); |
351 | return false; |
352 | } |
353 | |
354 | // Determine if this is a GCC-style asm statement. |
355 | bool Parser::isGCCAsmStatement(const Token &TokAfterAsm) const { |
356 | return TokAfterAsm.is(K: tok::l_paren) || isGNUAsmQualifier(TokAfterAsm); |
357 | } |
358 | |
359 | bool Parser::isGNUAsmQualifier(const Token &TokAfterAsm) const { |
360 | return getGNUAsmQualifier(Tok: TokAfterAsm) != GNUAsmQualifiers::AQ_unspecified; |
361 | } |
362 | |
363 | /// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled, |
364 | /// this routine is called to collect the tokens for an MS asm statement. |
365 | /// |
366 | /// [MS] ms-asm-statement: |
367 | /// ms-asm-block |
368 | /// ms-asm-block ms-asm-statement |
369 | /// |
370 | /// [MS] ms-asm-block: |
371 | /// '__asm' ms-asm-line '\n' |
372 | /// '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt] |
373 | /// |
374 | /// [MS] ms-asm-instruction-block |
375 | /// ms-asm-line |
376 | /// ms-asm-line '\n' ms-asm-instruction-block |
377 | /// |
378 | StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { |
379 | SourceManager &SrcMgr = PP.getSourceManager(); |
380 | SourceLocation EndLoc = AsmLoc; |
381 | SmallVector<Token, 4> AsmToks; |
382 | |
383 | bool SingleLineMode = true; |
384 | unsigned BraceNesting = 0; |
385 | unsigned short savedBraceCount = BraceCount; |
386 | bool = false; |
387 | FileID FID; |
388 | unsigned LineNo = 0; |
389 | unsigned NumTokensRead = 0; |
390 | SmallVector<SourceLocation, 4> LBraceLocs; |
391 | bool SkippedStartOfLine = false; |
392 | |
393 | if (Tok.is(K: tok::l_brace)) { |
394 | // Braced inline asm: consume the opening brace. |
395 | SingleLineMode = false; |
396 | BraceNesting = 1; |
397 | EndLoc = ConsumeBrace(); |
398 | LBraceLocs.push_back(Elt: EndLoc); |
399 | ++NumTokensRead; |
400 | } else { |
401 | // Single-line inline asm; compute which line it is on. |
402 | std::pair<FileID, unsigned> ExpAsmLoc = |
403 | SrcMgr.getDecomposedExpansionLoc(Loc: EndLoc); |
404 | FID = ExpAsmLoc.first; |
405 | LineNo = SrcMgr.getLineNumber(FID, FilePos: ExpAsmLoc.second); |
406 | LBraceLocs.push_back(Elt: SourceLocation()); |
407 | } |
408 | |
409 | SourceLocation TokLoc = Tok.getLocation(); |
410 | do { |
411 | // If we hit EOF, we're done, period. |
412 | if (isEofOrEom()) |
413 | break; |
414 | |
415 | if (!InAsmComment && Tok.is(K: tok::l_brace)) { |
416 | // Consume the opening brace. |
417 | SkippedStartOfLine = Tok.isAtStartOfLine(); |
418 | AsmToks.push_back(Elt: Tok); |
419 | EndLoc = ConsumeBrace(); |
420 | BraceNesting++; |
421 | LBraceLocs.push_back(Elt: EndLoc); |
422 | TokLoc = Tok.getLocation(); |
423 | ++NumTokensRead; |
424 | continue; |
425 | } else if (!InAsmComment && Tok.is(K: tok::semi)) { |
426 | // A semicolon in an asm is the start of a comment. |
427 | InAsmComment = true; |
428 | if (!SingleLineMode) { |
429 | // Compute which line the comment is on. |
430 | std::pair<FileID, unsigned> ExpSemiLoc = |
431 | SrcMgr.getDecomposedExpansionLoc(Loc: TokLoc); |
432 | FID = ExpSemiLoc.first; |
433 | LineNo = SrcMgr.getLineNumber(FID, FilePos: ExpSemiLoc.second); |
434 | } |
435 | } else if (SingleLineMode || InAsmComment) { |
436 | // If end-of-line is significant, check whether this token is on a |
437 | // new line. |
438 | std::pair<FileID, unsigned> ExpLoc = |
439 | SrcMgr.getDecomposedExpansionLoc(Loc: TokLoc); |
440 | if (ExpLoc.first != FID || |
441 | SrcMgr.getLineNumber(FID: ExpLoc.first, FilePos: ExpLoc.second) != LineNo) { |
442 | // If this is a single-line __asm, we're done, except if the next |
443 | // line is MS-style asm too, in which case we finish a comment |
444 | // if needed and then keep processing the next line as a single |
445 | // line __asm. |
446 | bool isAsm = Tok.is(K: tok::kw_asm); |
447 | if (SingleLineMode && (!isAsm || isGCCAsmStatement(TokAfterAsm: NextToken()))) |
448 | break; |
449 | // We're no longer in a comment. |
450 | InAsmComment = false; |
451 | if (isAsm) { |
452 | // If this is a new __asm {} block we want to process it separately |
453 | // from the single-line __asm statements |
454 | if (PP.LookAhead(N: 0).is(K: tok::l_brace)) |
455 | break; |
456 | LineNo = SrcMgr.getLineNumber(FID: ExpLoc.first, FilePos: ExpLoc.second); |
457 | SkippedStartOfLine = Tok.isAtStartOfLine(); |
458 | } else if (Tok.is(K: tok::semi)) { |
459 | // A multi-line asm-statement, where next line is a comment |
460 | InAsmComment = true; |
461 | FID = ExpLoc.first; |
462 | LineNo = SrcMgr.getLineNumber(FID, FilePos: ExpLoc.second); |
463 | } |
464 | } else if (!InAsmComment && Tok.is(K: tok::r_brace)) { |
465 | // In MSVC mode, braces only participate in brace matching and |
466 | // separating the asm statements. This is an intentional |
467 | // departure from the Apple gcc behavior. |
468 | if (!BraceNesting) |
469 | break; |
470 | } |
471 | } |
472 | if (!InAsmComment && BraceNesting && Tok.is(K: tok::r_brace) && |
473 | BraceCount == (savedBraceCount + BraceNesting)) { |
474 | // Consume the closing brace. |
475 | SkippedStartOfLine = Tok.isAtStartOfLine(); |
476 | // Don't want to add the closing brace of the whole asm block |
477 | if (SingleLineMode || BraceNesting > 1) { |
478 | Tok.clearFlag(Flag: Token::LeadingSpace); |
479 | AsmToks.push_back(Elt: Tok); |
480 | } |
481 | EndLoc = ConsumeBrace(); |
482 | BraceNesting--; |
483 | // Finish if all of the opened braces in the inline asm section were |
484 | // consumed. |
485 | if (BraceNesting == 0 && !SingleLineMode) |
486 | break; |
487 | else { |
488 | LBraceLocs.pop_back(); |
489 | TokLoc = Tok.getLocation(); |
490 | ++NumTokensRead; |
491 | continue; |
492 | } |
493 | } |
494 | |
495 | // Consume the next token; make sure we don't modify the brace count etc. |
496 | // if we are in a comment. |
497 | EndLoc = TokLoc; |
498 | if (InAsmComment) |
499 | PP.Lex(Result&: Tok); |
500 | else { |
501 | // Set the token as the start of line if we skipped the original start |
502 | // of line token in case it was a nested brace. |
503 | if (SkippedStartOfLine) |
504 | Tok.setFlag(Token::StartOfLine); |
505 | AsmToks.push_back(Elt: Tok); |
506 | ConsumeAnyToken(); |
507 | } |
508 | TokLoc = Tok.getLocation(); |
509 | ++NumTokensRead; |
510 | SkippedStartOfLine = false; |
511 | } while (true); |
512 | |
513 | if (BraceNesting && BraceCount != savedBraceCount) { |
514 | // __asm without closing brace (this can happen at EOF). |
515 | for (unsigned i = 0; i < BraceNesting; ++i) { |
516 | Diag(Tok, diag::err_expected) << tok::r_brace; |
517 | Diag(LBraceLocs.back(), diag::note_matching) << tok::l_brace; |
518 | LBraceLocs.pop_back(); |
519 | } |
520 | return StmtError(); |
521 | } else if (NumTokensRead == 0) { |
522 | // Empty __asm. |
523 | Diag(Tok, diag::err_expected) << tok::l_brace; |
524 | return StmtError(); |
525 | } |
526 | |
527 | // Okay, prepare to use MC to parse the assembly. |
528 | SmallVector<StringRef, 4> ConstraintRefs; |
529 | SmallVector<Expr *, 4> Exprs; |
530 | SmallVector<StringRef, 4> ClobberRefs; |
531 | |
532 | // We need an actual supported target. |
533 | const llvm::Triple &TheTriple = Actions.Context.getTargetInfo().getTriple(); |
534 | const std::string &TT = TheTriple.getTriple(); |
535 | const llvm::Target *TheTarget = nullptr; |
536 | if (!TheTriple.isX86()) { |
537 | Diag(AsmLoc, diag::err_msasm_unsupported_arch) << TheTriple.getArchName(); |
538 | } else { |
539 | std::string Error; |
540 | TheTarget = llvm::TargetRegistry::lookupTarget(Triple: TT, Error); |
541 | if (!TheTarget) |
542 | Diag(AsmLoc, diag::err_msasm_unable_to_create_target) << Error; |
543 | } |
544 | |
545 | assert(!LBraceLocs.empty() && "Should have at least one location here" ); |
546 | |
547 | SmallString<512> AsmString; |
548 | auto EmptyStmt = [&] { |
549 | return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLoc: LBraceLocs[0], AsmToks, AsmString, |
550 | /*NumOutputs*/ 0, /*NumInputs*/ 0, |
551 | Constraints: ConstraintRefs, Clobbers: ClobberRefs, Exprs, EndLoc); |
552 | }; |
553 | // If we don't support assembly, or the assembly is empty, we don't |
554 | // need to instantiate the AsmParser, etc. |
555 | if (!TheTarget || AsmToks.empty()) { |
556 | return EmptyStmt(); |
557 | } |
558 | |
559 | // Expand the tokens into a string buffer. |
560 | SmallVector<unsigned, 8> TokOffsets; |
561 | if (buildMSAsmString(PP, AsmLoc, AsmToks, TokOffsets, Asm&: AsmString)) |
562 | return StmtError(); |
563 | |
564 | const TargetOptions &TO = Actions.Context.getTargetInfo().getTargetOpts(); |
565 | std::string FeaturesStr = |
566 | llvm::join(Begin: TO.Features.begin(), End: TO.Features.end(), Separator: "," ); |
567 | |
568 | std::unique_ptr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT)); |
569 | if (!MRI) { |
570 | Diag(AsmLoc, diag::err_msasm_unable_to_create_target) |
571 | << "target MC unavailable" ; |
572 | return EmptyStmt(); |
573 | } |
574 | // FIXME: init MCOptions from sanitizer flags here. |
575 | llvm::MCTargetOptions MCOptions; |
576 | std::unique_ptr<llvm::MCAsmInfo> MAI( |
577 | TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TT, Options: MCOptions)); |
578 | // Get the instruction descriptor. |
579 | std::unique_ptr<llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo()); |
580 | std::unique_ptr<llvm::MCSubtargetInfo> STI( |
581 | TheTarget->createMCSubtargetInfo(TheTriple: TT, CPU: TO.CPU, Features: FeaturesStr)); |
582 | // Target MCTargetDesc may not be linked in clang-based tools. |
583 | |
584 | if (!MAI || !MII || !STI) { |
585 | Diag(AsmLoc, diag::err_msasm_unable_to_create_target) |
586 | << "target MC unavailable" ; |
587 | return EmptyStmt(); |
588 | } |
589 | |
590 | llvm::SourceMgr TempSrcMgr; |
591 | llvm::MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &TempSrcMgr); |
592 | std::unique_ptr<llvm::MCObjectFileInfo> MOFI( |
593 | TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false)); |
594 | Ctx.setObjectFileInfo(MOFI.get()); |
595 | |
596 | std::unique_ptr<llvm::MemoryBuffer> Buffer = |
597 | llvm::MemoryBuffer::getMemBuffer(InputData: AsmString, BufferName: "<MS inline asm>" ); |
598 | |
599 | // Tell SrcMgr about this buffer, which is what the parser will pick up. |
600 | TempSrcMgr.AddNewSourceBuffer(F: std::move(Buffer), IncludeLoc: llvm::SMLoc()); |
601 | |
602 | std::unique_ptr<llvm::MCStreamer> Str(createNullStreamer(Ctx)); |
603 | std::unique_ptr<llvm::MCAsmParser> Parser( |
604 | createMCAsmParser(TempSrcMgr, Ctx, *Str.get(), *MAI)); |
605 | |
606 | std::unique_ptr<llvm::MCTargetAsmParser> TargetParser( |
607 | TheTarget->createMCAsmParser(STI: *STI, Parser&: *Parser, MII: *MII, Options: MCOptions)); |
608 | // Target AsmParser may not be linked in clang-based tools. |
609 | if (!TargetParser) { |
610 | Diag(AsmLoc, diag::err_msasm_unable_to_create_target) |
611 | << "target ASM parser unavailable" ; |
612 | return EmptyStmt(); |
613 | } |
614 | |
615 | std::unique_ptr<llvm::MCInstPrinter> IP( |
616 | TheTarget->createMCInstPrinter(T: llvm::Triple(TT), SyntaxVariant: 1, MAI: *MAI, MII: *MII, MRI: *MRI)); |
617 | |
618 | // Change to the Intel dialect. |
619 | Parser->setAssemblerDialect(1); |
620 | Parser->setTargetParser(*TargetParser.get()); |
621 | Parser->setParsingMSInlineAsm(true); |
622 | TargetParser->setParsingMSInlineAsm(true); |
623 | |
624 | ClangAsmParserCallback Callback(*this, AsmLoc, AsmString, AsmToks, |
625 | TokOffsets); |
626 | TargetParser->setSemaCallback(&Callback); |
627 | TempSrcMgr.setDiagHandler(DH: ClangAsmParserCallback::DiagHandlerCallback, |
628 | Ctx: &Callback); |
629 | |
630 | unsigned NumOutputs; |
631 | unsigned NumInputs; |
632 | std::string AsmStringIR; |
633 | SmallVector<std::pair<void *, bool>, 4> OpExprs; |
634 | SmallVector<std::string, 4> Constraints; |
635 | SmallVector<std::string, 4> Clobbers; |
636 | if (Parser->parseMSInlineAsm(AsmString&: AsmStringIR, NumOutputs, NumInputs, OpDecls&: OpExprs, |
637 | Constraints, Clobbers, MII: MII.get(), IP: IP.get(), |
638 | SI&: Callback)) |
639 | return StmtError(); |
640 | |
641 | // Filter out "fpsw" and "mxcsr". They aren't valid GCC asm clobber |
642 | // constraints. Clang always adds fpsr to the clobber list anyway. |
643 | llvm::erase_if(C&: Clobbers, P: [](const std::string &C) { |
644 | return C == "fpsr" || C == "mxcsr" ; |
645 | }); |
646 | |
647 | // Build the vector of clobber StringRefs. |
648 | ClobberRefs.insert(I: ClobberRefs.end(), From: Clobbers.begin(), To: Clobbers.end()); |
649 | |
650 | // Recast the void pointers and build the vector of constraint StringRefs. |
651 | unsigned NumExprs = NumOutputs + NumInputs; |
652 | ConstraintRefs.resize(N: NumExprs); |
653 | Exprs.resize(N: NumExprs); |
654 | for (unsigned i = 0, e = NumExprs; i != e; ++i) { |
655 | Expr *OpExpr = static_cast<Expr *>(OpExprs[i].first); |
656 | if (!OpExpr) |
657 | return StmtError(); |
658 | |
659 | // Need address of variable. |
660 | if (OpExprs[i].second) |
661 | OpExpr = |
662 | Actions.BuildUnaryOp(S: getCurScope(), OpLoc: AsmLoc, Opc: UO_AddrOf, Input: OpExpr).get(); |
663 | |
664 | ConstraintRefs[i] = StringRef(Constraints[i]); |
665 | Exprs[i] = OpExpr; |
666 | } |
667 | |
668 | // FIXME: We should be passing source locations for better diagnostics. |
669 | return Actions.ActOnMSAsmStmt(AsmLoc, LBraceLoc: LBraceLocs[0], AsmToks, AsmString: AsmStringIR, |
670 | NumOutputs, NumInputs, Constraints: ConstraintRefs, |
671 | Clobbers: ClobberRefs, Exprs, EndLoc); |
672 | } |
673 | |
674 | /// parseGNUAsmQualifierListOpt - Parse a GNU extended asm qualifier list. |
675 | /// asm-qualifier: |
676 | /// volatile |
677 | /// inline |
678 | /// goto |
679 | /// |
680 | /// asm-qualifier-list: |
681 | /// asm-qualifier |
682 | /// asm-qualifier-list asm-qualifier |
683 | bool Parser::parseGNUAsmQualifierListOpt(GNUAsmQualifiers &AQ) { |
684 | while (true) { |
685 | const GNUAsmQualifiers::AQ A = getGNUAsmQualifier(Tok); |
686 | if (A == GNUAsmQualifiers::AQ_unspecified) { |
687 | if (Tok.isNot(K: tok::l_paren)) { |
688 | Diag(Tok.getLocation(), diag::err_asm_qualifier_ignored); |
689 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
690 | return true; |
691 | } |
692 | return false; |
693 | } |
694 | if (AQ.setAsmQualifier(A)) |
695 | Diag(Tok.getLocation(), diag::err_asm_duplicate_qual) |
696 | << GNUAsmQualifiers::getQualifierName(A); |
697 | ConsumeToken(); |
698 | } |
699 | return false; |
700 | } |
701 | |
702 | /// ParseAsmStatement - Parse a GNU extended asm statement. |
703 | /// asm-statement: |
704 | /// gnu-asm-statement |
705 | /// ms-asm-statement |
706 | /// |
707 | /// [GNU] gnu-asm-statement: |
708 | /// 'asm' asm-qualifier-list[opt] '(' asm-argument ')' ';' |
709 | /// |
710 | /// [GNU] asm-argument: |
711 | /// asm-string-literal |
712 | /// asm-string-literal ':' asm-operands[opt] |
713 | /// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] |
714 | /// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] |
715 | /// ':' asm-clobbers |
716 | /// |
717 | /// [GNU] asm-clobbers: |
718 | /// asm-string-literal |
719 | /// asm-clobbers ',' asm-string-literal |
720 | /// |
721 | StmtResult Parser::ParseAsmStatement(bool &msAsm) { |
722 | assert(Tok.is(tok::kw_asm) && "Not an asm stmt" ); |
723 | SourceLocation AsmLoc = ConsumeToken(); |
724 | |
725 | if (getLangOpts().AsmBlocks && !isGCCAsmStatement(TokAfterAsm: Tok)) { |
726 | msAsm = true; |
727 | return ParseMicrosoftAsmStatement(AsmLoc); |
728 | } |
729 | |
730 | SourceLocation Loc = Tok.getLocation(); |
731 | GNUAsmQualifiers GAQ; |
732 | if (parseGNUAsmQualifierListOpt(AQ&: GAQ)) |
733 | return StmtError(); |
734 | |
735 | if (GAQ.isGoto() && getLangOpts().SpeculativeLoadHardening) |
736 | Diag(Loc, diag::warn_slh_does_not_support_asm_goto); |
737 | |
738 | BalancedDelimiterTracker T(*this, tok::l_paren); |
739 | T.consumeOpen(); |
740 | |
741 | ExprResult AsmString(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); |
742 | |
743 | // Check if GNU-style InlineAsm is disabled. |
744 | // Error on anything other than empty string. |
745 | if (!(getLangOpts().GNUAsm || AsmString.isInvalid())) { |
746 | const auto *SL = cast<StringLiteral>(Val: AsmString.get()); |
747 | if (!SL->getString().trim().empty()) |
748 | Diag(Loc, diag::err_gnu_inline_asm_disabled); |
749 | } |
750 | |
751 | if (AsmString.isInvalid()) { |
752 | // Consume up to and including the closing paren. |
753 | T.skipToEnd(); |
754 | return StmtError(); |
755 | } |
756 | |
757 | SmallVector<IdentifierInfo *, 4> Names; |
758 | ExprVector Constraints; |
759 | ExprVector Exprs; |
760 | ExprVector Clobbers; |
761 | |
762 | if (Tok.is(K: tok::r_paren)) { |
763 | // We have a simple asm expression like 'asm("foo")'. |
764 | T.consumeClose(); |
765 | return Actions.ActOnGCCAsmStmt( |
766 | AsmLoc, /*isSimple*/ IsSimple: true, IsVolatile: GAQ.isVolatile(), |
767 | /*NumOutputs*/ 0, /*NumInputs*/ 0, Names: nullptr, Constraints, Exprs, |
768 | AsmString: AsmString.get(), Clobbers, /*NumLabels*/ 0, RParenLoc: T.getCloseLocation()); |
769 | } |
770 | |
771 | // Parse Outputs, if present. |
772 | bool = false; |
773 | if (Tok.is(K: tok::colon) || Tok.is(K: tok::coloncolon)) { |
774 | // In C++ mode, parse "::" like ": :". |
775 | AteExtraColon = Tok.is(K: tok::coloncolon); |
776 | ConsumeToken(); |
777 | |
778 | if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs)) |
779 | return StmtError(); |
780 | } |
781 | |
782 | unsigned NumOutputs = Names.size(); |
783 | |
784 | // Parse Inputs, if present. |
785 | if (AteExtraColon || Tok.is(K: tok::colon) || Tok.is(K: tok::coloncolon)) { |
786 | // In C++ mode, parse "::" like ": :". |
787 | if (AteExtraColon) |
788 | AteExtraColon = false; |
789 | else { |
790 | AteExtraColon = Tok.is(K: tok::coloncolon); |
791 | ConsumeToken(); |
792 | } |
793 | |
794 | if (!AteExtraColon && ParseAsmOperandsOpt(Names, Constraints, Exprs)) |
795 | return StmtError(); |
796 | } |
797 | |
798 | assert(Names.size() == Constraints.size() && |
799 | Constraints.size() == Exprs.size() && "Input operand size mismatch!" ); |
800 | |
801 | unsigned NumInputs = Names.size() - NumOutputs; |
802 | |
803 | // Parse the clobbers, if present. |
804 | if (AteExtraColon || Tok.is(K: tok::colon) || Tok.is(K: tok::coloncolon)) { |
805 | if (AteExtraColon) |
806 | AteExtraColon = false; |
807 | else { |
808 | AteExtraColon = Tok.is(K: tok::coloncolon); |
809 | ConsumeToken(); |
810 | } |
811 | // Parse the asm-string list for clobbers if present. |
812 | if (!AteExtraColon && isTokenStringLiteral()) { |
813 | while (true) { |
814 | ExprResult Clobber(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); |
815 | |
816 | if (Clobber.isInvalid()) |
817 | break; |
818 | |
819 | Clobbers.push_back(Elt: Clobber.get()); |
820 | |
821 | if (!TryConsumeToken(Expected: tok::comma)) |
822 | break; |
823 | } |
824 | } |
825 | } |
826 | if (!GAQ.isGoto() && (Tok.isNot(K: tok::r_paren) || AteExtraColon)) { |
827 | Diag(Tok, diag::err_expected) << tok::r_paren; |
828 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
829 | return StmtError(); |
830 | } |
831 | |
832 | // Parse the goto label, if present. |
833 | unsigned NumLabels = 0; |
834 | if (AteExtraColon || Tok.is(K: tok::colon)) { |
835 | if (!AteExtraColon) |
836 | ConsumeToken(); |
837 | |
838 | while (true) { |
839 | if (Tok.isNot(K: tok::identifier)) { |
840 | Diag(Tok, diag::err_expected) << tok::identifier; |
841 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
842 | return StmtError(); |
843 | } |
844 | LabelDecl *LD = Actions.LookupOrCreateLabel(II: Tok.getIdentifierInfo(), |
845 | IdentLoc: Tok.getLocation()); |
846 | Names.push_back(Elt: Tok.getIdentifierInfo()); |
847 | if (!LD) { |
848 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
849 | return StmtError(); |
850 | } |
851 | ExprResult Res = |
852 | Actions.ActOnAddrLabel(OpLoc: Tok.getLocation(), LabLoc: Tok.getLocation(), TheDecl: LD); |
853 | Exprs.push_back(Elt: Res.get()); |
854 | NumLabels++; |
855 | ConsumeToken(); |
856 | if (!TryConsumeToken(Expected: tok::comma)) |
857 | break; |
858 | } |
859 | } else if (GAQ.isGoto()) { |
860 | Diag(Tok, diag::err_expected) << tok::colon; |
861 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
862 | return StmtError(); |
863 | } |
864 | T.consumeClose(); |
865 | return Actions.ActOnGCCAsmStmt(AsmLoc, IsSimple: false, IsVolatile: GAQ.isVolatile(), NumOutputs, |
866 | NumInputs, Names: Names.data(), Constraints, Exprs, |
867 | AsmString: AsmString.get(), Clobbers, NumLabels, |
868 | RParenLoc: T.getCloseLocation()); |
869 | } |
870 | |
871 | /// ParseAsmOperands - Parse the asm-operands production as used by |
872 | /// asm-statement, assuming the leading ':' token was eaten. |
873 | /// |
874 | /// [GNU] asm-operands: |
875 | /// asm-operand |
876 | /// asm-operands ',' asm-operand |
877 | /// |
878 | /// [GNU] asm-operand: |
879 | /// asm-string-literal '(' expression ')' |
880 | /// '[' identifier ']' asm-string-literal '(' expression ')' |
881 | /// |
882 | // |
883 | // FIXME: Avoid unnecessary std::string trashing. |
884 | bool Parser::ParseAsmOperandsOpt(SmallVectorImpl<IdentifierInfo *> &Names, |
885 | SmallVectorImpl<Expr *> &Constraints, |
886 | SmallVectorImpl<Expr *> &Exprs) { |
887 | // 'asm-operands' isn't present? |
888 | if (!isTokenStringLiteral() && Tok.isNot(K: tok::l_square)) |
889 | return false; |
890 | |
891 | while (true) { |
892 | // Read the [id] if present. |
893 | if (Tok.is(K: tok::l_square)) { |
894 | BalancedDelimiterTracker T(*this, tok::l_square); |
895 | T.consumeOpen(); |
896 | |
897 | if (Tok.isNot(K: tok::identifier)) { |
898 | Diag(Tok, diag::err_expected) << tok::identifier; |
899 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
900 | return true; |
901 | } |
902 | |
903 | IdentifierInfo *II = Tok.getIdentifierInfo(); |
904 | ConsumeToken(); |
905 | |
906 | Names.push_back(Elt: II); |
907 | T.consumeClose(); |
908 | } else |
909 | Names.push_back(Elt: nullptr); |
910 | |
911 | ExprResult Constraint(ParseAsmStringLiteral(/*ForAsmLabel*/ false)); |
912 | if (Constraint.isInvalid()) { |
913 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
914 | return true; |
915 | } |
916 | Constraints.push_back(Elt: Constraint.get()); |
917 | |
918 | if (Tok.isNot(K: tok::l_paren)) { |
919 | Diag(Tok, diag::err_expected_lparen_after) << "asm operand" ; |
920 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
921 | return true; |
922 | } |
923 | |
924 | // Read the parenthesized expression. |
925 | BalancedDelimiterTracker T(*this, tok::l_paren); |
926 | T.consumeOpen(); |
927 | ExprResult Res = Actions.CorrectDelayedTyposInExpr(ER: ParseExpression()); |
928 | T.consumeClose(); |
929 | if (Res.isInvalid()) { |
930 | SkipUntil(T: tok::r_paren, Flags: StopAtSemi); |
931 | return true; |
932 | } |
933 | Exprs.push_back(Elt: Res.get()); |
934 | // Eat the comma and continue parsing if it exists. |
935 | if (!TryConsumeToken(Expected: tok::comma)) |
936 | return false; |
937 | } |
938 | } |
939 | |
940 | const char *Parser::GNUAsmQualifiers::getQualifierName(AQ Qualifier) { |
941 | switch (Qualifier) { |
942 | case AQ_volatile: return "volatile" ; |
943 | case AQ_inline: return "inline" ; |
944 | case AQ_goto: return "goto" ; |
945 | case AQ_unspecified: return "unspecified" ; |
946 | } |
947 | llvm_unreachable("Unknown GNUAsmQualifier" ); |
948 | } |
949 | |
950 | Parser::GNUAsmQualifiers::AQ |
951 | Parser::getGNUAsmQualifier(const Token &Tok) const { |
952 | switch (Tok.getKind()) { |
953 | case tok::kw_volatile: return GNUAsmQualifiers::AQ_volatile; |
954 | case tok::kw_inline: return GNUAsmQualifiers::AQ_inline; |
955 | case tok::kw_goto: return GNUAsmQualifiers::AQ_goto; |
956 | default: return GNUAsmQualifiers::AQ_unspecified; |
957 | } |
958 | } |
959 | bool Parser::GNUAsmQualifiers::setAsmQualifier(AQ Qualifier) { |
960 | bool IsDuplicate = Qualifiers & Qualifier; |
961 | Qualifiers |= Qualifier; |
962 | return IsDuplicate; |
963 | } |
964 | |