| 1 | //===-- AppleObjCTypeEncodingParser.cpp -----------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "AppleObjCTypeEncodingParser.h" |
| 10 | |
| 11 | #include "Plugins/ExpressionParser/Clang/ClangUtil.h" |
| 12 | #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" |
| 13 | #include "lldb/Symbol/CompilerType.h" |
| 14 | #include "lldb/Target/Process.h" |
| 15 | #include "lldb/Target/Target.h" |
| 16 | #include "lldb/Utility/LLDBLog.h" |
| 17 | #include "lldb/Utility/Log.h" |
| 18 | #include "lldb/Utility/StringLexer.h" |
| 19 | |
| 20 | #include "clang/Basic/TargetInfo.h" |
| 21 | |
| 22 | #include <optional> |
| 23 | #include <vector> |
| 24 | |
| 25 | using namespace lldb_private; |
| 26 | |
| 27 | AppleObjCTypeEncodingParser::AppleObjCTypeEncodingParser( |
| 28 | ObjCLanguageRuntime &runtime) |
| 29 | : ObjCLanguageRuntime::EncodingToType(), m_runtime(runtime) { |
| 30 | if (m_scratch_ast_ctx_sp) |
| 31 | return; |
| 32 | |
| 33 | m_scratch_ast_ctx_sp = std::make_shared<TypeSystemClang>( |
| 34 | args: "AppleObjCTypeEncodingParser ASTContext" , |
| 35 | args: runtime.GetProcess()->GetTarget().GetArchitecture().GetTriple()); |
| 36 | } |
| 37 | |
| 38 | std::string AppleObjCTypeEncodingParser::ReadStructName(StringLexer &type) { |
| 39 | StreamString buffer; |
| 40 | while (type.HasAtLeast(s: 1) && type.Peek() != '=') |
| 41 | buffer.Printf(format: "%c" , type.Next()); |
| 42 | return std::string(buffer.GetString()); |
| 43 | } |
| 44 | |
| 45 | std::optional<std::string> |
| 46 | AppleObjCTypeEncodingParser::ReadQuotedString(StringLexer &type) { |
| 47 | if (!type.HasAtLeast(s: 1)) |
| 48 | return std::nullopt; |
| 49 | |
| 50 | StreamString buffer; |
| 51 | while (type.Peek() != '"') { |
| 52 | buffer.Printf(format: "%c" , type.Next()); |
| 53 | if (!type.HasAtLeast(s: 1)) |
| 54 | return std::nullopt; |
| 55 | } |
| 56 | return std::string(buffer.GetString()); |
| 57 | } |
| 58 | |
| 59 | uint32_t AppleObjCTypeEncodingParser::ReadNumber(StringLexer &type) { |
| 60 | uint32_t total = 0; |
| 61 | while (type.HasAtLeast(s: 1) && isdigit(type.Peek())) |
| 62 | total = 10 * total + (type.Next() - '0'); |
| 63 | return total; |
| 64 | } |
| 65 | |
| 66 | // as an extension to the published grammar recent runtimes emit structs like |
| 67 | // this: |
| 68 | // "{CGRect=\"origin\"{CGPoint=\"x\"d\"y\"d}\"size\"{CGSize=\"width\"d\"height\"d}}" |
| 69 | |
| 70 | AppleObjCTypeEncodingParser::StructElement::StructElement() |
| 71 | : type(clang::QualType()) {} |
| 72 | |
| 73 | AppleObjCTypeEncodingParser::StructElement |
| 74 | AppleObjCTypeEncodingParser::ReadStructElement(TypeSystemClang &ast_ctx, |
| 75 | StringLexer &type, |
| 76 | bool for_expression) { |
| 77 | StructElement retval; |
| 78 | if (type.NextIf(c: '"')) { |
| 79 | if (auto maybe_name = ReadQuotedString(type)) |
| 80 | retval.name = *maybe_name; |
| 81 | else |
| 82 | return retval; |
| 83 | } |
| 84 | uint32_t bitfield_size = 0; |
| 85 | retval.type = BuildType(clang_ast_ctx&: ast_ctx, type, for_expression, bitfield_bit_size: &bitfield_size); |
| 86 | retval.bitfield = bitfield_size; |
| 87 | return retval; |
| 88 | } |
| 89 | |
| 90 | clang::QualType AppleObjCTypeEncodingParser::BuildStruct( |
| 91 | TypeSystemClang &ast_ctx, StringLexer &type, bool for_expression) { |
| 92 | return BuildAggregate(clang_ast_ctx&: ast_ctx, type, for_expression, _C_STRUCT_B, _C_STRUCT_E, |
| 93 | kind: llvm::to_underlying(E: clang::TagTypeKind::Struct)); |
| 94 | } |
| 95 | |
| 96 | clang::QualType AppleObjCTypeEncodingParser::BuildUnion( |
| 97 | TypeSystemClang &ast_ctx, StringLexer &type, bool for_expression) { |
| 98 | return BuildAggregate(clang_ast_ctx&: ast_ctx, type, for_expression, _C_UNION_B, _C_UNION_E, |
| 99 | kind: llvm::to_underlying(E: clang::TagTypeKind::Union)); |
| 100 | } |
| 101 | |
| 102 | clang::QualType AppleObjCTypeEncodingParser::BuildAggregate( |
| 103 | TypeSystemClang &ast_ctx, StringLexer &type, bool for_expression, |
| 104 | char opener, char closer, uint32_t kind) { |
| 105 | if (!type.NextIf(c: opener)) |
| 106 | return clang::QualType(); |
| 107 | std::string name(ReadStructName(type)); |
| 108 | |
| 109 | // We do not handle templated classes/structs at the moment. If the name has |
| 110 | // a < in it, we are going to abandon this. We're still obliged to parse it, |
| 111 | // so we just set a flag that means "Don't actually build anything." |
| 112 | |
| 113 | const bool is_templated = name.find(c: '<') != std::string::npos; |
| 114 | |
| 115 | if (!type.NextIf(c: '=')) |
| 116 | return clang::QualType(); |
| 117 | bool in_union = true; |
| 118 | std::vector<StructElement> elements; |
| 119 | while (in_union && type.HasAtLeast(s: 1)) { |
| 120 | if (type.NextIf(c: closer)) { |
| 121 | in_union = false; |
| 122 | break; |
| 123 | } else { |
| 124 | auto element = ReadStructElement(ast_ctx, type, for_expression); |
| 125 | if (element.type.isNull()) |
| 126 | break; |
| 127 | else |
| 128 | elements.push_back(x: element); |
| 129 | } |
| 130 | } |
| 131 | if (in_union) |
| 132 | return clang::QualType(); |
| 133 | |
| 134 | if (is_templated) |
| 135 | return clang::QualType(); // This is where we bail out. Sorry! |
| 136 | |
| 137 | CompilerType union_type(ast_ctx.CreateRecordType( |
| 138 | decl_ctx: nullptr, owning_module: OptionalClangModuleID(), access_type: lldb::eAccessPublic, name, kind, |
| 139 | language: lldb::eLanguageTypeC)); |
| 140 | if (union_type) { |
| 141 | TypeSystemClang::StartTagDeclarationDefinition(type: union_type); |
| 142 | |
| 143 | unsigned int count = 0; |
| 144 | for (auto element : elements) { |
| 145 | if (element.name.empty()) { |
| 146 | StreamString elem_name; |
| 147 | elem_name.Printf(format: "__unnamed_%u" , count); |
| 148 | element.name = std::string(elem_name.GetString()); |
| 149 | } |
| 150 | TypeSystemClang::AddFieldToRecordType( |
| 151 | type: union_type, name: element.name.c_str(), field_type: ast_ctx.GetType(qt: element.type), |
| 152 | access: lldb::eAccessPublic, bitfield_bit_size: element.bitfield); |
| 153 | ++count; |
| 154 | } |
| 155 | TypeSystemClang::CompleteTagDeclarationDefinition(type: union_type); |
| 156 | } |
| 157 | return ClangUtil::GetQualType(ct: union_type); |
| 158 | } |
| 159 | |
| 160 | clang::QualType AppleObjCTypeEncodingParser::BuildArray( |
| 161 | TypeSystemClang &ast_ctx, StringLexer &type, bool for_expression) { |
| 162 | if (!type.NextIf(_C_ARY_B)) |
| 163 | return clang::QualType(); |
| 164 | uint32_t size = ReadNumber(type); |
| 165 | clang::QualType element_type(BuildType(clang_ast_ctx&: ast_ctx, type, for_expression)); |
| 166 | if (!type.NextIf(_C_ARY_E)) |
| 167 | return clang::QualType(); |
| 168 | CompilerType array_type(ast_ctx.CreateArrayType( |
| 169 | element_type: CompilerType(ast_ctx.weak_from_this(), element_type.getAsOpaquePtr()), |
| 170 | element_count: size, is_vector: false)); |
| 171 | return ClangUtil::GetQualType(ct: array_type); |
| 172 | } |
| 173 | |
| 174 | // the runtime can emit these in the form of @"SomeType", giving more specifics |
| 175 | // this would be interesting for expression parser interop, but since we |
| 176 | // actually try to avoid exposing the ivar info to the expression evaluator, |
| 177 | // consume but ignore the type info and always return an 'id'; if anything, |
| 178 | // dynamic typing will resolve things for us anyway |
| 179 | clang::QualType AppleObjCTypeEncodingParser::BuildObjCObjectPointerType( |
| 180 | TypeSystemClang &clang_ast_ctx, StringLexer &type, bool for_expression) { |
| 181 | if (!type.NextIf(_C_ID)) |
| 182 | return clang::QualType(); |
| 183 | |
| 184 | clang::ASTContext &ast_ctx = clang_ast_ctx.getASTContext(); |
| 185 | |
| 186 | std::string name; |
| 187 | |
| 188 | if (type.NextIf(c: '"')) { |
| 189 | // We have to be careful here. We're used to seeing |
| 190 | // @"NSString" |
| 191 | // but in records it is possible that the string following an @ is the name |
| 192 | // of the next field and @ means "id". This is the case if anything |
| 193 | // unquoted except for "}", the end of the type, or another name follows |
| 194 | // the quoted string. |
| 195 | // |
| 196 | // E.g. |
| 197 | // - @"NSString"@ means "id, followed by a field named NSString of type id" |
| 198 | // - @"NSString"} means "a pointer to NSString and the end of the struct" - |
| 199 | // @"NSString""nextField" means "a pointer to NSString and a field named |
| 200 | // nextField" - @"NSString" followed by the end of the string means "a |
| 201 | // pointer to NSString" |
| 202 | // |
| 203 | // As a result, the rule is: If we see @ followed by a quoted string, we |
| 204 | // peek. - If we see }, ), ], the end of the string, or a quote ("), the |
| 205 | // quoted string is a class name. - If we see anything else, the quoted |
| 206 | // string is a field name and we push it back onto type. |
| 207 | |
| 208 | if (auto maybe_name = ReadQuotedString(type)) |
| 209 | name = *maybe_name; |
| 210 | else |
| 211 | return clang::QualType(); |
| 212 | |
| 213 | if (type.HasAtLeast(s: 1)) { |
| 214 | switch (type.Peek()) { |
| 215 | default: |
| 216 | // roll back |
| 217 | type.PutBack(s: name.length() + |
| 218 | 2); // undo our consumption of the string and of the quotes |
| 219 | name.clear(); |
| 220 | break; |
| 221 | case _C_STRUCT_E: |
| 222 | case _C_UNION_E: |
| 223 | case _C_ARY_E: |
| 224 | case '"': |
| 225 | // the quoted string is a class name – see the rule |
| 226 | break; |
| 227 | } |
| 228 | } else { |
| 229 | // the quoted string is a class name – see the rule |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | if (for_expression && !name.empty()) { |
| 234 | size_t less_than_pos = name.find(c: '<'); |
| 235 | |
| 236 | if (less_than_pos != std::string::npos) { |
| 237 | if (less_than_pos == 0) |
| 238 | return ast_ctx.getObjCIdType(); |
| 239 | else |
| 240 | name.erase(pos: less_than_pos); |
| 241 | } |
| 242 | |
| 243 | DeclVendor *decl_vendor = m_runtime.GetDeclVendor(); |
| 244 | if (!decl_vendor) |
| 245 | return clang::QualType(); |
| 246 | |
| 247 | auto types = decl_vendor->FindTypes(name: ConstString(name), /*max_matches*/ 1); |
| 248 | |
| 249 | if (types.empty()) { |
| 250 | // The user can forward-declare something that has no definition. The |
| 251 | // runtime doesn't prohibit this at all. This is a rare and very weird |
| 252 | // case. Assert assert in debug builds so we catch other weird cases. |
| 253 | assert(false && "forward declaration without definition" ); |
| 254 | LLDB_LOG(GetLog(LLDBLog::Types), |
| 255 | "forward declaration without definition: {0}" , name); |
| 256 | return ast_ctx.getObjCIdType(); |
| 257 | } |
| 258 | |
| 259 | return ClangUtil::GetQualType(ct: types.front().GetPointerType()); |
| 260 | } else { |
| 261 | // We're going to resolve this dynamically anyway, so just smile and wave. |
| 262 | return ast_ctx.getObjCIdType(); |
| 263 | } |
| 264 | } |
| 265 | |
| 266 | clang::QualType |
| 267 | AppleObjCTypeEncodingParser::BuildType(TypeSystemClang &clang_ast_ctx, |
| 268 | StringLexer &type, bool for_expression, |
| 269 | uint32_t *bitfield_bit_size) { |
| 270 | if (!type.HasAtLeast(s: 1)) |
| 271 | return clang::QualType(); |
| 272 | |
| 273 | clang::ASTContext &ast_ctx = clang_ast_ctx.getASTContext(); |
| 274 | |
| 275 | switch (type.Peek()) { |
| 276 | default: |
| 277 | break; |
| 278 | case _C_STRUCT_B: |
| 279 | return BuildStruct(ast_ctx&: clang_ast_ctx, type, for_expression); |
| 280 | case _C_ARY_B: |
| 281 | return BuildArray(ast_ctx&: clang_ast_ctx, type, for_expression); |
| 282 | case _C_UNION_B: |
| 283 | return BuildUnion(ast_ctx&: clang_ast_ctx, type, for_expression); |
| 284 | case _C_ID: |
| 285 | return BuildObjCObjectPointerType(clang_ast_ctx, type, for_expression); |
| 286 | } |
| 287 | |
| 288 | switch (type.Next()) { |
| 289 | default: |
| 290 | type.PutBack(s: 1); |
| 291 | return clang::QualType(); |
| 292 | case _C_CHR: |
| 293 | return ast_ctx.CharTy; |
| 294 | case _C_INT: |
| 295 | return ast_ctx.IntTy; |
| 296 | case _C_SHT: |
| 297 | return ast_ctx.ShortTy; |
| 298 | case _C_LNG: |
| 299 | return ast_ctx.getIntTypeForBitwidth(DestWidth: 32, Signed: true); |
| 300 | // this used to be done like this: |
| 301 | // return clang_ast_ctx->GetIntTypeFromBitSize(32, true).GetQualType(); |
| 302 | // which uses one of the constants if one is available, but we don't think |
| 303 | // all this work is necessary. |
| 304 | case _C_LNG_LNG: |
| 305 | return ast_ctx.LongLongTy; |
| 306 | case _C_UCHR: |
| 307 | return ast_ctx.UnsignedCharTy; |
| 308 | case _C_UINT: |
| 309 | return ast_ctx.UnsignedIntTy; |
| 310 | case _C_USHT: |
| 311 | return ast_ctx.UnsignedShortTy; |
| 312 | case _C_ULNG: |
| 313 | return ast_ctx.getIntTypeForBitwidth(DestWidth: 32, Signed: false); |
| 314 | // see note for _C_LNG |
| 315 | case _C_ULNG_LNG: |
| 316 | return ast_ctx.UnsignedLongLongTy; |
| 317 | case _C_FLT: |
| 318 | return ast_ctx.FloatTy; |
| 319 | case _C_DBL: |
| 320 | return ast_ctx.DoubleTy; |
| 321 | case _C_BOOL: |
| 322 | return ast_ctx.BoolTy; |
| 323 | case _C_VOID: |
| 324 | return ast_ctx.VoidTy; |
| 325 | case _C_CHARPTR: |
| 326 | return ast_ctx.getPointerType(ast_ctx.CharTy); |
| 327 | case _C_CLASS: |
| 328 | return ast_ctx.getObjCClassType(); |
| 329 | case _C_SEL: |
| 330 | return ast_ctx.getObjCSelType(); |
| 331 | case _C_BFLD: { |
| 332 | uint32_t size = ReadNumber(type); |
| 333 | if (bitfield_bit_size) { |
| 334 | *bitfield_bit_size = size; |
| 335 | return ast_ctx.UnsignedIntTy; // FIXME: the spec is fairly vague here. |
| 336 | } else |
| 337 | return clang::QualType(); |
| 338 | } |
| 339 | case _C_CONST: { |
| 340 | clang::QualType target_type = |
| 341 | BuildType(clang_ast_ctx, type, for_expression); |
| 342 | if (target_type.isNull()) |
| 343 | return clang::QualType(); |
| 344 | else if (target_type == ast_ctx.UnknownAnyTy) |
| 345 | return ast_ctx.UnknownAnyTy; |
| 346 | else |
| 347 | return ast_ctx.getConstType(T: target_type); |
| 348 | } |
| 349 | case _C_PTR: { |
| 350 | if (!for_expression && type.NextIf(_C_UNDEF)) { |
| 351 | // if we are not supporting the concept of unknownAny, but what is being |
| 352 | // created here is an unknownAny*, then we can just get away with a void* |
| 353 | // this is theoretically wrong (in the same sense as 'theoretically |
| 354 | // nothing exists') but is way better than outright failure in many |
| 355 | // practical cases |
| 356 | return ast_ctx.VoidPtrTy; |
| 357 | } else { |
| 358 | clang::QualType target_type = |
| 359 | BuildType(clang_ast_ctx, type, for_expression); |
| 360 | if (target_type.isNull()) |
| 361 | return clang::QualType(); |
| 362 | else if (target_type == ast_ctx.UnknownAnyTy) |
| 363 | return ast_ctx.UnknownAnyTy; |
| 364 | else |
| 365 | return ast_ctx.getPointerType(T: target_type); |
| 366 | } |
| 367 | } |
| 368 | case _C_UNDEF: |
| 369 | return for_expression ? ast_ctx.UnknownAnyTy : clang::QualType(); |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | CompilerType AppleObjCTypeEncodingParser::RealizeType(TypeSystemClang &ast_ctx, |
| 374 | const char *name, |
| 375 | bool for_expression) { |
| 376 | if (name && name[0]) { |
| 377 | StringLexer lexer(name); |
| 378 | clang::QualType qual_type = BuildType(clang_ast_ctx&: ast_ctx, type&: lexer, for_expression); |
| 379 | return ast_ctx.GetType(qt: qual_type); |
| 380 | } |
| 381 | return CompilerType(); |
| 382 | } |
| 383 | |