| 1 | // z_AIX_asm.S: - microtasking routines specifically |
| 2 | // written for Power platforms running AIX OS |
| 3 | |
| 4 | // |
| 5 | ////===----------------------------------------------------------------------===// |
| 6 | //// |
| 7 | //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 8 | //// See https://llvm.org/LICENSE.txt for license information. |
| 9 | //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 10 | //// |
| 11 | ////===----------------------------------------------------------------------===// |
| 12 | // |
| 13 | |
| 14 | // ----------------------------------------------------------------------- |
| 15 | // macros |
| 16 | // ----------------------------------------------------------------------- |
| 17 | |
| 18 | #include "kmp_config.h" |
| 19 | |
| 20 | #if KMP_OS_AIX |
| 21 | //------------------------------------------------------------------------ |
| 22 | // int |
| 23 | // __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...), |
| 24 | // int gtid, int tid, |
| 25 | // int argc, void *p_argv[] |
| 26 | // #if OMPT_SUPPORT |
| 27 | // , |
| 28 | // void **exit_frame_ptr |
| 29 | // #endif |
| 30 | // ) { |
| 31 | // #if OMPT_SUPPORT |
| 32 | // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); |
| 33 | // #endif |
| 34 | // |
| 35 | // (*pkfn)( & gtid, & tid, p_argv[0], ... ); |
| 36 | // |
| 37 | // // FIXME: This is done at call-site and can be removed here. |
| 38 | // #if OMPT_SUPPORT |
| 39 | // *exit_frame_ptr = 0; |
| 40 | // #endif |
| 41 | // |
| 42 | // return 1; |
| 43 | // } |
| 44 | // |
| 45 | // parameters: |
| 46 | // r3: pkfn |
| 47 | // r4: gtid |
| 48 | // r5: tid |
| 49 | // r6: argc |
| 50 | // r7: p_argv |
| 51 | // r8: exit_frame_ptr (passed only when OMPT_SUPPORT is enabled) |
| 52 | // |
| 53 | // return: r3 (always 1/TRUE) |
| 54 | // |
| 55 | |
| 56 | #if KMP_ARCH_PPC64_XCOFF |
| 57 | |
| 58 | .globl __kmp_invoke_microtask[DS] |
| 59 | .globl .__kmp_invoke_microtask |
| 60 | .align 4 |
| 61 | .csect __kmp_invoke_microtask[DS],3 |
| 62 | .vbyte 8, .__kmp_invoke_microtask |
| 63 | .vbyte 8, TOC[TC0] |
| 64 | .vbyte 8, 0 |
| 65 | .csect .text[PR],2 |
| 66 | .machine "pwr7" |
| 67 | .__kmp_invoke_microtask: |
| 68 | |
| 69 | |
| 70 | // -- Begin __kmp_invoke_microtask (64-bit XCOFF variant). On entry: r3 = pkfn (function descriptor), r4 = gtid, r5 = tid, r6 = argc, r7 = p_argv, r8 = exit_frame_ptr (OMPT only). Returns 1 in r3. |
| 71 | // mark_begin; |
| 72 | |
| 73 | // We need to allocate a stack frame large enough to hold all of the parameters |
| 74 | // on the stack for the microtask plus what this function needs. That's 48 |
| 75 | // bytes (the linkage area) under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for |
| 76 | // the parameters to the microtask (&gtid, &tid, argc elements of p_argv), |
| 77 | // plus 8 bytes to store the values of r4 and r5 (4 bytes each), and 8 bytes to store r31. |
| 78 | // With OMP-T support, we need an additional 8 bytes to save r30, which holds |
| 79 | // a copy of r8 (exit_frame_ptr) across the call. |
| 80 | // Stack offsets relative to the entry stack pointer (kept in r31): |
| 81 | // r31: -8, r30: -16, gtid: -20, tid: -24 |
| 82 | |
| 83 | mflr 0 |
| 84 | std 31, -8(1) # Save r31 below the entry SP (the new frame is allocated later) |
| 85 | std 0, 16(1) # Save LR to the linkage area at the entry SP |
| 86 | |
| 87 | // This is unusual because normally we'd set r31 equal to r1 after the stack |
| 88 | // frame is established. In this case, however, we need to dynamically compute |
| 89 | // the stack frame size, and so we keep a direct copy of r1 to access our |
| 90 | // register save areas and restore the r1 value before returning. |
| 91 | mr 31, 1 |
| 92 | |
| 93 | // Compute the size of the "argc" portion of the parameter save area. |
| 94 | // The parameter save area is always at least 64 bytes long (i.e. 8 regs). |
| 95 | // The microtask has (2 + argc) parameters, so if argc <= 6, we need to |
| 96 | // allocate 8*6 bytes, not 8*argc. |
| 97 | li 0, 6 |
| 98 | cmpwi 0, 6, 6 |
| 99 | iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 |
| 100 | sldi 0, 0, 3 # r0 = 8 * max(argc, 6) |
| 101 | |
| 102 | // Compute the size necessary for the local stack frame. |
| 103 | // 88 = 48 (linkage area) + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) + |
| 104 | // 8 (parameter slot for &gtid) + 8 (parameter slot for &tid) |
| 105 | li 12, 88 |
| 106 | add 12, 0, 12 |
| 107 | neg 12, 12 |
| 108 | |
| 109 | // We need to make sure that the stack frame stays aligned (to 16 bytes). Masking the negated size with -16 rounds the frame size up to a multiple of 16. |
| 110 | li 0, -16 |
| 111 | and 12, 0, 12 |
| 112 | |
| 113 | // Establish the local stack frame (stdux stores the old r1 at the new frame base as the back chain and updates r1). |
| 114 | stdux 1, 1, 12 |
| 115 | |
| 116 | #if OMPT_SUPPORT |
| 117 | std 30, -16(31) # Save r30 to the stack; the next two instructions set *exit_frame_ptr = r1 and r30 = exit_frame_ptr |
| 118 | std 1, 0(8) |
| 119 | mr 30, 8 |
| 120 | #endif |
| 121 | |
| 122 | // Store gtid and tid to the stack because they're passed by reference to the microtask. |
| 123 | stw 4, -20(31) # Save gtid to the stack |
| 124 | stw 5, -24(31) # Save tid to the stack |
| 125 | |
| 126 | mr 12, 6 # r12 = argc |
| 127 | mr 4, 7 # r4 = p_argv |
| 128 | |
| 129 | cmpwi 0, 12, 1 |
| 130 | blt 0, .Lcall # if (argc < 1) goto .Lcall |
| 131 | |
| 132 | ld 5, 0(4) # r5 = p_argv[0] |
| 133 | |
| 134 | cmpwi 0, 12, 2 |
| 135 | blt 0, .Lcall # if (argc < 2) goto .Lcall |
| 136 | |
| 137 | ld 6, 8(4) # r6 = p_argv[1] |
| 138 | |
| 139 | cmpwi 0, 12, 3 |
| 140 | blt 0, .Lcall # if (argc < 3) goto .Lcall |
| 141 | |
| 142 | ld 7, 16(4) # r7 = p_argv[2] |
| 143 | |
| 144 | cmpwi 0, 12, 4 |
| 145 | blt 0, .Lcall # if (argc < 4) goto .Lcall |
| 146 | |
| 147 | ld 8, 24(4) # r8 = p_argv[3] |
| 148 | |
| 149 | cmpwi 0, 12, 5 |
| 150 | blt 0, .Lcall # if (argc < 5) goto .Lcall |
| 151 | |
| 152 | ld 9, 32(4) # r9 = p_argv[4] |
| 153 | |
| 154 | cmpwi 0, 12, 6 |
| 155 | blt 0, .Lcall # if (argc < 6) goto .Lcall |
| 156 | |
| 157 | ld 10, 40(4) # r10 = p_argv[5] |
| 158 | |
| 159 | cmpwi 0, 12, 7 |
| 160 | blt 0, .Lcall # if (argc < 7) goto .Lcall |
| 161 | |
| 162 | // There are more than 6 microtask parameters, so we need to store the |
| 163 | // remainder to the stack. |
| 164 | addi 12, 12, -6 # r12 = argc - 6 = number of arguments to copy to the stack (loop count) |
| 165 | mtctr 12 |
| 166 | |
| 167 | // These are set to 8 bytes before the first desired load/store address (we're using |
| 168 | // pre-increment loads and stores in the loop below). The stack-passed part of the |
| 169 | // microtask's parameter save area begins 48 + 8*8 == 112 bytes above r1 for XCOFF64. |
| 170 | addi 4, 4, 40 # r4 = &p_argv[5], so the pre-increment load below |
| 171 | # starts at p_argv[6] (p_argv[0..5] are already in r5-r10) |
| 172 | addi 12, 1, 104 # r12 = r1 + (112 - 8), i.e. 8 bytes below the first stack slot |
| 173 | |
| 174 | .Lnext: |
| 175 | ldu 0, 8(4) |
| 176 | stdu 0, 8(12) |
| 177 | bdnz .Lnext |
| 178 | |
| 179 | .Lcall: |
| 180 | std 2, 40(1) # Save the TOC pointer to the linkage area |
| 181 | // Load the actual function address from the function descriptor. |
| 182 | ld 12, 0(3) # Function address |
| 183 | ld 2, 8(3) # TOC pointer |
| 184 | ld 11, 16(3) # Environment pointer |
| 185 | |
| 186 | addi 3, 31, -20 # r3 = &gtid |
| 187 | addi 4, 31, -24 # r4 = &tid |
| 188 | |
| 189 | mtctr 12 # CTR = function address |
| 190 | bctrl # Branch to CTR |
| 191 | ld 2, 40(1) # Restore TOC pointer from linkage area |
| 192 | |
| 193 | #if OMPT_SUPPORT |
| 194 | li 3, 0 |
| 195 | std 3, 0(30) |
| 196 | #endif |
| 197 | |
| 198 | li 3, 1 |
| 199 | |
| 200 | #if OMPT_SUPPORT |
| 201 | ld 30, -16(31) # Restore r30 from the saved value on the stack |
| 202 | #endif |
| 203 | |
| 204 | mr 1, 31 |
| 205 | ld 31, -8(1) # Restore r31 from the saved value on the stack |
| 206 | ld 0, 16(1) |
| 207 | mtlr 0 # Restore LR from the linkage area |
| 208 | blr # Branch to LR |
| 209 | |
| 210 | #else // KMP_ARCH_PPC_XCOFF |
| 211 | |
| 212 | .globl __kmp_invoke_microtask[DS] |
| 213 | .globl .__kmp_invoke_microtask |
| 214 | .align 4 |
| 215 | .csect __kmp_invoke_microtask[DS],2 |
| 216 | .vbyte 4, .__kmp_invoke_microtask |
| 217 | .vbyte 4, TOC[TC0] |
| 218 | .vbyte 4, 0 |
| 219 | .csect .text[PR],2 |
| 220 | .machine "pwr7" |
| 221 | .__kmp_invoke_microtask: |
| 222 | |
| 223 | |
| 224 | // -- Begin __kmp_invoke_microtask (32-bit XCOFF variant). On entry: r3 = pkfn (function descriptor), r4 = gtid, r5 = tid, r6 = argc, r7 = p_argv, r8 = exit_frame_ptr (OMPT only). Returns 1 in r3. |
| 225 | // mark_begin; |
| 226 | |
| 227 | // We need to allocate a stack frame large enough to hold all of the parameters |
| 228 | // on the stack for the microtask plus what this function needs. That's 24 |
| 229 | // bytes (the linkage area) under the XCOFF ABI, plus max(32, 4*(2 + argc)) for |
| 230 | // the parameters to the microtask (&gtid, &tid, argc elements of p_argv), |
| 231 | // plus 8 bytes to store the values of r4 and r5 (4 bytes each), and 4 bytes to store r31. |
| 232 | // With OMP-T support, we need an additional 4 bytes to save r30, which holds |
| 233 | // a copy of r8 (exit_frame_ptr) across the call. |
| 234 | // Stack offsets relative to the entry stack pointer (kept in r31): |
| 235 | // r31: -4, r30: -8, gtid: -12, tid: -16 |
| 236 | |
| 237 | mflr 0 |
| 238 | stw 31, -4(1) # Save r31 below the entry SP (the new frame is allocated later) |
| 239 | stw 0, 8(1) # Save LR to the linkage area at the entry SP |
| 240 | |
| 241 | // This is unusual because normally we'd set r31 equal to r1 after the stack |
| 242 | // frame is established. In this case, however, we need to dynamically compute |
| 243 | // the stack frame size, and so we keep a direct copy of r1 to access our |
| 244 | // register save areas and restore the r1 value before returning. |
| 245 | mr 31, 1 |
| 246 | |
| 247 | // Compute the size of the "argc" portion of the parameter save area. |
| 248 | // The parameter save area is always at least 32 bytes long (i.e. 8 regs). |
| 249 | // The microtask has (2 + argc) parameters, so if argc <= 6, we need to |
| 250 | // allocate 4*6 bytes, not 4*argc. |
| 251 | li 0, 6 |
| 252 | cmpwi 0, 6, 6 |
| 253 | iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6 |
| 254 | slwi 0, 0, 2 # r0 = 4 * max(argc, 6) |
| 255 | |
| 256 | // Compute the size necessary for the local stack frame. NOTE(review): the 32 below is 8 more than the 24-byte linkage figure quoted in the header comment; it looks like harmless over-allocation -- confirm. |
| 257 | // 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) + |
| 258 | // 4 (parameter slot for &gtid) + 4 (parameter slot for &tid) |
| 259 | li 12, 56 |
| 260 | add 12, 0, 12 |
| 261 | neg 12, 12 |
| 262 | |
| 263 | // We need to make sure that the stack frame stays aligned (to 16 bytes). Masking the negated size with -16 rounds the frame size up to a multiple of 16. |
| 264 | li 0, -16 |
| 265 | and 12, 0, 12 |
| 266 | |
| 267 | // Establish the local stack frame (stwux stores the old r1 at the new frame base as the back chain and updates r1). |
| 268 | stwux 1, 1, 12 |
| 269 | |
| 270 | #if OMPT_SUPPORT |
| 271 | stw 30, -8(31) # Save r30 to the stack; the next two instructions set *exit_frame_ptr = r1 and r30 = exit_frame_ptr |
| 272 | stw 1, 0(8) |
| 273 | mr 30, 8 |
| 274 | #endif |
| 275 | |
| 276 | // Store gtid and tid to the stack because they're passed by reference to the microtask. |
| 277 | stw 4, -12(31) # Save gtid to the stack |
| 278 | stw 5, -16(31) # Save tid to the stack |
| 279 | |
| 280 | mr 12, 6 # r12 = argc |
| 281 | mr 4, 7 # r4 = p_argv |
| 282 | |
| 283 | cmpwi 0, 12, 1 |
| 284 | blt 0, .Lcall # if (argc < 1) goto .Lcall |
| 285 | |
| 286 | lwz 5, 0(4) # r5 = p_argv[0] |
| 287 | |
| 288 | cmpwi 0, 12, 2 |
| 289 | blt 0, .Lcall # if (argc < 2) goto .Lcall |
| 290 | |
| 291 | lwz 6, 4(4) # r6 = p_argv[1] |
| 292 | |
| 293 | cmpwi 0, 12, 3 |
| 294 | blt 0, .Lcall # if (argc < 3) goto .Lcall |
| 295 | |
| 296 | lwz 7, 8(4) # r7 = p_argv[2] |
| 297 | |
| 298 | cmpwi 0, 12, 4 |
| 299 | blt 0, .Lcall # if (argc < 4) goto .Lcall |
| 300 | |
| 301 | lwz 8, 12(4) # r8 = p_argv[3] |
| 302 | |
| 303 | cmpwi 0, 12, 5 |
| 304 | blt 0, .Lcall # if (argc < 5) goto .Lcall |
| 305 | |
| 306 | lwz 9, 16(4) # r9 = p_argv[4] |
| 307 | |
| 308 | cmpwi 0, 12, 6 |
| 309 | blt 0, .Lcall # if (argc < 6) goto .Lcall |
| 310 | |
| 311 | lwz 10, 20(4) # r10 = p_argv[5] |
| 312 | |
| 313 | cmpwi 0, 12, 7 |
| 314 | blt 0, .Lcall # if (argc < 7) goto .Lcall |
| 315 | |
| 316 | // There are more than 6 microtask parameters, so we need to store the |
| 317 | // remainder to the stack. |
| 318 | addi 12, 12, -6 # r12 = argc - 6 = number of arguments to copy to the stack (loop count) |
| 319 | mtctr 12 |
| 320 | |
| 321 | // These are set to 4 bytes before the first desired load/store address (we're using |
| 322 | // pre-increment loads and stores in the loop below). The stack-passed part of the |
| 323 | // microtask's parameter save area begins 24 + 4*8 == 56 bytes above r1 for XCOFF. |
| 324 | addi 4, 4, 20 # r4 = &p_argv[5], so the pre-increment load below |
| 325 | # starts at p_argv[6] (p_argv[0..5] are already in r5-r10) |
| 326 | addi 12, 1, 52 # r12 = r1 + (56 - 4), i.e. 4 bytes below the first stack slot |
| 327 | |
| 328 | .Lnext: |
| 329 | lwzu 0, 4(4) |
| 330 | stwu 0, 4(12) |
| 331 | bdnz .Lnext |
| 332 | |
| 333 | .Lcall: |
| 334 | stw 2, 20(1) # Save the TOC pointer to the linkage area |
| 335 | // Load the actual function address from the function descriptor. |
| 336 | lwz 12, 0(3) # Function address |
| 337 | lwz 2, 4(3) # TOC pointer |
| 338 | lwz 11, 8(3) # Environment pointer |
| 339 | |
| 340 | addi 3, 31, -12 # r3 = &gtid |
| 341 | addi 4, 31, -16 # r4 = &tid |
| 342 | |
| 343 | mtctr 12 # CTR = function address |
| 344 | bctrl # Branch to CTR |
| 345 | lwz 2, 20(1) # Restore TOC pointer from linkage area |
| 346 | |
| 347 | #if OMPT_SUPPORT |
| 348 | li 3, 0 |
| 349 | stw 3, 0(30) |
| 350 | #endif |
| 351 | |
| 352 | li 3, 1 |
| 353 | |
| 354 | #if OMPT_SUPPORT |
| 355 | lwz 30, -8(31) # Restore r30 from the saved value on the stack |
| 356 | #endif |
| 357 | |
| 358 | mr 1, 31 |
| 359 | lwz 31, -4(1) # Restore r31 from the saved value on the stack |
| 360 | lwz 0, 8(1) |
| 361 | mtlr 0 # Restore LR from the linkage area |
| 362 | blr # Branch to LR |
| 363 | |
| 364 | #endif // KMP_ARCH_PPC64_XCOFF |
| 365 | |
| 366 | .Lfunc_end0: |
| 367 | .vbyte 4, 0x00000000 # Traceback table begin (all-zero marker word) |
| 368 | .byte 0x00 # Version = 0 |
| 369 | .byte 0x09 # Language = CPlusPlus |
| 370 | .byte 0x20 # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue |
| 371 | # +HasTraceBackTableOffset, -IsInternalProcedure |
| 372 | # -HasControlledStorage, -IsTOCless |
| 373 | # -IsFloatingPointPresent |
| 374 | # -IsFloatingPointOperationLogOrAbortEnabled |
| 375 | .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed |
| 376 | # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved |
| 377 | .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0 |
| 378 | #if OMPT_SUPPORT |
| 379 | .byte 0x02 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2 (r30 and r31) |
| 380 | .byte 0x06 # NumberOfFixedParms = 6 (includes exit_frame_ptr) |
| 381 | #else |
| 382 | .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1 (r31) |
| 383 | .byte 0x05 # NumberOfFixedParms = 5 |
| 384 | #endif |
| 385 | .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack |
| 386 | .vbyte 4, 0x00000000 # Parameter type = i for every fixed parameter (all type bits are 0) |
| 387 | .vbyte 4, .Lfunc_end0-.__kmp_invoke_microtask # Function size (offset from the entry label to this table) |
| 388 | .vbyte 2, 0x0016 # Function name len = 22 |
| 389 | .byte "__kmp_invoke_microtask" # Function Name |
| 390 | .byte 0x1f # AllocaRegister = 31 (r31 holds the entry stack pointer) |
| 391 | # -- End function |
| 392 | |
| 393 | // -- End __kmp_invoke_microtask |
| 394 | |
| 395 | // Support for unnamed common blocks: .gomp_critical_user_ is declared as a 32-byte common symbol, and __kmp_unnamed_critical_addr is an exported [RW] csect holding a pointer to it (csect alignment operand 3 for 64-bit XCOFF, 2 for 32-bit). |
| 396 | |
| 397 | .comm .gomp_critical_user_, 32, 3 |
| 398 | #if KMP_ARCH_PPC64_XCOFF |
| 399 | .csect __kmp_unnamed_critical_addr[RW],3 |
| 400 | #else |
| 401 | .csect __kmp_unnamed_critical_addr[RW],2 |
| 402 | #endif |
| 403 | .globl __kmp_unnamed_critical_addr[RW] |
| 404 | .ptr .gomp_critical_user_ |
| 405 | |
| 406 | // -- End unnamed common block |
| 407 | |
| 408 | .toc |
| 409 | |
| 410 | #endif // KMP_OS_AIX |
| 411 | |