| 1 | // RUN: %libomptarget-compilexx-run-and-check-generic |
| 2 | |
| 3 | // Assuming the stack is allocated on the host starting at high addresses, the |
| 4 | // host memory layout for the following program looks like this: |
| 5 | // |
| 6 | // low addr <----------------------------------------------------- high addr |
| 7 | // | 16 bytes | 16 bytes | 16 bytes | ? bytes | |
| 8 | // | collidePost | s | collidePre | stackPad | |
| 9 | // | | x | y | z | | | |
| 10 | // `-------------' |
| 11 | // ^ `--------' |
| 12 | // | ^ |
| 13 | // | | |
| 14 | // | `-- too much padding (< 16 bytes) for s maps here |
| 15 | // | |
| 16 | // `------------------array extension error maps here |
| 17 | // |
| 18 | // libomptarget used to add too much padding to the device allocation of s and |
| 19 | // map it back to the host at the location indicated above when all of the |
| 20 | // following conditions were true: |
| 21 | // - Multiple members (s.y and s.z below) were mapped. In this case, initial |
| 22 | // padding might be needed to ensure later mapped members (s.z) are aligned |
| 23 | // properly on the device. (If the first member in the struct, s.x, were also |
| 24 | // mapped, then the correct initial padding would always be zero.) |
| 25 | // - mod16 = &s % 16 was not a power of 2 (e.g., 0x7ffcce2b584e % 16 = 14). |
| 26 | // libomptarget then incorrectly assumed mod16 was the existing host memory |
| 27 | // alignment of s. (The fix was to only look for alignments that are powers |
| 28 | // of 2.) |
| 29 | // - &s.y % mod16 was > 1 (e.g., 0x7ffcce2b584f % 14 = 11). libomptarget added |
| 30 | // padding of that size for s, but at most 1 byte is ever actually needed. |
| 31 | // |
| 32 | // Below, we try many sizes of stackPad to try to produce those conditions. |
| 33 | // |
| 34 | // When collidePost was then mapped to the same host memory as the unnecessary |
| 35 | // padding for s, libomptarget reported an array extension error. collidePost |
| 36 | // is never fully contained within that padding (which would avoid the extension |
| 37 | // error) because collidePost is 16 bytes while the padding is always less than |
| 38 | // 16 bytes due to the modulo operations. (Later, libomptarget was changed not |
| 39 | // to consider padding to be mapped to the host, so it cannot be involved in |
| 40 | // array extension errors.) |
| 41 | |
| 42 | #include <stdint.h> |
| 43 | #include <stdio.h> |
| 44 | |
| 45 | template <typename StackPad> |
| 46 | void test() { |
| 47 | StackPad stackPad; |
| 48 | struct S { char x; char y[7]; char z[8]; }; |
| 49 | struct S collidePre, s, collidePost; |
| 50 | uintptr_t mod16 = (uintptr_t)&s % 16; |
| 51 | fprintf(stderr, "&s = %p\n" , &s); |
| 52 | fprintf(stderr, format: "&s %% 16 = %lu\n" , mod16); |
| 53 | if (mod16) { |
| 54 | fprintf(stderr, "&s.y = %p\n" , &s.y); |
| 55 | fprintf(stderr, format: "&s.y %% %lu = %lu\n" , mod16, (uintptr_t)&s.y % mod16); |
| 56 | } |
| 57 | fprintf(stderr, "&collidePre = %p\n" , &collidePre); |
| 58 | fprintf(stderr, "&collidePost = %p\n" , &collidePost); |
| 59 | #pragma omp target data map(to:s.y, s.z) |
| 60 | #pragma omp target data map(to:collidePre, collidePost) |
| 61 | ; |
| 62 | } |
| 63 | |
| 64 | #define TEST(StackPad) \ |
| 65 | fprintf(stderr, "-------------------------------------\n"); \ |
| 66 | fprintf(stderr, "StackPad=%s\n", #StackPad); \ |
| 67 | test<StackPad>() |
| 68 | |
| 69 | int main() { |
| 70 | TEST(char[1]); |
| 71 | TEST(char[2]); |
| 72 | TEST(char[3]); |
| 73 | TEST(char[4]); |
| 74 | TEST(char[5]); |
| 75 | TEST(char[6]); |
| 76 | TEST(char[7]); |
| 77 | TEST(char[8]); |
| 78 | TEST(char[9]); |
| 79 | TEST(char[10]); |
| 80 | TEST(char[11]); |
| 81 | TEST(char[12]); |
| 82 | TEST(char[13]); |
| 83 | TEST(char[14]); |
| 84 | TEST(char[15]); |
| 85 | TEST(char[16]); |
| 86 | // CHECK: pass |
| 87 | printf(format: "pass\n" ); |
| 88 | return 0; |
| 89 | } |
| 90 | |