/* Thread-local storage handling in the ELF dynamic linker.  ARM version.
   Copyright (C) 2006-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */
18 | |
19 | #include <sysdep.h> |
20 | #include <arm-features.h> |
21 | #include <tls.h> |
22 | #include <rtld-global-offsets.h> |
23 | #include "tlsdesc.h" |
24 | |
	.text

	@ emit debug information with cfi
	@ use arm-specific pseudos for unwinding itself
	CFI_SECTIONS

/* ptrdiff_t _dl_tlsdesc_return (struct tlsdesc *tdp);

   Trampoline for TLS descriptors that were fully resolved at
   relocation time: the descriptor's argument word already holds the
   final offset of the variable from the thread pointer, so we simply
   load and return it.

   On entry r0 points at the descriptor.  The TLS-descriptor calling
   convention permits clobbering only r0, r1 and the condition flags;
   this routine touches r0 alone.  */
	.hidden _dl_tlsdesc_return
	.global	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,#function
	cfi_startproc
	eabi_fnstart
	.align 2
_dl_tlsdesc_return:
	ldr	r0, [r0]	@ r0 = tdp->argument (precomputed tp offset)
	BX	(lr)
	eabi_fnend
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
41 | |
/* ptrdiff_t _dl_tlsdesc_undefweak (struct tlsdesc *tdp);

   Trampoline for descriptors referring to undefined weak TLS symbols.
   The resulting access must yield a NULL address, so we return the
   negated thread pointer: the caller adds the thread pointer to our
   return value, producing 0.

   GET_TLS leaves the thread pointer in r0 and may clobber the named
   scratch register (r1 here) — compare _dl_tlsdesc_dynamic below,
   which copies r0 to r4 with the comment "r4 = tp" right after the
   same macro.  Only r0, r1 and the flags are clobbered, as the
   TLS-descriptor calling convention requires.  */
	.hidden _dl_tlsdesc_undefweak
	.global	_dl_tlsdesc_undefweak
	.type	_dl_tlsdesc_undefweak,#function
	cfi_startproc
	eabi_fnstart
	.align 2
_dl_tlsdesc_undefweak:
	GET_TLS (r1)
	rsb	r0, r0, #0	@ r0 = 0 - tp; tp + r0 == NULL in the caller
	BX	(lr)
	cfi_endproc
	eabi_fnend
	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
55 | |
#ifdef SHARED
	.hidden _dl_tlsdesc_dynamic
	.global	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic,#function


/*
	The assembly code that follows is a rendition of the following
	C code, hand-optimized a little bit.

ptrdiff_t
_dl_tlsdesc_dynamic(struct tlsdesc *tdp)
{
  struct tlsdesc_dynamic_arg *td = tdp->argument.pointer;
  dtv_t *dtv = (dtv_t *)THREAD_DTV();
  if (__builtin_expect (td->gen_count <= dtv[0].counter
			&& dtv[td->tlsinfo.ti_module].pointer.val
			!= TLS_DTV_UNALLOCATED,
			1))
    return dtv[td->tlsinfo.ti_module].pointer.val +
	td->tlsinfo.ti_offset - __builtin_thread_pointer();

  return __tls_get_addr (&td->tlsinfo) - __builtin_thread_pointer();
}

*/
	cfi_startproc
	eabi_fnstart
	.align 2
_dl_tlsdesc_dynamic:
	/* Our calling convention is to clobber r0, r1 and the processor
	   flags.  All others that are modified must be saved.  r5 is
	   used as the hwcap value to avoid reload after __tls_get_addr
	   call.  If required we will save the vector register on the slow
	   path.  */
	eabi_save ({r2,r3,r4,r5,ip,lr})
	push	{r2,r3,r4,r5,ip,lr}
	cfi_adjust_cfa_offset (24)
	cfi_rel_offset (r2,0)
	cfi_rel_offset (r3,4)
	cfi_rel_offset (r4,8)
	cfi_rel_offset (r5,12)
	cfi_rel_offset (ip,16)
	cfi_rel_offset (lr,20)

	ldr	r1, [r0] /* td */
	GET_TLS (lr)
	mov	r4, r0 /* r4 = tp */
	ldr	r0, [r0]		@ r0 = dtv
	ldr	r2, [r1, #8] /* gen_count */
	ldr	r3, [r0]		@ r3 = dtv[0].counter
	cmp	r2, r3
	bhi	1f			@ generation too new: slow path
	ldr	r3, [r1]		@ r3 = ti_module
#ifndef ARM_NO_INDEX_REGISTER
	ldr	r2, [r0, r3, lsl #3]	@ r2 = dtv[ti_module].pointer.val
#else
	add	lr, r0, r3, lsl #3
	ldr	r2, [lr]
#endif
	cmn	r2, #1			@ TLS_DTV_UNALLOCATED (-1)?
	ittt	ne
	ldrne	r3, [r1, #4]		@ r3 = ti_offset
	addne	r3, r2, r3
	rsbne	r0, r4, r3		@ r0 = val + ti_offset - tp
	bne	2f
1:	mov	r0, r1			@ arg = &td->tlsinfo

	/* Load the hwcap to check for vector support.  */
	ldr	r2, 3f
	ldr	r1, .Lrtld_global_ro
0:	add	r2, pc, r2
	ldr	r2, [r2, r1]
	ldr	r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]

#ifdef __SOFTFP__
	tst	r5, #HWCAP_ARM_VFP
	beq	.Lno_vfp
#endif

	/* Store the VFP registers.  Don't use VFP instructions directly
	   because this code is used in non-VFP multilibs.  */
#define VFP_STACK_REQ (32*8 + 8)
	sub	sp, sp, VFP_STACK_REQ
	cfi_adjust_cfa_offset (VFP_STACK_REQ)
	mov	r3, sp
	.inst	0xeca30b20	/* vstmia r3!, {d0-d15}  */
	tst	r5, #HWCAP_ARM_VFPD32
	beq	4f
	.inst	0xece30b20	/* vstmia r3!, {d16-d31}  */
	/* Store the floating-point status register.  Reached on both
	   paths: r3 points just past whichever bank(s) were saved.  */
4:	.inst	0xeef12a10	/* vmrs r2, fpscr  */
	str	r2, [r3]
.Lno_vfp:
	bl	__tls_get_addr
	rsb	r0, r4, r0		@ r0 = __tls_get_addr (...) - tp
#ifdef __SOFTFP__
	tst	r5, #HWCAP_ARM_VFP
	beq	2f
#endif
	mov	r3, sp
	.inst	0xecb30b20	/* vldmia r3!, {d0-d15}  */
	tst	r5, #HWCAP_ARM_VFPD32
	beq	5f
	.inst	0xecf30b20	/* vldmia r3!, {d16-d31}  */
	/* Reload FPSCR on both paths, mirroring the store above: r3
	   points at the saved FPSCR word whether or not d16-d31 were
	   restored.  (The label must precede the load, or the
	   non-VFPD32 path would vmsr a stale r4 into FPSCR.)  */
5:	ldr	r4, [r3]
	.inst	0xeee14a10	/* vmsr fpscr, r4  */
	add	sp, sp, VFP_STACK_REQ
	cfi_adjust_cfa_offset (-VFP_STACK_REQ)

2:
#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
	pop	{r2,r3,r4,r5,ip, lr}
	/* Six registers (24 bytes) were pushed in the prologue, so the
	   CFA moves back by 24, matching cfi_adjust_cfa_offset (24).  */
	cfi_adjust_cfa_offset (-24)
	cfi_restore (lr)
	cfi_restore (ip)
	cfi_restore (r5)
	cfi_restore (r4)
	cfi_restore (r3)
	cfi_restore (r2)
	bx	lr
#else
	pop	{r2,r3,r4,r5,ip, pc}
#endif
	eabi_fnend
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic

	/* Literal pool: PC-relative offset to the GOT, and the GOT slot
	   offset of _rtld_global_ro (used to fetch dl_hwcap above).  */
3:	.long	_GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_global_ro:
	.long	C_SYMBOL_NAME(_rtld_global_ro)(GOT)
#endif /* SHARED */
189 | |