1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ |
2 | /* |
3 | * This file contains miscellaneous low-level functions. |
4 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) |
5 | * |
6 | * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) |
7 | * and Paul Mackerras. |
8 | * |
9 | */ |
10 | |
11 | #include <linux/export.h> |
12 | #include <linux/sys.h> |
13 | #include <asm/unistd.h> |
14 | #include <asm/errno.h> |
15 | #include <asm/reg.h> |
16 | #include <asm/page.h> |
17 | #include <asm/cache.h> |
18 | #include <asm/cputable.h> |
19 | #include <asm/mmu.h> |
20 | #include <asm/ppc_asm.h> |
21 | #include <asm/thread_info.h> |
22 | #include <asm/asm-offsets.h> |
23 | #include <asm/processor.h> |
24 | #include <asm/bug.h> |
25 | #include <asm/ptrace.h> |
26 | #include <asm/feature-fixups.h> |
27 | |
28 | .text |
29 | |
30 | /* |
31 | * This returns the high 64 bits of the product of two 64-bit numbers. |
32 | */ |
33 | _GLOBAL(mulhdu) |
34 | cmpwi r6,0 |
35 | cmpwi cr1,r3,0 |
36 | mr r10,r4 |
37 | mulhwu r4,r4,r5 |
38 | beq 1f |
39 | mulhwu r0,r10,r6 |
40 | mullw r7,r10,r5 |
41 | addc r7,r0,r7 |
42 | addze r4,r4 |
43 | 1: beqlr cr1 /* all done if high part of A is 0 */ |
44 | mullw r9,r3,r5 |
45 | mulhwu r10,r3,r5 |
46 | beq 2f |
47 | mullw r0,r3,r6 |
48 | mulhwu r8,r3,r6 |
49 | addc r7,r0,r7 |
50 | adde r4,r4,r8 |
51 | addze r10,r10 |
52 | 2: addc r4,r4,r9 |
53 | addze r3,r10 |
54 | blr |
55 | |
56 | /* |
57 | * reloc_got2 runs through the .got2 section adding an offset |
58 | * to each entry. |
59 | */ |
60 | _GLOBAL(reloc_got2) |
61 | mflr r11 |
62 | lis r7,__got2_start@ha |
63 | addi r7,r7,__got2_start@l |
64 | lis r8,__got2_end@ha |
65 | addi r8,r8,__got2_end@l |
66 | subf r8,r7,r8 |
67 | srwi. r8,r8,2 |
68 | beqlr |
69 | mtctr r8 |
70 | bcl 20,31,$+4 |
71 | 1: mflr r0 |
72 | lis r4,1b@ha |
73 | addi r4,r4,1b@l |
74 | subf r0,r4,r0 |
75 | add r7,r0,r7 |
76 | 2: lwz r0,0(r7) |
77 | add r0,r0,r3 |
78 | stw r0,0(r7) |
79 | addi r7,r7,4 |
80 | bdnz 2b |
81 | mtlr r11 |
82 | blr |
83 | |
84 | /* |
85 | * call_setup_cpu - call the setup_cpu function for this cpu |
86 | * r3 = data offset, r24 = cpu number |
87 | * |
88 | * Setup function is called with: |
89 | * r3 = data offset |
90 | * r4 = ptr to CPU spec (relocated) |
91 | */ |
92 | _GLOBAL(call_setup_cpu) |
93 | addis r4,r3,cur_cpu_spec@ha |
94 | addi r4,r4,cur_cpu_spec@l |
95 | lwz r4,0(r4) |
96 | add r4,r4,r3 |
97 | lwz r5,CPU_SPEC_SETUP(r4) |
98 | cmpwi 0,r5,0 |
99 | add r5,r5,r3 |
100 | beqlr |
101 | mtctr r5 |
102 | bctr |
103 | |
104 | #if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_PPC_BOOK3S_32) |
105 | |
/* This gets called by via-pmu.c to switch the PLL selection
 * on 750fx CPUs. This function should really be moved to some
 * other place (as should most of the cpufreq code in via-pmu).
 */
110 | _GLOBAL(low_choose_750fx_pll) |
111 | /* Clear MSR:EE */ |
112 | mfmsr r7 |
113 | rlwinm r0,r7,0,17,15 |
114 | mtmsr r0 |
115 | |
116 | /* If switching to PLL1, disable HID0:BTIC */ |
117 | cmplwi cr0,r3,0 |
118 | beq 1f |
119 | mfspr r5,SPRN_HID0 |
120 | rlwinm r5,r5,0,27,25 |
121 | sync |
122 | mtspr SPRN_HID0,r5 |
123 | isync |
124 | sync |
125 | |
126 | 1: |
127 | /* Calc new HID1 value */ |
	mfspr	r4,SPRN_HID1
	rlwinm	r5,r3,16,15,15	/* Build a HID1:PS bit from the parameter */
	rlwinm	r4,r4,0,16,14	/* Clear out HID1:PS from the value read */
	or	r4,r4,r5	/* (a single rlwimi could do both steps) */
132 | mtspr SPRN_HID1,r4 |
133 | |
134 | #ifdef CONFIG_SMP |
135 | /* Store new HID1 image */ |
136 | lwz r6,TASK_CPU(r2) |
137 | slwi r6,r6,2 |
138 | #else |
139 | li r6, 0 |
140 | #endif |
141 | addis r6,r6,nap_save_hid1@ha |
142 | stw r4,nap_save_hid1@l(r6) |
143 | |
144 | /* If switching to PLL0, enable HID0:BTIC */ |
145 | cmplwi cr0,r3,0 |
146 | bne 1f |
147 | mfspr r5,SPRN_HID0 |
148 | ori r5,r5,HID0_BTIC |
149 | sync |
150 | mtspr SPRN_HID0,r5 |
151 | isync |
152 | sync |
153 | |
154 | 1: |
155 | /* Return */ |
156 | mtmsr r7 |
157 | blr |
158 | |
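/*
 * Flip the 7447A dynamic frequency switching (DFS) control in HID1:
 * r3 holds the new value of the DFS bit (HID1 bit 9) to insert.
 */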
159 | _GLOBAL(low_choose_7447a_dfs) |
160 | /* Clear MSR:EE */ |
161 | mfmsr r7 |
162 | rlwinm r0,r7,0,17,15 |
163 | mtmsr r0 |
164 | |
165 | /* Calc new HID1 value */ |
166 | mfspr r4,SPRN_HID1 |
167 | insrwi r4,r3,1,9 /* insert parameter into bit 9 */ |
168 | sync |
169 | mtspr SPRN_HID1,r4 |
170 | sync |
171 | isync |
172 | |
173 | /* Return */ |
174 | mtmsr r7 |
175 | blr |
176 | |
177 | #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */ |
178 | |
179 | #ifdef CONFIG_40x |
180 | |
181 | /* |
182 | * Do an IO access in real mode |
183 | */ |
184 | _GLOBAL(real_readb) |
185 | mfmsr r7 |
186 | rlwinm r0,r7,0,~MSR_DR |
187 | sync |
188 | mtmsr r0 |
189 | sync |
190 | isync |
191 | lbz r3,0(r3) |
192 | sync |
193 | mtmsr r7 |
194 | sync |
195 | isync |
196 | blr |
197 | _ASM_NOKPROBE_SYMBOL(real_readb) |
198 | |
199 | /* |
200 | * Do an IO access in real mode |
201 | */ |
202 | _GLOBAL(real_writeb) |
203 | mfmsr r7 |
204 | rlwinm r0,r7,0,~MSR_DR |
205 | sync |
206 | mtmsr r0 |
207 | sync |
208 | isync |
209 | stb r3,0(r4) |
210 | sync |
211 | mtmsr r7 |
212 | sync |
213 | isync |
214 | blr |
215 | _ASM_NOKPROBE_SYMBOL(real_writeb) |
216 | |
217 | #endif /* CONFIG_40x */ |
218 | |
219 | /* |
220 | * Copy a whole page. We use the dcbz instruction on the destination |
221 | * to reduce memory traffic (it eliminates the unnecessary reads of |
222 | * the destination into cache). This requires that the destination |
223 | * is cacheable. |
224 | */ |
225 | #define COPY_16_BYTES \ |
226 | lwz r6,4(r4); \ |
227 | lwz r7,8(r4); \ |
228 | lwz r8,12(r4); \ |
229 | lwzu r9,16(r4); \ |
230 | stw r6,4(r3); \ |
231 | stw r7,8(r3); \ |
232 | stw r8,12(r3); \ |
233 | stwu r9,16(r3) |
234 | |
235 | _GLOBAL(copy_page) |
236 | rlwinm r5, r3, 0, L1_CACHE_BYTES - 1 |
237 | addi r3,r3,-4 |
238 | |
239 | 0: twnei r5, 0 /* WARN if r3 is not cache aligned */ |
240 | EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING |
241 | |
242 | addi r4,r4,-4 |
243 | |
244 | li r5,4 |
245 | |
246 | #if MAX_COPY_PREFETCH > 1 |
247 | li r0,MAX_COPY_PREFETCH |
248 | li r11,4 |
249 | mtctr r0 |
250 | 11: dcbt r11,r4 |
251 | addi r11,r11,L1_CACHE_BYTES |
252 | bdnz 11b |
253 | #else /* MAX_COPY_PREFETCH == 1 */ |
254 | dcbt r5,r4 |
255 | li r11,L1_CACHE_BYTES+4 |
256 | #endif /* MAX_COPY_PREFETCH */ |
257 | li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH |
258 | crclr 4*cr0+eq |
259 | 2: |
260 | mtctr r0 |
261 | 1: |
262 | dcbt r11,r4 |
263 | dcbz r5,r3 |
264 | COPY_16_BYTES |
265 | #if L1_CACHE_BYTES >= 32 |
266 | COPY_16_BYTES |
267 | #if L1_CACHE_BYTES >= 64 |
268 | COPY_16_BYTES |
269 | COPY_16_BYTES |
270 | #if L1_CACHE_BYTES >= 128 |
271 | COPY_16_BYTES |
272 | COPY_16_BYTES |
273 | COPY_16_BYTES |
274 | COPY_16_BYTES |
275 | #endif |
276 | #endif |
277 | #endif |
278 | bdnz 1b |
279 | beqlr |
280 | crnot 4*cr0+eq,4*cr0+eq |
281 | li r0,MAX_COPY_PREFETCH |
282 | li r11,4 |
283 | b 2b |
284 | EXPORT_SYMBOL(copy_page) |
285 | |
286 | /* |
287 | * Extended precision shifts. |
288 | * |
289 | * Updated to be valid for shift counts from 0 to 63 inclusive. |
290 | * -- Gabriel |
291 | * |
292 | * R3/R4 has 64 bit value |
293 | * R5 has shift count |
294 | * result in R3/R4 |
295 | * |
296 | * ashrdi3: arithmetic right shift (sign propagation) |
297 | * lshrdi3: logical right shift |
298 | * ashldi3: left shift |
299 | */ |
300 | _GLOBAL(__ashrdi3) |
301 | subfic r6,r5,32 |
302 | srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count |
303 | addi r7,r5,32 # could be xori, or addi with -32 |
304 | slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count) |
305 | rlwinm r8,r7,0,32 # t3 = (count < 32) ? 32 : 0 |
306 | sraw r7,r3,r7 # t2 = MSW >> (count-32) |
307 | or r4,r4,r6 # LSW |= t1 |
308 | slw r7,r7,r8 # t2 = (count < 32) ? 0 : t2 |
309 | sraw r3,r3,r5 # MSW = MSW >> count |
310 | or r4,r4,r7 # LSW |= t2 |
311 | blr |
312 | EXPORT_SYMBOL(__ashrdi3) |
313 | |
314 | _GLOBAL(__ashldi3) |
315 | subfic r6,r5,32 |
316 | slw r3,r3,r5 # MSW = count > 31 ? 0 : MSW << count |
317 | addi r7,r5,32 # could be xori, or addi with -32 |
318 | srw r6,r4,r6 # t1 = count > 31 ? 0 : LSW >> (32-count) |
319 | slw r7,r4,r7 # t2 = count < 32 ? 0 : LSW << (count-32) |
320 | or r3,r3,r6 # MSW |= t1 |
321 | slw r4,r4,r5 # LSW = LSW << count |
322 | or r3,r3,r7 # MSW |= t2 |
323 | blr |
324 | EXPORT_SYMBOL(__ashldi3) |
325 | |
326 | _GLOBAL(__lshrdi3) |
327 | subfic r6,r5,32 |
328 | srw r4,r4,r5 # LSW = count > 31 ? 0 : LSW >> count |
329 | addi r7,r5,32 # could be xori, or addi with -32 |
330 | slw r6,r3,r6 # t1 = count > 31 ? 0 : MSW << (32-count) |
331 | srw r7,r3,r7 # t2 = count < 32 ? 0 : MSW >> (count-32) |
332 | or r4,r4,r6 # LSW |= t1 |
333 | srw r3,r3,r5 # MSW = MSW >> count |
334 | or r4,r4,r7 # LSW |= t2 |
335 | blr |
336 | EXPORT_SYMBOL(__lshrdi3) |
337 | |
338 | /* |
339 | * 64-bit comparison: __cmpdi2(s64 a, s64 b) |
340 | * Returns 0 if a < b, 1 if a == b, 2 if a > b. |
341 | */ |
342 | _GLOBAL(__cmpdi2) |
343 | cmpw r3,r5 |
344 | li r3,1 |
345 | bne 1f |
346 | cmplw r4,r6 |
347 | beqlr |
348 | 1: li r3,0 |
349 | bltlr |
350 | li r3,2 |
351 | blr |
352 | EXPORT_SYMBOL(__cmpdi2) |
353 | /* |
354 | * 64-bit comparison: __ucmpdi2(u64 a, u64 b) |
355 | * Returns 0 if a < b, 1 if a == b, 2 if a > b. |
356 | */ |
357 | _GLOBAL(__ucmpdi2) |
358 | cmplw r3,r5 |
359 | li r3,1 |
360 | bne 1f |
361 | cmplw r4,r6 |
362 | beqlr |
363 | 1: li r3,0 |
364 | bltlr |
365 | li r3,2 |
366 | blr |
367 | EXPORT_SYMBOL(__ucmpdi2) |
368 | |
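/*
 * __bswapdi2: byte-reverse the 64-bit value in r3/r4. Each word is
 * byte-reversed with a rotate-left-by-8 plus two rlwimi mask inserts,
 * and the two words then trade places (the reversed low word becomes
 * the new high word).
 */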
369 | _GLOBAL(__bswapdi2) |
370 | rotlwi r9,r4,8 |
371 | rotlwi r10,r3,8 |
372 | rlwimi r9,r4,24,0,7 |
373 | rlwimi r10,r3,24,0,7 |
374 | rlwimi r9,r4,24,16,23 |
375 | rlwimi r10,r3,24,16,23 |
376 | mr r3,r9 |
377 | mr r4,r10 |
378 | blr |
379 | EXPORT_SYMBOL(__bswapdi2) |
380 | |
381 | #ifdef CONFIG_SMP |
382 | _GLOBAL(start_secondary_resume) |
383 | /* Reset stack */ |
384 | rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT |
385 | addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE |
386 | li r3,0 |
	stw	r3,0(r1)	/* Zero the back chain to terminate the frame list */
388 | bl start_secondary |
389 | b . |
390 | #endif /* CONFIG_SMP */ |
391 | |