1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * trampoline.S: Jump start slave processors on sparc64. |
4 | * |
5 | * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) |
6 | */ |
7 | |
8 | |
9 | #include <linux/pgtable.h> |
10 | #include <asm/head.h> |
11 | #include <asm/asi.h> |
12 | #include <asm/lsu.h> |
13 | #include <asm/dcr.h> |
14 | #include <asm/dcu.h> |
15 | #include <asm/pstate.h> |
16 | #include <asm/page.h> |
17 | #include <asm/spitfire.h> |
18 | #include <asm/processor.h> |
19 | #include <asm/thread_info.h> |
20 | #include <asm/mmu.h> |
21 | #include <asm/hypervisor.h> |
22 | #include <asm/cpudata.h> |
23 | |
	.data
	.align	8
/* OBP client-interface service name; stored as the first cell of
 * every call-method request frame built below.
 */
call_method:
	.asciz	"call-method"
	.align	8
/* MMU method names passed to "call-method" to lock one translation
 * into the instruction TLB / data TLB respectively.
 */
itlb_load:
	.asciz	"SUNW,itlb-load"
	.align	8
dtlb_load:
	.asciz	"SUNW,dtlb-load"

#define TRAMP_STACK_SIZE 1024
	.align	16
/* Temporary stack for the slave cpu, used before it switches onto
 * its own thread stack.  It sits in the locked kernel image so it
 * stays mapped while the TLB locking calls below are in progress.
 */
tramp_stack:
	.skip	TRAMP_STACK_SIZE
39 | |
	.align		8
	.globl		sparc64_cpu_startup, sparc64_cpu_startup_end

/* Entry point for slave (secondary) cpus on sparc64.
 *
 * In:  %o0 - address of a cell holding this cpu's thread_info
 *            pointer.  Preserved in %l0 and dereferenced later,
 *            once a usable stack exists (see `ldx [%l0], ...`).
 *
 * Dispatches on cpu type; each *_startup label does the per-type
 * cache/MMU bring-up and then all paths converge on
 * startup_continue.
 */
sparc64_cpu_startup:
	BRANCH_IF_SUN4V(g1, niagara_startup)
	BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup)
	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup)

	ba,pt	%xcc, spitfire_startup
	 nop

cheetah_plus_startup:
	/* Preserve OBP chosen DCU and DCR register settings.  */
	ba,pt	%xcc, cheetah_generic_startup
	 nop

cheetah_startup:
	/* Program the dispatch control register (%asr18) with the
	 * DCR_* feature bits from asm/dcr.h.
	 */
	mov	DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
	wr	%g1, %asr18

	/* Build the 64-bit DCU control value: upper-half DCU_* bits
	 * assembled via %uhi/%ulo then shifted up, low half ORed in,
	 * enabling the D/I MMUs and D/I caches.
	 */
	sethi	%uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
	or	%g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
	sllx	%g5, 32, %g5
	or	%g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
	stxa	%g5, [%g0] ASI_DCU_CONTROL_REG
	membar	#Sync
	/* fallthru */

cheetah_generic_startup:
	/* Zero the primary, secondary and nucleus TSB extension
	 * registers so no stale firmware values remain.
	 */
	mov	TSB_EXTENSION_P, %g3
	stxa	%g0, [%g3] ASI_DMMU
	stxa	%g0, [%g3] ASI_IMMU
	membar	#Sync

	mov	TSB_EXTENSION_S, %g3
	stxa	%g0, [%g3] ASI_DMMU
	membar	#Sync

	mov	TSB_EXTENSION_N, %g3
	stxa	%g0, [%g3] ASI_DMMU
	stxa	%g0, [%g3] ASI_IMMU
	membar	#Sync
	/* fallthru */

niagara_startup:
	/* Disable STICK_INT interrupts by writing bit 63
	 * (0x80000000 << 32) of %asr25 (STICK compare).
	 */
	sethi	%hi(0x80000000), %g5
	sllx	%g5, 32, %g5
	wr	%g5, %asr25

	ba,pt	%xcc, startup_continue
	 nop

spitfire_startup:
	/* Enable the I/D caches and I/D MMUs via the LSU control
	 * register.
	 */
	mov	(LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1
	stxa	%g1, [%g0] ASI_LSU_CONTROL
	membar	#Sync

startup_continue:
	mov	%o0, %l0	! preserve entry arg across calls below
	BRANCH_IF_SUN4V(g1, niagara_lock_tlb)

	/* Disable tick compare interrupts: set bit 63 of %tick_cmpr. */
	sethi	%hi(0x80000000), %g2
	sllx	%g2, 32, %g2
	wr	%g2, 0, %tick_cmpr

	/* Call OBP by hand to lock KERNBASE into i/d tlbs.
	 * We lock 'num_kernel_image_mappings' consecutive entries.
	 * Serialize against other cpus with the prom_entry_lock byte
	 * spinlock (ldstub sets the byte to 0xff atomically; nonzero
	 * old value means somebody else holds it).
	 */
	sethi	%hi(prom_entry_lock), %g2
1:	ldstub	[%g2 + %lo(prom_entry_lock)], %g1
	brnz,pn	%g1, 1b
	 nop

	/* Get onto temporary stack which will be in the locked
	 * kernel image.
	 */
	sethi	%hi(tramp_stack), %g1
	or	%g1, %lo(tramp_stack), %g1
	add	%g1, TRAMP_STACK_SIZE, %g1
	sub	%g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
	flushw

	/* Setup the loop variables:
	 * %l3: VADDR base
	 * %l4: TTE base
	 * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings'
	 * %l6: Number of TTE entries to map
	 * %l7: Highest TTE entry number, we count down
	 */
	sethi	%hi(KERNBASE), %l3
	sethi	%hi(kern_locked_tte_data), %l4
	ldx	[%l4 + %lo(kern_locked_tte_data)], %l4
	clr	%l5
	sethi	%hi(num_kernel_image_mappings), %l6
	lduw	[%l6 + %lo(num_kernel_image_mappings)], %l6

	/* Highest usable TLB entry index: 15 on any Cheetah,
	 * 63 otherwise.
	 */
	mov	15, %l7
	BRANCH_IF_ANY_CHEETAH(g1,g5,2f)

	mov	63, %l7
2:

3:
	/* Lock into I-MMU */
	/* Build the call-method argument frame in this frame's
	 * register save area (%sp + STACK_BIAS(2047) + 128):
	 *   +0x00 service name ("call-method")
	 *   +0x08 number of input cells (5)
	 *   +0x10 number of return cells (1)
	 *   +0x18 method name ("SUNW,itlb-load")
	 *   +0x20 mmu ihandle (prom_mmu_ihandle_cache)
	 *   +0x28 virtual address
	 *   +0x30 tte data
	 *   +0x38 tlb entry index
	 */
	sethi	%hi(call_method), %g2
	or	%g2, %lo(call_method), %g2
	stx	%g2, [%sp + 2047 + 128 + 0x00]
	mov	5, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x08]
	mov	1, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x10]
	sethi	%hi(itlb_load), %g2
	or	%g2, %lo(itlb_load), %g2
	stx	%g2, [%sp + 2047 + 128 + 0x18]
	sethi	%hi(prom_mmu_ihandle_cache), %g2
	lduw	[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
	stx	%g2, [%sp + 2047 + 128 + 0x20]

	/* Each TTE maps 4MB, convert index to offset.  */
	sllx	%l5, 22, %g1

	add	%l3, %g1, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x28]	! VADDR
	add	%l4, %g1, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x30]	! TTE

	/* TTE index is highest minus loop index.  */
	sub	%l7, %l5, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x38]

	/* p1275buf + 0x08 holds the PROM cif entry point; call it
	 * with %o0 = address of the argument frame built above.
	 */
	sethi	%hi(p1275buf), %g2
	or	%g2, %lo(p1275buf), %g2
	ldx	[%g2 + 0x08], %o1
	call	%o1
	 add	%sp, (2047 + 128), %o0

	/* Lock into D-MMU */
	/* Identical argument frame, but method "SUNW,dtlb-load". */
	sethi	%hi(call_method), %g2
	or	%g2, %lo(call_method), %g2
	stx	%g2, [%sp + 2047 + 128 + 0x00]
	mov	5, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x08]
	mov	1, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x10]
	sethi	%hi(dtlb_load), %g2
	or	%g2, %lo(dtlb_load), %g2
	stx	%g2, [%sp + 2047 + 128 + 0x18]
	sethi	%hi(prom_mmu_ihandle_cache), %g2
	lduw	[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
	stx	%g2, [%sp + 2047 + 128 + 0x20]

	/* Each TTE maps 4MB, convert index to offset.  */
	sllx	%l5, 22, %g1

	add	%l3, %g1, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x28]	! VADDR
	add	%l4, %g1, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x30]	! TTE

	/* TTE index is highest minus loop index.  */
	sub	%l7, %l5, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x38]

	sethi	%hi(p1275buf), %g2
	or	%g2, %lo(p1275buf), %g2
	ldx	[%g2 + 0x08], %o1
	call	%o1
	 add	%sp, (2047 + 128), %o0

	/* Advance to the next 4MB mapping until all are locked. */
	add	%l5, 1, %l5
	cmp	%l5, %l6
	bne,pt	%xcc, 3b
	 nop

	/* Release the PROM entry lock. */
	sethi	%hi(prom_entry_lock), %g2
	stb	%g0, [%g2 + %lo(prom_entry_lock)]

	ba,pt	%xcc, after_lock_tlb
	 nop

niagara_lock_tlb:
	/* sun4v: lock the kernel image via the hypervisor's
	 * mmu_map_perm_addr fast trap instead of OBP; one 4MB
	 * mapping per iteration into each of the I-MMU and D-MMU.
	 */
	sethi	%hi(KERNBASE), %l3
	sethi	%hi(kern_locked_tte_data), %l4
	ldx	[%l4 + %lo(kern_locked_tte_data)], %l4
	clr	%l5
	sethi	%hi(num_kernel_image_mappings), %l6
	lduw	[%l6 + %lo(num_kernel_image_mappings)], %l6

1:
	mov	HV_FAST_MMU_MAP_PERM_ADDR, %o5
	sllx	%l5, 22, %g2		! 4MB * loop index
	add	%l3, %g2, %o0		! vaddr
	clr	%o1			! context 0
	add	%l4, %g2, %o2		! tte data
	mov	HV_MMU_IMMU, %o3
	ta	HV_FAST_TRAP

	mov	HV_FAST_MMU_MAP_PERM_ADDR, %o5
	sllx	%l5, 22, %g2
	add	%l3, %g2, %o0
	clr	%o1
	add	%l4, %g2, %o2
	mov	HV_MMU_DMMU, %o3
	ta	HV_FAST_TRAP

	add	%l5, 1, %l5
	cmp	%l5, %l6
	bne,pt	%xcc, 1b
	 nop

after_lock_tlb:
	/* Privileged mode with FPU enabled; clear FP register state. */
	wrpr	%g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
	wr	%g0, 0, %fprs

	wr	%g0, ASI_P, %asi

	/* Zero the primary and secondary MMU context registers.
	 * The 661:/.sun4v_1insn_patch pairs let early boot code
	 * replace the sun4u stxa with the sun4v ASI_MMU flavor.
	 */
	mov	PRIMARY_CONTEXT, %g7

661:	stxa	%g0, [%g7] ASI_DMMU
	.section .sun4v_1insn_patch, "ax"
	.word	661b
	stxa	%g0, [%g7] ASI_MMU
	.previous

	membar	#Sync
	mov	SECONDARY_CONTEXT, %g7

661:	stxa	%g0, [%g7] ASI_DMMU
	.section .sun4v_1insn_patch, "ax"
	.word	661b
	stxa	%g0, [%g7] ASI_MMU
	.previous

	membar	#Sync

	/* Everything we do here, until we properly take over the
	 * trap table, must be done with extreme care.  We cannot
	 * make any references to %g6 (current thread pointer),
	 * %g4 (current task pointer), or %g5 (base of current cpu's
	 * per-cpu area) until we properly take over the trap table
	 * from the firmware and hypervisor.
	 *
	 * Get onto temporary stack which is in the locked kernel image.
	 */
	sethi	%hi(tramp_stack), %g1
	or	%g1, %lo(tramp_stack), %g1
	add	%g1, TRAMP_STACK_SIZE, %g1
	sub	%g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
	mov	0, %fp

	/* Put garbage in these registers to trap any access to them.  */
	set	0xdeadbeef, %g4
	set	0xdeadbeef, %g5
	set	0xdeadbeef, %g6

	call	init_irqwork_curcpu
	 nop

	/* Only when tlb_type == 3 (presumably the sun4v/hypervisor
	 * value of the tlb_type enum -- NOTE(review): confirm) do we
	 * register the cpu's mondo queues with the hypervisor.
	 */
	sethi	%hi(tlb_type), %g3
	lduw	[%g3 + %lo(tlb_type)], %g2
	cmp	%g2, 3
	bne,pt	%icc, 1f
	 nop

	call	hard_smp_processor_id
	 nop

	/* %o0 still holds the cpu id returned by the call above. */
	call	sun4v_register_mondo_queues
	 nop

1:	call	init_cur_cpu_trap
	 ldx	[%l0], %o0		! arg = this cpu's thread pointer

	/* Start using proper page size encodings in ctx register.  */
	sethi	%hi(sparc64_kern_pri_context), %g3
	ldx	[%g3 + %lo(sparc64_kern_pri_context)], %g2
	mov	PRIMARY_CONTEXT, %g1

661:	stxa	%g2, [%g1] ASI_DMMU
	.section .sun4v_1insn_patch, "ax"
	.word	661b
	stxa	%g2, [%g1] ASI_MMU
	.previous

	membar	#Sync

	wrpr	%g0, 0, %wstate

	/* Re-take prom_entry_lock to serialize the set-trap-table
	 * PROM call with other cpus.
	 */
	sethi	%hi(prom_entry_lock), %g2
1:	ldstub	[%g2 + %lo(prom_entry_lock)], %g1
	brnz,pn	%g1, 1b
	 nop

	/* As a hack, put &init_thread_union into %g6.
	 * prom_world() loads from here to restore the %asi
	 * register.
	 */
	sethi	%hi(init_thread_union), %g6
	or	%g6, %lo(init_thread_union), %g6

	sethi	%hi(is_sun4v), %o0
	lduw	[%o0 + %lo(is_sun4v)], %o0
	brz,pt	%o0, 2f
	 nop

	/* sun4v: point the scratchpad register at this cpu's MMU
	 * fault status area before taking over the trap table.
	 */
	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
	add	%g2, TRAP_PER_CPU_FAULT_INFO, %g2
	stxa	%g2, [%g0] ASI_SCRATCHPAD

	/* Compute physical address:
	 *
	 * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
	 */
	sethi	%hi(KERNBASE), %g3
	sub	%g2, %g3, %g2
	sethi	%hi(kern_base), %g3
	ldx	[%g3 + %lo(kern_base)], %g3
	add	%g2, %g3, %o1
	sethi	%hi(sparc64_ttable_tl0), %o0

	/* Invoke the PROM set-trap-table service with two args:
	 * the trap table vaddr and the fault status area paddr.
	 */
	set	prom_set_trap_table_name, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x00]
	mov	2, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x08]
	mov	0, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x10]
	stx	%o0, [%sp + 2047 + 128 + 0x18]
	stx	%o1, [%sp + 2047 + 128 + 0x20]
	sethi	%hi(p1275buf), %g2
	or	%g2, %lo(p1275buf), %g2
	ldx	[%g2 + 0x08], %o1
	call	%o1
	 add	%sp, (2047 + 128), %o0

	ba,pt	%xcc, 3f
	 nop

	/* non-sun4v: set-trap-table takes only the trap table vaddr. */
2:	sethi	%hi(sparc64_ttable_tl0), %o0
	set	prom_set_trap_table_name, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x00]
	mov	1, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x08]
	mov	0, %g2
	stx	%g2, [%sp + 2047 + 128 + 0x10]
	stx	%o0, [%sp + 2047 + 128 + 0x18]
	sethi	%hi(p1275buf), %g2
	or	%g2, %lo(p1275buf), %g2
	ldx	[%g2 + 0x08], %o1
	call	%o1
	 add	%sp, (2047 + 128), %o0

3:	sethi	%hi(prom_entry_lock), %g2
	stb	%g0, [%g2 + %lo(prom_entry_lock)]

	/* Trap table is now ours: safe to load the real thread (%g6)
	 * and task (%g4) pointers.
	 */
	ldx	[%l0], %g6
	ldx	[%g6 + TI_TASK], %g4

	/* Switch onto this thread's own kernel stack:
	 * %sp = thread + (1 << THREAD_SHIFT) - STACKFRAME_SZ - STACK_BIAS
	 */
	mov	1, %g5
	sllx	%g5, THREAD_SHIFT, %g5
	sub	%g5, (STACKFRAME_SZ + STACK_BIAS), %g5
	add	%g6, %g5, %sp

	/* Enable interrupts (PSTATE_IE) and enter the generic SMP
	 * callin path; smp_callin() is not expected to return.
	 */
	rdpr	%pstate, %o1
	or	%o1, PSTATE_IE, %o1
	wrpr	%o1, 0, %pstate

	call	smp_callin
	 nop

	/* Should never get here; panic, then spin forever. */
	call	cpu_panic
	 nop
1:	b,a,pt	%xcc, 1b

	.align	8
sparc64_cpu_startup_end:
415 | |