1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* |
3 | * Copyright (C) 1991,1992 Linus Torvalds |
4 | * |
5 | * entry_32.S contains the system-call and low-level fault and trap handling routines. |
6 | * |
7 | * Stack layout while running C code: |
8 | * ptrace needs to have all registers on the stack. |
9 | * If the order here is changed, it needs to be |
10 | * updated in fork.c:copy_process(), signal.c:do_signal(), |
11 | * ptrace.c and ptrace.h |
12 | * |
13 | * 0(%esp) - %ebx |
14 | * 4(%esp) - %ecx |
15 | * 8(%esp) - %edx |
16 | * C(%esp) - %esi |
17 | * 10(%esp) - %edi |
18 | * 14(%esp) - %ebp |
19 | * 18(%esp) - %eax |
20 | * 1C(%esp) - %ds |
21 | * 20(%esp) - %es |
22 | * 24(%esp) - %fs |
23 | * 28(%esp) - unused -- was %gs on old stackprotector kernels |
24 | * 2C(%esp) - orig_eax |
25 | * 30(%esp) - %eip |
26 | * 34(%esp) - %cs |
27 | * 38(%esp) - %eflags |
28 | * 3C(%esp) - %oldesp |
29 | * 40(%esp) - %oldss |
30 | */ |
31 | |
32 | #include <linux/linkage.h> |
33 | #include <linux/err.h> |
34 | #include <asm/thread_info.h> |
35 | #include <asm/irqflags.h> |
36 | #include <asm/errno.h> |
37 | #include <asm/segment.h> |
38 | #include <asm/smp.h> |
39 | #include <asm/percpu.h> |
40 | #include <asm/processor-flags.h> |
41 | #include <asm/irq_vectors.h> |
42 | #include <asm/cpufeatures.h> |
43 | #include <asm/alternative.h> |
44 | #include <asm/asm.h> |
45 | #include <asm/smap.h> |
46 | #include <asm/frame.h> |
47 | #include <asm/trapnr.h> |
48 | #include <asm/nospec-branch.h> |
49 | |
50 | #include "calling.h" |
51 | |
52 | .section .entry.text, "ax" |
53 | |
54 | #define PTI_SWITCH_MASK (1 << PAGE_SHIFT) |
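/*
 * With CONFIG_PAGE_TABLE_ISOLATION the kernel and user copies of the
 * page tables live in adjacent pages, kernel copy first. PAGE_SHIFT is
 * 12 for 4K pages, so flipping bit 12 of CR3 is all it takes to switch
 * between them: mask set selects the user PGD, mask clear the kernel
 * PGD (see the SWITCH_TO_*_CR3 macros below).
 */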
55 | |
56 | /* Unconditionally switch to user cr3 */ |
57 | .macro SWITCH_TO_USER_CR3 scratch_reg:req |
58 | ALTERNATIVE "jmp .Lend_\@" , "" , X86_FEATURE_PTI |
59 | |
60 | movl %cr3, \scratch_reg |
61 | orl $PTI_SWITCH_MASK, \scratch_reg |
62 | movl \scratch_reg, %cr3 |
63 | .Lend_\@: |
64 | .endm |
65 | |
66 | .macro BUG_IF_WRONG_CR3 no_user_check=0 |
67 | #ifdef CONFIG_DEBUG_ENTRY |
68 | ALTERNATIVE "jmp .Lend_\@" , "" , X86_FEATURE_PTI |
69 | .if \no_user_check == 0 |
70 | /* coming from usermode? */ |
71 | testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp) |
72 | jz .Lend_\@ |
73 | .endif |
74 | /* On user-cr3? */ |
75 | movl %cr3, %eax |
76 | testl $PTI_SWITCH_MASK, %eax |
77 | jnz .Lend_\@ |
78 | /* From userspace with kernel cr3 - BUG */ |
79 | ud2 |
80 | .Lend_\@: |
81 | #endif |
82 | .endm |
83 | |
84 | /* |
85 | * Switch to kernel cr3 if not already loaded and return current cr3 in |
86 | * \scratch_reg |
87 | */ |
88 | .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req |
89 | ALTERNATIVE "jmp .Lend_\@" , "" , X86_FEATURE_PTI |
90 | movl %cr3, \scratch_reg |
91 | /* Test if we are already on kernel CR3 */ |
92 | testl $PTI_SWITCH_MASK, \scratch_reg |
93 | jz .Lend_\@ |
94 | andl $(~PTI_SWITCH_MASK), \scratch_reg |
95 | movl \scratch_reg, %cr3 |
96 | /* Return original CR3 in \scratch_reg */ |
97 | orl $PTI_SWITCH_MASK, \scratch_reg |
98 | .Lend_\@: |
99 | .endm |
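/*
 * The value returned in \scratch_reg is later tested against
 * PTI_SWITCH_MASK (see SWITCH_TO_KERNEL_STACK and RESTORE_ALL_NMI) to
 * decide whether the exit path has to go back to user cr3.
 */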
100 | |
101 | #define CS_FROM_ENTRY_STACK (1 << 31) |
102 | #define CS_FROM_USER_CR3 (1 << 30) |
103 | #define CS_FROM_KERNEL (1 << 29) |
104 | #define CS_FROM_ESPFIX (1 << 28) |
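/*
 * These markers are stashed in the high half of the 32-bit CS slot of
 * the iret frame; the CS selector itself only occupies the low 16 bits,
 * so bits 28-31 are free for software use (see FIXUP_FRAME below).
 */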
105 | |
106 | .macro FIXUP_FRAME |
107 | /* |
108 | * The high bits of the CS dword (__csh) are used for CS_FROM_*. |
109 | * Clear them in case hardware didn't do this for us. |
110 | */ |
111 | andl $0x0000ffff, 4*4(%esp) |
112 | |
113 | #ifdef CONFIG_VM86 |
114 | testl $X86_EFLAGS_VM, 5*4(%esp) |
115 | jnz .Lfrom_usermode_no_fixup_\@ |
116 | #endif |
117 | testl $USER_SEGMENT_RPL_MASK, 4*4(%esp) |
118 | jnz .Lfrom_usermode_no_fixup_\@ |
119 | |
120 | orl $CS_FROM_KERNEL, 4*4(%esp) |
121 | |
122 | /* |
123 | * When we're here from kernel mode, the (exception) stack looks like: |
124 | * |
125 | * 6*4(%esp) - <previous context> |
126 | * 5*4(%esp) - flags |
127 | * 4*4(%esp) - cs |
128 | * 3*4(%esp) - ip |
129 | * 2*4(%esp) - orig_eax |
130 | * 1*4(%esp) - gs / function |
131 | * 0*4(%esp) - fs |
132 | * |
133 | * Let's build a 5-entry IRET frame after that, so that struct pt_regs |
134 | * is complete and in particular regs->sp is correct. This gives us |
135 | * the original 6 entries as gap: |
136 | * |
137 | * 14*4(%esp) - <previous context> |
138 | * 13*4(%esp) - gap / flags |
139 | * 12*4(%esp) - gap / cs |
140 | * 11*4(%esp) - gap / ip |
141 | * 10*4(%esp) - gap / orig_eax |
142 | * 9*4(%esp) - gap / gs / function |
143 | * 8*4(%esp) - gap / fs |
144 | * 7*4(%esp) - ss |
145 | * 6*4(%esp) - sp |
146 | * 5*4(%esp) - flags |
147 | * 4*4(%esp) - cs |
148 | * 3*4(%esp) - ip |
149 | * 2*4(%esp) - orig_eax |
150 | * 1*4(%esp) - gs / function |
151 | * 0*4(%esp) - fs |
152 | */ |
153 | |
154 | pushl %ss # ss |
155 | pushl %esp # sp (points at ss) |
156 | addl $7*4, (%esp) # point sp back at the previous context |
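/*
 * The pushl %esp above stored the address of the ss slot, one word
 * below the %esp we entered with; adding 7*4 moves it past that slot
 * plus the six original entries, so the saved sp ends up pointing at
 * <previous context> as in the layout above.
 */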
157 | pushl 7*4(%esp) # flags |
158 | pushl 7*4(%esp) # cs |
159 | pushl 7*4(%esp) # ip |
160 | pushl 7*4(%esp) # orig_eax |
161 | pushl 7*4(%esp) # gs / function |
162 | pushl 7*4(%esp) # fs |
163 | .Lfrom_usermode_no_fixup_\@: |
164 | .endm |
165 | |
166 | .macro IRET_FRAME |
167 | /* |
168 | * We're called with %ds, %es, %fs, and %gs from the interrupted |
169 | * frame, so we shouldn't use them. Also, we may be in ESPFIX |
170 | * mode and therefore have a nonzero SS base and an offset ESP, |
171 | * so any attempt to access the stack needs to use SS (except for |
172 | * accesses through %esp, which automatically use SS). |
173 | */ |
174 | testl $CS_FROM_KERNEL, 1*4(%esp) |
175 | jz .Lfinished_frame_\@ |
176 | |
177 | /* |
178 | * Reconstruct the 3 entry IRET frame right after the (modified) |
179 | * regs->sp without lowering %esp in between, such that an NMI in the |
180 | * middle doesn't scribble our stack. |
181 | */ |
182 | pushl %eax |
183 | pushl %ecx |
184 | movl 5*4(%esp), %eax # (modified) regs->sp |
185 | |
186 | movl 4*4(%esp), %ecx # flags |
187 | movl %ecx, %ss:-1*4(%eax) |
188 | |
189 | movl 3*4(%esp), %ecx # cs |
190 | andl $0x0000ffff, %ecx |
191 | movl %ecx, %ss:-2*4(%eax) |
192 | |
193 | movl 2*4(%esp), %ecx # ip |
194 | movl %ecx, %ss:-3*4(%eax) |
195 | |
196 | movl 1*4(%esp), %ecx # eax |
197 | movl %ecx, %ss:-4*4(%eax) |
198 | |
199 | popl %ecx |
200 | lea -4*4(%eax), %esp |
201 | popl %eax |
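/*
 * %esp now points at the reconstructed ip/cs/flags, i.e. a plain
 * 3-entry iret frame ending exactly at regs->sp.
 */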
202 | .Lfinished_frame_\@: |
203 | .endm |
204 | |
205 | .macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0 |
206 | cld |
207 | .if \skip_gs == 0 |
208 | pushl $0 |
209 | .endif |
210 | pushl %fs |
211 | |
212 | pushl %eax |
213 | movl $(__KERNEL_PERCPU), %eax |
214 | movl %eax, %fs |
215 | .if \unwind_espfix > 0 |
216 | UNWIND_ESPFIX_STACK |
217 | .endif |
218 | popl %eax |
219 | |
220 | FIXUP_FRAME |
221 | pushl %es |
222 | pushl %ds |
223 | pushl \pt_regs_ax |
224 | pushl %ebp |
225 | pushl %edi |
226 | pushl %esi |
227 | pushl %edx |
228 | pushl %ecx |
229 | pushl %ebx |
230 | movl $(__USER_DS), %edx |
231 | movl %edx, %ds |
232 | movl %edx, %es |
233 | /* Switch to kernel stack if necessary */ |
234 | .if \switch_stacks > 0 |
235 | SWITCH_TO_KERNEL_STACK |
236 | .endif |
237 | .endm |
238 | |
239 | .macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0 |
240 | SAVE_ALL unwind_espfix=\unwind_espfix |
241 | |
242 | BUG_IF_WRONG_CR3 |
243 | |
244 | /* |
245 | * Now switch the CR3 when PTI is enabled. |
246 | * |
247 | * We can enter with either user or kernel cr3. The code stores |
248 | * the old cr3 in \cr3_reg and switches to the kernel cr3 if |
249 | * necessary. |
250 | */ |
251 | SWITCH_TO_KERNEL_CR3 scratch_reg=\cr3_reg |
252 | |
253 | .Lend_\@: |
254 | .endm |
255 | |
256 | .macro RESTORE_INT_REGS |
257 | popl %ebx |
258 | popl %ecx |
259 | popl %edx |
260 | popl %esi |
261 | popl %edi |
262 | popl %ebp |
263 | popl %eax |
264 | .endm |
265 | |
266 | .macro RESTORE_REGS pop=0 |
267 | RESTORE_INT_REGS |
268 | 1: popl %ds |
269 | 2: popl %es |
270 | 3: popl %fs |
271 | 4: addl $(4 + \pop), %esp /* pop the unused "gs" slot */ |
272 | IRET_FRAME |
273 | |
274 | /* |
275 | * There is no _ASM_EXTABLE_TYPE_REG() for ASM, however since this is |
276 | * ASM the registers are known and we can trivially hard-code them. |
277 | */ |
278 | _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_POP_ZERO|EX_REG_DS) |
279 | _ASM_EXTABLE_TYPE(2b, 3b, EX_TYPE_POP_ZERO|EX_REG_ES) |
280 | _ASM_EXTABLE_TYPE(3b, 4b, EX_TYPE_POP_ZERO|EX_REG_FS) |
281 | .endm |
282 | |
283 | .macro RESTORE_ALL_NMI cr3_reg:req pop=0 |
284 | /* |
285 | * Now switch the CR3 when PTI is enabled. |
286 | * |
287 | * We enter with kernel cr3 and switch the cr3 to the value |
288 | * stored in \cr3_reg, which is either a user or a kernel cr3. |
289 | */ |
290 | ALTERNATIVE "jmp .Lswitched_\@" , "" , X86_FEATURE_PTI |
291 | |
292 | testl $PTI_SWITCH_MASK, \cr3_reg |
293 | jz .Lswitched_\@ |
294 | |
295 | /* User cr3 in \cr3_reg - write it to hardware cr3 */ |
296 | movl \cr3_reg, %cr3 |
297 | |
298 | .Lswitched_\@: |
299 | |
300 | BUG_IF_WRONG_CR3 |
301 | |
302 | RESTORE_REGS pop=\pop |
303 | .endm |
304 | |
305 | .macro CHECK_AND_APPLY_ESPFIX |
306 | #ifdef CONFIG_X86_ESPFIX32 |
307 | #define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8) |
308 | #define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page + GDT_ESPFIX_OFFSET) |
309 | |
310 | ALTERNATIVE "jmp .Lend_\@" , "" , X86_BUG_ESPFIX |
311 | |
312 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
313 | /* |
314 | * Warning: PT_OLDSS(%esp) contains the wrong/random values if we |
315 | * are returning to the kernel. |
316 | * See comments in process.c:copy_thread() for details. |
317 | */ |
318 | movb PT_OLDSS(%esp), %ah |
319 | movb PT_CS(%esp), %al |
320 | andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax |
321 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax |
322 | jne .Lend_\@ # skip unless returning to user-space with LDT SS |
323 | |
324 | /* |
325 | * Setup and switch to ESPFIX stack |
326 | * |
327 | * We're returning to userspace with a 16 bit stack. The CPU will not |
328 | * restore the high word of ESP for us on executing iret... This is an |
329 | * "official" bug of all the x86-compatible CPUs, which we can work |
330 | * around to make dosemu and wine happy. We do this by preloading the |
331 | * high word of ESP with the high word of the userspace ESP while |
332 | * compensating for the offset by switching to the ESPFIX segment, |
333 | * whose base address makes up for the difference. |
334 | */ |
335 | mov %esp, %edx /* load kernel esp */ |
336 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ |
337 | mov %dx, %ax /* eax: new kernel esp */ |
338 | sub %eax, %edx /* offset (low word is 0) */ |
339 | shr $16, %edx |
340 | mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ |
341 | mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ |
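/*
 * Bytes 4 and 7 of a GDT descriptor hold base bits 16..23 and 24..31.
 * The two writes above are sufficient because the offset computed in
 * %edx has a zero low word, so the descriptor's low 16 base bits can
 * stay zero.
 */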
342 | pushl $__ESPFIX_SS |
343 | pushl %eax /* new kernel esp */ |
344 | /* |
345 | * Disable interrupts, but do not irqtrace this section: we |
346 | * will soon execute iret and the tracer was already set to |
347 | * the irqstate after the IRET: |
348 | */ |
349 | cli |
350 | lss (%esp), %esp /* switch to espfix segment */ |
351 | .Lend_\@: |
352 | #endif /* CONFIG_X86_ESPFIX32 */ |
353 | .endm |
354 | |
355 | /* |
356 | * Called with pt_regs fully populated and kernel segments loaded, |
357 | * so we can access PER_CPU and use the integer registers. |
358 | * |
359 | * We need to be very careful here with the %esp switch, because an NMI |
360 | * can happen anywhere. If the NMI handler finds itself on the |
361 | * entry-stack, it will overwrite the task-stack and everything we |
362 | * copied there. So allocate the stack-frame on the task-stack and |
363 | * switch to it before we do any copying. |
364 | */ |
365 | |
366 | .macro SWITCH_TO_KERNEL_STACK |
367 | |
368 | BUG_IF_WRONG_CR3 |
369 | |
370 | SWITCH_TO_KERNEL_CR3 scratch_reg=%eax |
371 | |
372 | /* |
373 | * %eax now contains the entry cr3 and we carry it forward in |
374 | * that register while this macro runs |
375 | */ |
376 | |
377 | /* Are we on the entry stack? Bail out if not! */ |
378 | movl PER_CPU_VAR(cpu_entry_area), %ecx |
379 | addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx |
380 | subl %esp, %ecx /* ecx = (end of entry_stack) - esp */ |
381 | cmpl $SIZEOF_entry_stack, %ecx |
382 | jae .Lend_\@ |
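/*
 * A single unsigned compare covers both out-of-range cases: if %esp is
 * above the entry stack the subtraction wraps to a huge value, and if
 * it is below, the difference exceeds SIZEOF_entry_stack; either way
 * we take the branch and skip the stack switch.
 */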
383 | |
384 | /* Load stack pointer into %esi and %edi */ |
385 | movl %esp, %esi |
386 | movl %esi, %edi |
387 | |
388 | /* Move %edi to the top of the entry stack */ |
389 | andl $(MASK_entry_stack), %edi |
390 | addl $(SIZEOF_entry_stack), %edi |
391 | |
392 | /* Load top of task-stack into %edi */ |
393 | movl TSS_entry2task_stack(%edi), %edi |
394 | |
395 | /* Special case - entry from kernel mode via entry stack */ |
396 | #ifdef CONFIG_VM86 |
397 | movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS |
398 | movb PT_CS(%esp), %cl |
399 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %ecx |
400 | #else |
401 | movl PT_CS(%esp), %ecx |
402 | andl $SEGMENT_RPL_MASK, %ecx |
403 | #endif |
404 | cmpl $USER_RPL, %ecx |
405 | jb .Lentry_from_kernel_\@ |
406 | |
407 | /* Bytes to copy */ |
408 | movl $PTREGS_SIZE, %ecx |
409 | |
410 | #ifdef CONFIG_VM86 |
411 | testl $X86_EFLAGS_VM, PT_EFLAGS(%esi) |
412 | jz .Lcopy_pt_regs_\@ |
413 | |
414 | /* |
415 | * Stack-frame contains 4 additional segment registers (the |
416 | * hardware-saved user ES, DS, FS and GS) when coming from VM86 mode |
417 | */ |
418 | addl $(4 * 4), %ecx |
419 | |
420 | #endif |
421 | .Lcopy_pt_regs_\@: |
422 | |
423 | /* Allocate frame on task-stack */ |
424 | subl %ecx, %edi |
425 | |
426 | /* Switch to task-stack */ |
427 | movl %edi, %esp |
428 | |
429 | /* |
430 | * We are now on the task-stack and can safely copy over the |
431 | * stack-frame |
432 | */ |
433 | shrl $2, %ecx |
434 | cld |
435 | rep movsl |
436 | |
437 | jmp .Lend_\@ |
438 | |
439 | .Lentry_from_kernel_\@: |
440 | |
441 | /* |
442 | * This handles the case when we enter the kernel from |
443 | * kernel-mode and %esp points to the entry-stack. When this |
444 | * happens we need to switch to the task-stack to run C code, |
445 | * but switch back to the entry-stack again when we approach |
446 | * iret and return to the interrupted code-path. This usually |
447 | * happens when we hit an exception while restoring user-space |
448 | * segment registers on the way back to user-space or when the |
449 | * sysenter handler runs with eflags.tf set. |
450 | * |
451 | * When we switch to the task-stack here, we can't trust the |
452 | * contents of the entry-stack anymore, as the exception handler |
453 | * might be scheduled out or moved to another CPU. Therefore we |
454 | * copy the complete entry-stack to the task-stack and set a |
455 | * marker in the iret-frame (bit 31 of the CS dword) to detect |
456 | * what we've done on the iret path. |
457 | * |
458 | * On the iret path we copy everything back and switch to the |
459 | * entry-stack, so that the interrupted kernel code-path |
460 | * continues on the same stack it was interrupted with. |
461 | * |
462 | * Be aware that an NMI can happen anytime in this code. |
463 | * |
464 | * %esi: Entry-Stack pointer (same as %esp) |
465 | * %edi: Top of the task stack |
466 | * %eax: CR3 on kernel entry |
467 | */ |
468 | |
469 | /* Calculate number of bytes on the entry stack in %ecx */ |
470 | movl %esi, %ecx |
471 | |
472 | /* %ecx to the top of entry-stack */ |
473 | andl $(MASK_entry_stack), %ecx |
474 | addl $(SIZEOF_entry_stack), %ecx |
475 | |
476 | /* Number of bytes on the entry stack to %ecx */ |
477 | sub %esi, %ecx |
478 | |
479 | /* Mark stackframe as coming from entry stack */ |
480 | orl $CS_FROM_ENTRY_STACK, PT_CS(%esp) |
481 | |
482 | /* |
483 | * Test the cr3 used to enter the kernel and add a marker |
484 | * so that we can switch back to it before iret. |
485 | */ |
486 | testl $PTI_SWITCH_MASK, %eax |
487 | jz .Lcopy_pt_regs_\@ |
488 | orl $CS_FROM_USER_CR3, PT_CS(%esp) |
489 | |
490 | /* |
491 | * %esi and %edi are unchanged, %ecx contains the number of |
492 | * bytes to copy. The code at .Lcopy_pt_regs_\@ will allocate |
493 | * the stack-frame on task-stack and copy everything over |
494 | */ |
495 | jmp .Lcopy_pt_regs_\@ |
496 | |
497 | .Lend_\@: |
498 | .endm |
499 | |
500 | /* |
501 | * Switch back from the kernel stack to the entry stack. |
502 | * |
503 | * The %esp register must point to pt_regs on the task stack. The macro |
504 | * first calculates the size of the stack-frame to copy, depending on |
505 | * whether we return to VM86 mode or not, and then uses 'rep movsl' to |
506 | * copy the contents of the stack over to the entry stack. |
507 | * |
508 | * We must be very careful here, as we can't trust the contents of the |
509 | * task-stack once we switched to the entry-stack. When an NMI happens |
510 | * while on the entry-stack, the NMI handler will switch back to the top |
511 | * of the task stack, overwriting our stack-frame we are about to copy. |
512 | * Therefore we switch the stack only after everything is copied over. |
513 | */ |
514 | .macro SWITCH_TO_ENTRY_STACK |
515 | |
516 | /* Bytes to copy */ |
517 | movl $PTREGS_SIZE, %ecx |
518 | |
519 | #ifdef CONFIG_VM86 |
520 | testl $(X86_EFLAGS_VM), PT_EFLAGS(%esp) |
521 | jz .Lcopy_pt_regs_\@ |
522 | |
523 | /* Additional 4 registers to copy when returning to VM86 mode */ |
524 | addl $(4 * 4), %ecx |
525 | |
526 | .Lcopy_pt_regs_\@: |
527 | #endif |
528 | |
529 | /* Initialize source and destination for movsl */ |
530 | movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi |
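/* On 32-bit, TSS sp0 points at the top of the per-cpu entry stack */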
531 | subl %ecx, %edi |
532 | movl %esp, %esi |
533 | |
534 | /* Save future stack pointer in %ebx */ |
535 | movl %edi, %ebx |
536 | |
537 | /* Copy over the stack-frame */ |
538 | shrl $2, %ecx |
539 | cld |
540 | rep movsl |
541 | |
542 | /* |
543 | * Switch to entry-stack - needs to happen after everything is |
544 | * copied because the NMI handler will overwrite the task-stack |
545 | * when on entry-stack |
546 | */ |
547 | movl %ebx, %esp |
548 | |
549 | .Lend_\@: |
550 | .endm |
551 | |
552 | /* |
553 | * This macro handles the case when we return to kernel-mode on the iret |
554 | * path and have to switch back to the entry stack and/or user-cr3 |
555 | * |
556 | * See the comments below the .Lentry_from_kernel_\@ label in the |
557 | * SWITCH_TO_KERNEL_STACK macro for more details. |
558 | */ |
559 | .macro PARANOID_EXIT_TO_KERNEL_MODE |
560 | |
561 | /* |
562 | * Test if we entered the kernel with the entry-stack. Most |
563 | * likely we did not, because this code only runs on the |
564 | * return-to-kernel path. |
565 | */ |
566 | testl $CS_FROM_ENTRY_STACK, PT_CS(%esp) |
567 | jz .Lend_\@ |
568 | |
569 | /* Unlikely slow-path */ |
570 | |
571 | /* Clear marker from stack-frame */ |
572 | andl $(~CS_FROM_ENTRY_STACK), PT_CS(%esp) |
573 | |
574 | /* Copy the remaining task-stack contents to entry-stack */ |
575 | movl %esp, %esi |
576 | movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %edi |
577 | |
578 | /* Bytes on the task-stack to ecx */ |
579 | movl PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %ecx |
580 | subl %esi, %ecx |
581 | |
582 | /* Allocate stack-frame on entry-stack */ |
583 | subl %ecx, %edi |
584 | |
585 | /* |
586 | * Save the future stack-pointer; we must not switch until the |
587 | * copy is done, otherwise the NMI handler could destroy the |
588 | * contents of the task-stack we are about to copy. |
589 | */ |
590 | movl %edi, %ebx |
591 | |
592 | /* Do the copy */ |
593 | shrl $2, %ecx |
594 | cld |
595 | rep movsl |
596 | |
597 | /* Safe to switch to entry-stack now */ |
598 | movl %ebx, %esp |
599 | |
600 | /* |
601 | * We came from entry-stack and need to check if we also need to |
602 | * switch back to user cr3. |
603 | */ |
604 | testl $CS_FROM_USER_CR3, PT_CS(%esp) |
605 | jz .Lend_\@ |
606 | |
607 | /* Clear marker from stack-frame */ |
608 | andl $(~CS_FROM_USER_CR3), PT_CS(%esp) |
609 | |
610 | SWITCH_TO_USER_CR3 scratch_reg=%eax |
611 | |
612 | .Lend_\@: |
613 | .endm |
614 | |
615 | /** |
616 | * idtentry - Macro to generate entry stubs for simple IDT entries |
617 | * @vector: Vector number |
618 | * @asmsym: ASM symbol for the entry point |
619 | * @cfunc: C function to be called |
620 | * @has_error_code: Hardware pushed error code on stack |
621 | */ |
622 | .macro idtentry vector asmsym cfunc has_error_code:req |
623 | SYM_CODE_START(\asmsym) |
624 | ASM_CLAC |
625 | cld |
626 | |
627 | .if \has_error_code == 0 |
628 | pushl $0 /* Clear the error code */ |
629 | .endif |
630 | |
631 | /* Push the C-function address into the GS slot */ |
632 | pushl $\cfunc |
633 | /* Invoke the common exception entry */ |
634 | jmp handle_exception |
635 | SYM_CODE_END(\asmsym) |
636 | .endm |
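/*
 * For instance, the stubs emitted via <asm/idtentry.h> look roughly
 * like:
 *
 *	idtentry X86_TRAP_DE asm_exc_divide_error exc_divide_error has_error_code=0
 *
 * which expands to a handler that pushes a zero error code, pushes
 * exc_divide_error into the gs slot and jumps to handle_exception.
 */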
637 | |
638 | .macro idtentry_irq vector cfunc |
639 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
640 | SYM_CODE_START_LOCAL(asm_\cfunc) |
641 | ASM_CLAC |
642 | SAVE_ALL switch_stacks=1 |
643 | ENCODE_FRAME_POINTER |
644 | movl %esp, %eax |
645 | movl PT_ORIG_EAX(%esp), %edx /* get the vector from stack */ |
646 | movl $-1, PT_ORIG_EAX(%esp) /* no syscall to restart */ |
647 | call \cfunc |
648 | jmp handle_exception_return |
649 | SYM_CODE_END(asm_\cfunc) |
650 | .endm |
651 | |
652 | /* |
653 | * Include the defines which emit the idt entries which are shared |
654 | * between 32 and 64 bit and emit the __irqentry_text_* markers |
655 | * so the stacktrace boundary checks work. |
656 | */ |
657 | .align 16 |
658 | .globl __irqentry_text_start |
659 | __irqentry_text_start: |
660 | |
661 | #include <asm/idtentry.h> |
662 | |
663 | .align 16 |
664 | .globl __irqentry_text_end |
665 | __irqentry_text_end: |
666 | |
667 | /* |
668 | * %eax: prev task |
669 | * %edx: next task |
670 | */ |
671 | .pushsection .text, "ax" |
672 | SYM_CODE_START(__switch_to_asm) |
673 | /* |
674 | * Save callee-saved registers |
675 | * This must match the order in struct inactive_task_frame |
676 | */ |
677 | pushl %ebp |
678 | pushl %ebx |
679 | pushl %edi |
680 | pushl %esi |
681 | /* |
682 | * Flags are saved to prevent AC leakage. This could go |
683 | * away if objtool had 32-bit support to verify |
684 | * the STAC/CLAC correctness. |
685 | */ |
686 | pushfl |
687 | |
688 | /* switch stack */ |
689 | movl %esp, TASK_threadsp(%eax) |
690 | movl TASK_threadsp(%edx), %esp |
691 | |
692 | #ifdef CONFIG_STACKPROTECTOR |
693 | movl TASK_stack_canary(%edx), %ebx |
694 | movl %ebx, PER_CPU_VAR(__stack_chk_guard) |
695 | #endif |
696 | |
697 | /* |
698 | * When switching from a shallower to a deeper call stack |
699 | * the RSB may either underflow or use entries populated |
700 | * with userspace addresses. On CPUs where those concerns |
701 | * exist, overwrite the RSB with entries which capture |
702 | * speculative execution to prevent attack. |
703 | */ |
704 | FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
705 | |
706 | /* Restore flags of the incoming task to restore AC state. */ |
707 | popfl |
708 | /* restore callee-saved registers */ |
709 | popl %esi |
710 | popl %edi |
711 | popl %ebx |
712 | popl %ebp |
713 | |
714 | jmp __switch_to |
715 | SYM_CODE_END(__switch_to_asm) |
716 | .popsection |
717 | |
718 | /* |
719 | * A newly forked process directly context switches into this address. |
720 | * |
721 | * eax: prev task we switched from |
722 | * ebx: kernel thread func (NULL for user thread) |
723 | * edi: kernel thread arg |
724 | */ |
725 | .pushsection .text, "ax" |
726 | SYM_CODE_START(ret_from_fork_asm) |
727 | movl %esp, %edx /* regs */ |
728 | |
729 | /* return address for the stack unwinder */ |
730 | pushl $.Lsyscall_32_done |
731 | |
732 | FRAME_BEGIN |
733 | /* prev already in EAX */ |
734 | movl %ebx, %ecx /* fn */ |
735 | pushl %edi /* fn_arg */ |
736 | call ret_from_fork |
737 | addl $4, %esp |
738 | FRAME_END |
739 | |
740 | RET |
741 | SYM_CODE_END(ret_from_fork_asm) |
742 | .popsection |
743 | |
744 | SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) |
745 | /* |
746 | * All code from here through __end_SYSENTER_singlestep_region is subject |
747 | * to being single-stepped if a user program sets TF and executes SYSENTER. |
748 | * There is absolutely nothing that we can do to prevent this from happening |
749 | * (thanks Intel!). To keep our handling of this situation as simple as |
750 | * possible, we handle TF just like AC and NT, except that our #DB handler |
751 | * will ignore all of the single-step traps generated in this range. |
752 | */ |
753 | |
754 | /* |
755 | * 32-bit SYSENTER entry. |
756 | * |
757 | * 32-bit system calls through the vDSO's __kernel_vsyscall enter here |
758 | * if X86_FEATURE_SEP is available. This is the preferred system call |
759 | * entry on 32-bit systems. |
760 | * |
761 | * The SYSENTER instruction, in principle, should *only* occur in the |
762 | * vDSO. In practice, a small number of Android devices were shipped |
763 | * with a copy of Bionic that inlined a SYSENTER instruction. This |
764 | * never happened in any of Google's Bionic versions -- it only happened |
765 | * in a narrow range of Intel-provided versions. |
766 | * |
767 | * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs. |
768 | * IF and VM in EFLAGS are cleared (IOW: interrupts are off). |
769 | * SYSENTER does not save anything on the stack, |
770 | * and does not save old EIP (!!!), ESP, or EFLAGS. |
771 | * |
772 | * To avoid losing track of EFLAGS.VM (and thus potentially corrupting |
773 | * user and/or vm86 state), we explicitly disable the SYSENTER |
774 | * instruction in vm86 mode by reprogramming the MSRs. |
775 | * |
776 | * Arguments: |
777 | * eax system call number |
778 | * ebx arg1 |
779 | * ecx arg2 |
780 | * edx arg3 |
781 | * esi arg4 |
782 | * edi arg5 |
783 | * ebp user stack |
784 | * 0(%ebp) arg6 |
785 | */ |
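/*
 * The vDSO's __kernel_vsyscall saves the original %ebp (which carries
 * arg6) on the user stack and then copies %esp into %ebp before
 * executing SYSENTER; that is why arg6 is fetched from 0(%ebp).
 */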
786 | SYM_FUNC_START(entry_SYSENTER_32) |
787 | /* |
788 | * On entry-stack with all userspace-regs live - save and |
789 | * restore eflags and %eax so that %eax can be used as a |
790 | * scratch-reg for the cr3 switch. |
791 | */ |
792 | pushfl |
793 | pushl %eax |
794 | BUG_IF_WRONG_CR3 no_user_check=1 |
795 | SWITCH_TO_KERNEL_CR3 scratch_reg=%eax |
796 | popl %eax |
797 | popfl |
798 | |
799 | /* Stack empty again, switch to task stack */ |
800 | movl TSS_entry2task_stack(%esp), %esp |
801 | |
802 | .Lsysenter_past_esp: |
803 | pushl $__USER_DS /* pt_regs->ss */ |
804 | pushl $0 /* pt_regs->sp (placeholder) */ |
805 | pushfl /* pt_regs->flags (except IF = 0) */ |
806 | pushl $__USER_CS /* pt_regs->cs */ |
807 | pushl $0 /* pt_regs->ip = 0 (placeholder) */ |
808 | pushl %eax /* pt_regs->orig_ax */ |
809 | SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest, stack already switched */ |
810 | |
811 | /* |
812 | * SYSENTER doesn't filter flags, so we need to clear NT, AC |
813 | * and TF ourselves. To save a few cycles, we can check whether |
814 | * any of them was set instead of doing an unconditional popfl. |
815 | * This needs to happen before enabling interrupts so that |
816 | * we don't get preempted with NT set. |
817 | * |
818 | * If TF is set, we will single-step all the way to here -- do_debug |
819 | * will ignore all the traps. (Yes, this is slow, but so is |
820 | * single-stepping in general. This allows us to avoid having |
821 | * more complicated code to handle the case where a user program |
822 | * forces us to single-step through the SYSENTER entry code.) |
823 | * |
824 | * NB.: .Lsysenter_fix_flags is a label with the code under it moved |
825 | * out-of-line as an optimization: NT is unlikely to be set in the |
826 | * majority of the cases and instead of polluting the I$ unnecessarily, |
827 | * we're keeping that code behind a branch which will predict as |
828 | * not-taken and therefore its instructions won't be fetched. |
829 | */ |
830 | testl $X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp) |
831 | jnz .Lsysenter_fix_flags |
832 | .Lsysenter_flags_fixed: |
833 | |
834 | movl %esp, %eax |
835 | call do_SYSENTER_32 |
836 | testb %al, %al |
837 | jz .Lsyscall_32_done |
838 | |
839 | STACKLEAK_ERASE |
840 | |
841 | /* Opportunistic SYSEXIT */ |
842 | |
843 | /* |
844 | * Set up the entry stack - we keep the pointer in %eax and do the |
845 | * switch after almost all user-state is restored. |
846 | */ |
847 | |
848 | /* Load entry stack pointer and allocate frame for eflags/eax */ |
849 | movl PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %eax |
850 | subl $(2*4), %eax |
851 | |
852 | /* Copy eflags and eax to entry stack */ |
853 | movl PT_EFLAGS(%esp), %edi |
854 | movl PT_EAX(%esp), %esi |
855 | movl %edi, (%eax) |
856 | movl %esi, 4(%eax) |
857 | |
858 | /* Restore user registers and segments */ |
859 | movl PT_EIP(%esp), %edx /* pt_regs->ip */ |
860 | movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */ |
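/* sysexit will load EIP from %edx and ESP from %ecx, so keep them live */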
861 | 1: mov PT_FS(%esp), %fs |
862 | |
863 | popl %ebx /* pt_regs->bx */ |
864 | addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */ |
865 | popl %esi /* pt_regs->si */ |
866 | popl %edi /* pt_regs->di */ |
867 | popl %ebp /* pt_regs->bp */ |
868 | |
869 | /* Switch to entry stack */ |
870 | movl %eax, %esp |
871 | |
872 | /* Now ready to switch the cr3 */ |
873 | SWITCH_TO_USER_CR3 scratch_reg=%eax |
874 | |
875 | /* |
876 | * Restore all flags except IF. (We restore IF separately because |
877 | * STI gives a one-instruction window in which we won't be interrupted, |
878 | * whereas POPF does not.) |
879 | */ |
880 | btrl $X86_EFLAGS_IF_BIT, (%esp) |
881 | BUG_IF_WRONG_CR3 no_user_check=1 |
882 | popfl |
883 | popl %eax |
884 | CLEAR_CPU_BUFFERS |
885 | |
886 | /* |
887 | * Return to the vDSO, which will pop ecx and edx. |
888 | * Don't bother with DS and ES (they already contain __USER_DS). |
889 | */ |
890 | sti |
891 | sysexit |
892 | |
893 | 2: movl $0, PT_FS(%esp) |
894 | jmp 1b |
895 | _ASM_EXTABLE(1b, 2b) |
896 | |
897 | .Lsysenter_fix_flags: |
898 | pushl $X86_EFLAGS_FIXED |
899 | popfl |
900 | jmp .Lsysenter_flags_fixed |
901 | SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) |
902 | SYM_FUNC_END(entry_SYSENTER_32) |
903 | |
904 | /* |
905 | * 32-bit legacy system call entry. |
906 | * |
907 | * 32-bit x86 Linux system calls traditionally used the INT $0x80 |
908 | * instruction. INT $0x80 lands here. |
909 | * |
910 | * This entry point can be used by any 32-bit program to perform system calls. |
911 | * Instances of INT $0x80 can be found inline in various programs and |
912 | * libraries. It is also used by the vDSO's __kernel_vsyscall |
913 | * fallback for hardware that doesn't support a faster entry method. |
914 | * Restarted 32-bit system calls also fall back to INT $0x80 |
915 | * regardless of what instruction was originally used to do the system |
916 | * call. (64-bit programs can use INT $0x80 as well, but they can |
917 | * only run on 64-bit kernels and therefore land in |
918 | * entry_INT80_compat.) |
919 | * |
920 | * This is considered a slow path. It is not used by most libc |
921 | * implementations on modern hardware except during process startup. |
922 | * |
923 | * Arguments: |
924 | * eax system call number |
925 | * ebx arg1 |
926 | * ecx arg2 |
927 | * edx arg3 |
928 | * esi arg4 |
929 | * edi arg5 |
930 | * ebp arg6 |
931 | */ |
932 | SYM_FUNC_START(entry_INT80_32) |
933 | ASM_CLAC |
934 | pushl %eax /* pt_regs->orig_ax */ |
935 | |
936 | SAVE_ALL pt_regs_ax=$-ENOSYS switch_stacks=1 /* save rest */ |
937 | |
938 | movl %esp, %eax |
939 | call do_int80_syscall_32 |
940 | .Lsyscall_32_done: |
941 | STACKLEAK_ERASE |
942 | |
943 | restore_all_switch_stack: |
944 | SWITCH_TO_ENTRY_STACK |
945 | CHECK_AND_APPLY_ESPFIX |
946 | |
947 | /* Switch back to user CR3 */ |
948 | SWITCH_TO_USER_CR3 scratch_reg=%eax |
949 | |
950 | BUG_IF_WRONG_CR3 |
951 | |
952 | /* Restore user state */ |
953 | RESTORE_REGS pop=4 # skip orig_eax/error_code |
954 | CLEAR_CPU_BUFFERS |
955 | .Lirq_return: |
956 | /* |
957 | * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization |
958 | * when returning from IPI handler and when returning from |
959 | * scheduler to user-space. |
960 | */ |
961 | iret |
962 | |
963 | .Lasm_iret_error: |
964 | pushl $0 # no error code |
965 | pushl $iret_error |
966 | |
967 | #ifdef CONFIG_DEBUG_ENTRY |
968 | /* |
969 | * The stack-frame here is the one that iret faulted on, so it's a |
970 | * return-to-user frame. We are on kernel-cr3 because we come here from |
971 | * the fixup code. This confuses the CR3 checker, so switch to user-cr3 |
972 | * as the checker expects it. |
973 | */ |
974 | pushl %eax |
975 | SWITCH_TO_USER_CR3 scratch_reg=%eax |
976 | popl %eax |
977 | #endif |
978 | |
979 | jmp handle_exception |
980 | |
981 | _ASM_EXTABLE(.Lirq_return, .Lasm_iret_error) |
982 | SYM_FUNC_END(entry_INT80_32) |
983 | |
984 | .macro FIXUP_ESPFIX_STACK |
985 | /* |
986 | * Switch back from the ESPFIX stack to the normal zero-based stack |
987 | * |
988 | * We can't call C functions using the ESPFIX stack. This code reads |
989 | * the high word of the segment base from the GDT and switches to the |
990 | * normal stack and adjusts ESP with the matching offset. |
991 | * |
992 | * We might be on user CR3 here, so percpu data is not mapped and we can't |
993 | * access the GDT through the percpu segment. Instead, use SGDT to find |
994 | * the cpu_entry_area alias of the GDT. |
995 | */ |
996 | #ifdef CONFIG_X86_ESPFIX32 |
997 | /* fixup the stack */ |
998 | pushl %ecx |
999 | subl $2*4, %esp |
1000 | sgdt (%esp) |
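/*
 * sgdt stores a 6-byte pseudo-descriptor: the 16-bit limit first,
 * then the 32-bit linear base at offset 2, which is why the base is
 * read from 2(%esp) below.
 */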
1001 | movl 2(%esp), %ecx /* GDT address */ |
1002 | /* |
1003 | * Careful: ECX is a linear pointer, so we need to force base |
1004 | * zero. %cs is the only known-linear segment we have right now. |
1005 | */ |
1006 | mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */ |
1007 | mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */ |
1008 | shl $16, %eax |
1009 | addl $2*4, %esp |
1010 | popl %ecx |
1011 | addl %esp, %eax /* the adjusted stack pointer */ |
1012 | pushl $__KERNEL_DS |
1013 | pushl %eax |
1014 | lss (%esp), %esp /* switch to the normal stack segment */ |
1015 | #endif |
1016 | .endm |
1017 | |
1018 | .macro UNWIND_ESPFIX_STACK |
1019 | /* It's safe to clobber %eax, all other regs need to be preserved */ |
1020 | #ifdef CONFIG_X86_ESPFIX32 |
1021 | movl %ss, %eax |
1022 | /* see if on espfix stack */ |
1023 | cmpw $__ESPFIX_SS, %ax |
1024 | jne .Lno_fixup_\@ |
1025 | /* switch to normal stack */ |
1026 | FIXUP_ESPFIX_STACK |
1027 | .Lno_fixup_\@: |
1028 | #endif |
1029 | .endm |
1030 | |
1031 | SYM_CODE_START_LOCAL_NOALIGN(handle_exception) |
1032 | /* the function address is in %gs's slot on the stack */ |
1033 | SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 |
1034 | ENCODE_FRAME_POINTER |
1035 | |
1036 | movl PT_GS(%esp), %edi # get the function address |
1037 | |
1038 | /* fixup orig %eax */ |
1039 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
1040 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
1041 | |
1042 | movl %esp, %eax # pt_regs pointer |
1043 | CALL_NOSPEC edi |
1044 | |
1045 | handle_exception_return: |
1046 | #ifdef CONFIG_VM86 |
1047 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS |
1048 | movb PT_CS(%esp), %al |
1049 | andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax |
1050 | #else |
1051 | /* |
1052 | * We can be coming here from a child spawned by kernel_thread(). |
1053 | */ |
1054 | movl PT_CS(%esp), %eax |
1055 | andl $SEGMENT_RPL_MASK, %eax |
1056 | #endif |
1057 | cmpl $USER_RPL, %eax # returning to v8086 or userspace ? |
1058 | jnb ret_to_user |
1059 | |
1060 | PARANOID_EXIT_TO_KERNEL_MODE |
1061 | BUG_IF_WRONG_CR3 |
1062 | RESTORE_REGS 4 |
1063 | jmp .Lirq_return |
1064 | |
1065 | ret_to_user: |
1066 | movl %esp, %eax |
1067 | jmp restore_all_switch_stack |
1068 | SYM_CODE_END(handle_exception) |
1069 | |
1070 | SYM_CODE_START(asm_exc_double_fault) |
1071 | 1: |
1072 | /* |
1073 | * This is a task gate handler, not an interrupt gate handler. |
1074 | * The error code is on the stack, but the stack is otherwise |
1075 | * empty. Interrupts are off. Our state is sane with the following |
1076 | * exceptions: |
1077 | * |
1078 | * - CR0.TS is set. "TS" literally means "task switched". |
1079 | * - EFLAGS.NT is set because we're a "nested task". |
1080 | * - The doublefault TSS has back_link set and has been marked busy. |
1081 | * - TR points to the doublefault TSS and the normal TSS is busy. |
1082 | * - CR3 is the normal kernel PGD. This would be delightful, except |
1083 | * that the CPU didn't bother to save the old CR3 anywhere. This |
1084 | * would make it very awkward to return back to the context we came |
1085 | * from. |
1086 | * |
1087 | * The rest of EFLAGS is sanitized for us, so we don't need to |
1088 | * worry about AC or DF. |
1089 | * |
1090 | * Don't even bother popping the error code. It's always zero, |
1091 | * and ignoring it makes us a bit more robust against buggy |
1092 | * hypervisor task gate implementations. |
1093 | * |
1094 | * We will manually undo the task switch instead of doing a |
1095 | * task-switching IRET. |
1096 | */ |
1097 | |
1098 | clts /* clear CR0.TS */ |
1099 | pushl $X86_EFLAGS_FIXED |
1100 | popfl /* clear EFLAGS.NT */ |
1101 | |
1102 | call doublefault_shim |
1103 | |
1104 | /* We don't support returning, so we have no IRET here. */ |
1105 | 1: |
1106 | hlt |
1107 | jmp 1b |
1108 | SYM_CODE_END(asm_exc_double_fault) |
1109 | |
1110 | /* |
1111 | * NMI is doubly nasty. It can happen on the first instruction of |
1112 | * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning |
1113 | * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32 |
1114 | * switched stacks. We handle both conditions by simply checking whether we |
1115 | * interrupted kernel code running on the SYSENTER stack. |
1116 | */ |
1117 | SYM_CODE_START(asm_exc_nmi) |
1118 | ASM_CLAC |
1119 | |
1120 | #ifdef CONFIG_X86_ESPFIX32 |
1121 | /* |
1122 | * ESPFIX_SS is only ever set on the return to user path |
1123 | * after we've switched to the entry stack. |
1124 | */ |
1125 | pushl %eax |
1126 | movl %ss, %eax |
1127 | cmpw $__ESPFIX_SS, %ax |
1128 | popl %eax |
1129 | je .Lnmi_espfix_stack |
1130 | #endif |
1131 | |
1132 | pushl %eax # pt_regs->orig_ax |
1133 | SAVE_ALL_NMI cr3_reg=%edi |
1134 | ENCODE_FRAME_POINTER |
1135 | xorl %edx, %edx # zero error code |
1136 | movl %esp, %eax # pt_regs pointer |
1137 | |
1138 | /* Are we currently on the SYSENTER stack? */ |
1139 | movl PER_CPU_VAR(cpu_entry_area), %ecx |
1140 | addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx |
1141 | subl %eax, %ecx /* ecx = (end of entry_stack) - esp */ |
1142 | cmpl $SIZEOF_entry_stack, %ecx |
1143 | jb .Lnmi_from_sysenter_stack |
1144 | |
1145 | /* Not on SYSENTER stack. */ |
1146 | call exc_nmi |
1147 | CLEAR_CPU_BUFFERS |
1148 | jmp .Lnmi_return |
1149 | |
1150 | .Lnmi_from_sysenter_stack: |
1151 | /* |
1152 | * We're on the SYSENTER stack. Switch off. No one (not even debug) |
1153 | * is using the thread stack right now, so it's safe for us to use it. |
1154 | */ |
1155 | movl %esp, %ebx |
1156 | movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp |
1157 | call exc_nmi |
1158 | movl %ebx, %esp |
1159 | |
1160 | .Lnmi_return: |
1161 | #ifdef CONFIG_X86_ESPFIX32 |
1162 | testl $CS_FROM_ESPFIX, PT_CS(%esp) |
1163 | jnz .Lnmi_from_espfix |
1164 | #endif |
1165 | |
1166 | CHECK_AND_APPLY_ESPFIX |
1167 | RESTORE_ALL_NMI cr3_reg=%edi pop=4 |
1168 | jmp .Lirq_return |
1169 | |
1170 | #ifdef CONFIG_X86_ESPFIX32 |
1171 | .Lnmi_espfix_stack: |
1172 | /* |
1173 | * Build the ss:esp pair that 'lss' uses later to get back onto the espfix stack |
1174 | */ |
1175 | pushl %ss |
1176 | pushl %esp |
1177 | addl $4, (%esp) |
1178 | |
1179 | /* Copy the (short) IRET frame */ |
1180 | pushl 4*4(%esp) # flags |
1181 | pushl 4*4(%esp) # cs |
1182 | pushl 4*4(%esp) # ip |
1183 | |
1184 | pushl %eax # orig_ax |
1185 | |
1186 | SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1 |
1187 | ENCODE_FRAME_POINTER |
1188 | |
1189 | /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */ |
1190 | xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp) |
1191 | |
1192 | xorl %edx, %edx # zero error code |
1193 | movl %esp, %eax # pt_regs pointer |
1194 | jmp .Lnmi_from_sysenter_stack |
1195 | |
1196 | .Lnmi_from_espfix: |
1197 | RESTORE_ALL_NMI cr3_reg=%edi |
1198 | /* |
1199 | * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to |
1200 | * fix up the gap and long frame: |
1201 | * |
1202 | * 3 - original frame (exception) |
1203 | * 2 - ESPFIX block (above) |
1204 | * 6 - gap (FIXUP_FRAME) |
1205 | * 5 - long frame (FIXUP_FRAME) |
1206 | * 1 - orig_ax |
1207 | */ |
1208 | lss (1+5+6)*4(%esp), %esp # back to espfix stack |
1209 | jmp .Lirq_return |
1210 | #endif |
1211 | SYM_CODE_END(asm_exc_nmi) |
1212 | |
1213 | .pushsection .text, "ax" |
1214 | SYM_CODE_START(rewind_stack_and_make_dead) |
1215 | /* Prevent any naive code from trying to unwind to our caller. */ |
1216 | xorl %ebp, %ebp |
1217 | |
1218 | movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi |
1219 | leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp |
1220 | |
1221 | call make_task_dead |
1222 | 1: jmp 1b |
1223 | SYM_CODE_END(rewind_stack_and_make_dead) |
1224 | .popsection |
1225 | |