| 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
| 2 | #include <linux/export.h> |
| 3 | #include <linux/linkage.h> |
| 4 | #include <linux/cfi_types.h> |
| 5 | #include <linux/objtool.h> |
| 6 | #include <asm/asm.h> |
| 7 | |
/*
 * Most CPUs support the enhanced REP MOVSB/STOSB instructions. It is
 * recommended to use them when possible, and we do use them by default.
 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
 * Otherwise, use the original implementation.
 */
| 14 | |
/*
 * clear_page_rep - zero a page with REP STOSQ.
 *
 * In:
 *	%rdi	page to clear
 *
 * Overwrites %eax and %ecx; the string instruction also updates
 * %rcx and %rdi as it runs.
 */
SYM_TYPED_FUNC_START(clear_page_rep)
	xorl	%eax, %eax		/* value to store: zero */
	movl	$4096/8, %ecx		/* 4096 bytes as 8-byte quadwords */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
| 26 | |
/*
 * clear_page_orig - zero a page with an unrolled loop of plain stores.
 *
 * In:
 *	%rdi	page to clear
 *
 * Each pass of the loop writes eight quadwords (64 bytes) and the loop
 * runs 4096/64 times. Overwrites %eax, %ecx and %rdi.
 */
SYM_TYPED_FUNC_START(clear_page_orig)
	xorl	%eax, %eax		/* value to store: zero */
	movl	$4096/64, %ecx		/* number of 64-byte chunks */
	.p2align 4
.Lzero_chunk:
	/*
	 * The counter is decremented first; the movq and leaq below leave
	 * the flags alone, so the jnz at the bottom still tests this decl.
	 */
	decl	%ecx
	movq	%rax, (%rdi)
	movq	%rax, 8(%rdi)
	movq	%rax, 16(%rdi)
	movq	%rax, 24(%rdi)
	movq	%rax, 32(%rdi)
	movq	%rax, 40(%rdi)
	movq	%rax, 48(%rdi)
	movq	%rax, 56(%rdi)
	leaq	64(%rdi), %rdi		/* next chunk, flags preserved */
	jnz	.Lzero_chunk
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
| 48 | |
/*
 * clear_page_erms - zero a page with REP STOSB.
 *
 * In:
 *	%rdi	page to clear
 *
 * Overwrites %eax and %ecx; the string instruction also updates
 * %rcx and %rdi as it runs.
 */
SYM_TYPED_FUNC_START(clear_page_erms)
	xorl	%eax, %eax		/* value to store: zero */
	movl	$4096, %ecx		/* byte count */
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
| 56 | |
/*
 * rep_stos_alternative - default routine for clearing user-space memory.
 *
 * In:
 *	%rdi	destination
 *	%rcx	byte count
 *	%rax	must be zero
 *
 * Out:
 *	%rcx	number of bytes left uncleared, 0 on success
 *
 * The work is split by remaining size: 64-byte unrolled blocks while at
 * least 64 bytes remain, then single quadwords while at least 8 remain,
 * then single bytes.
 */
SYM_FUNC_START(rep_stos_alternative)
	ANNOTATE_NOENDBR
	cmpq	$64, %rcx
	jae	.Lzero_blocks

	/* Fewer than 64 bytes remain, so the 32-bit count is sufficient. */
	cmpl	$8, %ecx
	jae	.Lzero_qwords

	testl	%ecx, %ecx
	jz	.Lstos_done

	/* Byte-at-a-time loop; also the landing point for quadword faults. */
.Lzero_bytes:
0:	movb	%al, (%rdi)
	incq	%rdi
	decq	%rcx
	jnz	.Lzero_bytes
.Lstos_done:
	RET

	/*
	 * A fault on the byte store leaves through the normal exit. %rcx is
	 * only decremented after a store completes, so it holds the number
	 * of bytes that were not cleared.
	 */
	_ASM_EXTABLE_UA( 0b, .Lstos_done)

	/* One quadword per pass; entered only with 8 <= count < 64. */
.Lzero_qwords:
1:	movq	%rax, (%rdi)
	addq	$8, %rdi
	subl	$8, %ecx
	jz	.Lstos_done
	cmpl	$8, %ecx
	jae	.Lzero_qwords
	jmp	.Lzero_bytes

	/* Eight quadwords (64 bytes) per pass while count >= 64. */
	.p2align 4
.Lzero_blocks:
10:	movq	%rax, (%rdi)
11:	movq	%rax, 8(%rdi)
12:	movq	%rax, 16(%rdi)
13:	movq	%rax, 24(%rdi)
14:	movq	%rax, 32(%rdi)
15:	movq	%rax, 40(%rdi)
16:	movq	%rax, 48(%rdi)
17:	movq	%rax, 56(%rdi)
	addq	$64, %rdi
	subq	$64, %rcx
	cmpq	$64, %rcx
	jae	.Lzero_blocks
	cmpl	$8, %ecx
	jae	.Lzero_qwords
	testl	%ecx, %ecx
	jnz	.Lzero_bytes
	RET

	/*
	 * Fault handling for the quadword stores (labels 1 and 10-17):
	 * %rcx is known to be non-zero at every one of them, so the fixup
	 * just continues in the byte loop, which then arrives at the exact
	 * count of uncleared bytes.
	 *
	 * Trade-offs accepted here:
	 *  - after a fault in the unrolled block, some bytes may end up
	 *    being cleared twice;
	 *  - the byte loop takes a second fault to find the exact count,
	 *    even though the value in %rdi could be used to avoid it;
	 *  - %rdi is not aligned before the unrolled block, because
	 *    unaligned stores are neither common nor expensive enough
	 *    to be worth it.
	 */
	_ASM_EXTABLE_UA( 1b, .Lzero_bytes)
	_ASM_EXTABLE_UA(10b, .Lzero_bytes)
	_ASM_EXTABLE_UA(11b, .Lzero_bytes)
	_ASM_EXTABLE_UA(12b, .Lzero_bytes)
	_ASM_EXTABLE_UA(13b, .Lzero_bytes)
	_ASM_EXTABLE_UA(14b, .Lzero_bytes)
	_ASM_EXTABLE_UA(15b, .Lzero_bytes)
	_ASM_EXTABLE_UA(16b, .Lzero_bytes)
	_ASM_EXTABLE_UA(17b, .Lzero_bytes)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)
| 145 | |