1/* SPDX-License-Identifier: GPL-2.0-only */
2#include <linux/export.h>
3#include <linux/linkage.h>
4#include <asm/asm.h>
5
/*
 * Most CPUs support the enhanced REP MOVSB/STOSB (ERMS) instructions.
 * Using them is recommended whenever possible, and they are used by
 * default (clear_page_erms). If enhanced REP MOVSB/STOSB is not
 * available, try the fast-string variant (clear_page_rep). Otherwise,
 * fall back to the original unrolled loop (clear_page_orig).
 */
12
/*
 * clear_page_rep - zero one 4096-byte page using REP STOSQ.
 *
 * Input:
 *   %rdi - page to clear
 *
 * %rax is zeroed and %rcx is used as the quadword counter; the string
 * instruction itself advances %rdi and counts %rcx down to zero.
 */
SYM_FUNC_START(clear_page_rep)
	xorl	%eax, %eax		/* store value: 0 (32-bit write clears all of %rax) */
	movl	$4096/8, %ecx		/* number of 8-byte stores in one page */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
24
/*
 * clear_page_orig - zero one 4096-byte page with plain quadword stores.
 *
 * Input:
 *   %rdi - page to clear
 *
 * The page is cleared in 64-byte chunks; each loop iteration issues
 * eight unrolled MOVQ stores of the zero held in %rax.
 */
SYM_FUNC_START(clear_page_orig)
	movl	$4096/64, %ecx		/* loop count: 64-byte chunks per page */
	xorl	%eax, %eax		/* %rax = 0, the value being stored */
	.p2align 4
.Lclear_chunk:
	/*
	 * Count down first. Neither MOVQ nor LEA modifies the flags, so
	 * the ZF set by this DECL is the one consumed by JNZ below.
	 */
	decl	%ecx
#define PUT(x) movq %rax,x*8(%rdi)
	movq	%rax, (%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq	64(%rdi), %rdi		/* next chunk; LEA keeps the flags intact */
	jnz	.Lclear_chunk
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
46
/*
 * clear_page_erms - zero one 4096-byte page using REP STOSB.
 *
 * Input:
 *   %rdi - page to clear
 *
 * %rax is zeroed and %rcx is used as the byte counter; the string
 * instruction itself advances %rdi and counts %rcx down to zero.
 */
SYM_FUNC_START(clear_page_erms)
	xorl	%eax, %eax		/* store value: 0 (only %al is written out) */
	movl	$4096, %ecx		/* number of bytes in one page */
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
54
/*
 * rep_stos_alternative - default routine for clearing user-space memory.
 *
 * Input:
 *   %rdi - destination
 *   %rcx - number of bytes to clear
 *   %rax - must already be zero
 *
 * Output:
 *   %rcx - number of bytes left uncleared, or 0 on success
 *
 * %rdi is advanced as bytes are cleared.
 *
 * Three store loops are used depending on the remaining count:
 * 64-byte unrolled blocks, single quadwords, and single bytes.
 */
SYM_FUNC_START(rep_stos_alternative)
	/* Dispatch on the size of the request. */
	cmpq	$64, %rcx
	jae	.Lblock64_loop

	cmpl	$8, %ecx
	jae	.Lqword_loop

	testl	%ecx, %ecx
	je	.Ldone

	/* Byte-at-a-time loop; %rcx is non-zero on entry. */
.Lbyte_loop:
0:	movb	%al, (%rdi)
	incq	%rdi
	decq	%rcx
	jnz	.Lbyte_loop
.Ldone:
	RET

	/*
	 * Quadword-at-a-time loop. Only reached with 8 <= %rcx < 64, so
	 * the 32-bit arithmetic on %ecx is sufficient here.
	 */
.Lqword_loop:
1:	movq	%rax, (%rdi)
	addq	$8, %rdi
	subl	$8, %ecx
	je	.Ldone
	cmpl	$8, %ecx
	jae	.Lqword_loop
	jmp	.Lbyte_loop

	/* 64 bytes per iteration, eight unrolled quadword stores. */
	.p2align 4
.Lblock64_loop:
10:	movq	%rax, (%rdi)
11:	movq	%rax, 8(%rdi)
12:	movq	%rax, 16(%rdi)
13:	movq	%rax, 24(%rdi)
14:	movq	%rax, 32(%rdi)
15:	movq	%rax, 40(%rdi)
16:	movq	%rax, 48(%rdi)
17:	movq	%rax, 56(%rdi)
	addq	$64, %rdi
	subq	$64, %rcx
	cmpq	$64, %rcx
	jae	.Lblock64_loop
	/* Fewer than 64 bytes remain: finish with quadwords and/or bytes. */
	cmpl	$8, %ecx
	jae	.Lqword_loop
	testl	%ecx, %ecx
	jne	.Lbyte_loop
	RET

	/*
	 * Exception fixups.
	 *
	 * A fault in the byte loop simply returns: %rcx already holds
	 * the exact number of bytes that were not cleared.
	 *
	 * A fault on any of the quadword stores happens with %rcx
	 * known to be non-zero, so the fixup just restarts in the byte
	 * loop, which then produces the exact remaining count.
	 *
	 * For the unrolled loop this can mean some bytes get cleared
	 * twice. That is harmless.
	 *
	 * The value in %rdi could be used to avoid taking a second
	 * fault in the exact-count case, but it is not worth the
	 * trouble.
	 *
	 * Likewise, %rdi could be aligned before entering the unrolled
	 * loop, but unaligned stores are neither common nor expensive
	 * enough to justify it.
	 */
	_ASM_EXTABLE_UA( 0b, .Ldone)
	_ASM_EXTABLE_UA( 1b, .Lbyte_loop)
	_ASM_EXTABLE_UA(10b, .Lbyte_loop)
	_ASM_EXTABLE_UA(11b, .Lbyte_loop)
	_ASM_EXTABLE_UA(12b, .Lbyte_loop)
	_ASM_EXTABLE_UA(13b, .Lbyte_loop)
	_ASM_EXTABLE_UA(14b, .Lbyte_loop)
	_ASM_EXTABLE_UA(15b, .Lbyte_loop)
	_ASM_EXTABLE_UA(16b, .Lbyte_loop)
	_ASM_EXTABLE_UA(17b, .Lbyte_loop)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)
142

/* Source: linux/arch/x86/lib/clear_page_64.S */