/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>

/*
 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
 * recommended to use this when possible and we do use it by default.
 * If enhanced REP MOVSB/STOSB is not available, try to use fast string
 * (REP STOSQ). Otherwise, fall back to the original unrolled variant.
 */
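
/*
 * Caller-side dispatch, for reference: clear_page() is patched to call
 * one of the three variants below based on CPU features. A minimal C
 * sketch, modelled on the ALTERNATIVE_2-based clear_page() in
 * asm/page_64.h (constraints abbreviated):
 *
 *	static inline void clear_page(void *page)
 *	{
 *		asm volatile(ALTERNATIVE_2("call clear_page_orig",
 *			"call clear_page_rep", X86_FEATURE_REP_GOOD,
 *			"call clear_page_erms", X86_FEATURE_ERMS)
 *			: "+D" (page) : : "cc", "memory", "rax", "rcx");
 *	}
 */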

/*
 * Zero a page.
 * %rdi - page
 */
SYM_FUNC_START(clear_page_rep)
	movl $4096/8,%ecx	/* 512 quadword stores */
	xorl %eax,%eax		/* %rax = 0, the value stored */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
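
/*
 * Functionally, clear_page_rep is equivalent to this C loop (a
 * sketch; the CPU executes it as a single string operation):
 *
 *	void clear_page_rep_equiv(void *page)
 *	{
 *		unsigned long *p = page;
 *
 *		for (unsigned int i = 0; i < 4096 / 8; i++)
 *			p[i] = 0;
 *	}
 */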

SYM_FUNC_START(clear_page_orig)
	xorl %eax,%eax
	movl $4096/64,%ecx	/* 64 iterations of 64 bytes each */
	.p2align 4
.Lloop:
	decl %ecx
#define PUT(x) movq %rax,x*8(%rdi)
	/* Eight 8-byte stores cover one 64-byte cache line */
	movq %rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	leaq 64(%rdi),%rdi
	jnz .Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
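
/*
 * C equivalent of clear_page_orig (a sketch): one 64-byte cache line
 * is written per iteration with plain stores, for CPUs where the
 * string instructions are slow:
 *
 *	void clear_page_orig_equiv(void *page)
 *	{
 *		unsigned long *p = page;
 *
 *		for (unsigned int i = 0; i < 4096 / 64; i++, p += 8) {
 *			p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
 *			p[4] = 0; p[5] = 0; p[6] = 0; p[7] = 0;
 *		}
 *	}
 */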

SYM_FUNC_START(clear_page_erms)
	movl $4096,%ecx		/* byte count, not qwords */
	xorl %eax,%eax
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
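
/*
 * On CPUs with ERMS ("Enhanced REP MOVSB/STOSB"), the byte-granular
 * REP STOSB above is the preferred variant: it behaves like
 * memset(page, 0, 4096), and the hardware optimizes the string
 * operation internally for large aligned buffers.
 */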

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 * rax is zero
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
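
/*
 * The register contract above intentionally matches REP STOSB: the
 * caller patches between "rep stosb" and a call to this function
 * depending on CPU features. A sketch of the caller side, modelled on
 * __clear_user() in asm/uaccess_64.h, where ALT_NOT(X86_FEATURE_FSRS)
 * selects this fallback when Fast Short REP STOSB is absent:
 *
 *	asm volatile("1:\n\t"
 *		     ALTERNATIVE("rep stosb",
 *				 "call rep_stos_alternative",
 *				 ALT_NOT(X86_FEATURE_FSRS))
 *		     "2:\n"
 *		     _ASM_EXTABLE_UA(1b, 2b)
 *		     : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
 *		     : "a" (0));
 */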
SYM_FUNC_START(rep_stos_alternative)
	cmpq $64,%rcx
	jae .Lunrolled		/* 64 bytes or more: unrolled loop */

	cmp $8,%ecx
	jae .Lword		/* 8..63 bytes: qword loop */

	testl %ecx,%ecx
	je .Lexit		/* zero bytes: nothing to do */

.Lclear_user_tail:
0:	movb %al,(%rdi)
	inc %rdi
	dec %rcx
	jnz .Lclear_user_tail
.Lexit:
	RET

	_ASM_EXTABLE_UA( 0b, .Lexit)

.Lword:
1:	movq %rax,(%rdi)
	addq $8,%rdi
	sub $8,%ecx
	je .Lexit
	cmp $8,%ecx
	jae .Lword
	jmp .Lclear_user_tail
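
/*
 * The three size classes in this function correspond to the following
 * C control flow (a sketch; dst stands for %rdi, count for %rcx):
 *
 *	unsigned long *q = dst;
 *	while (count >= 64) {			// .Lunrolled
 *		q[0] = q[1] = q[2] = q[3] = 0;
 *		q[4] = q[5] = q[6] = q[7] = 0;
 *		q += 8; count -= 64;
 *	}
 *	while (count >= 8) {			// .Lword
 *		*q++ = 0; count -= 8;
 *	}
 *	unsigned char *b = (unsigned char *)q;
 *	while (count) {				// .Lclear_user_tail
 *		*b++ = 0; count--;
 *	}
 */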
	.p2align 4
.Lunrolled:
10:	movq %rax,(%rdi)
11:	movq %rax,8(%rdi)
12:	movq %rax,16(%rdi)
13:	movq %rax,24(%rdi)
14:	movq %rax,32(%rdi)
15:	movq %rax,40(%rdi)
16:	movq %rax,48(%rdi)
17:	movq %rax,56(%rdi)
	addq $64,%rdi
	subq $64,%rcx
	cmpq $64,%rcx
	jae .Lunrolled
	cmpl $8,%ecx
	jae .Lword
	testl %ecx,%ecx
	jne .Lclear_user_tail
	RET

/*
 * If we take an exception on any of the
 * word stores, we know that %rcx isn't zero,
 * so we can just go to the tail clearing to
 * get the exact count.
 *
 * The unrolled case might end up clearing
 * some bytes twice. Don't care.
 *
 * We could use the value in %rdi to avoid
 * a second fault on the exact count case,
 * but do we really care? No.
 *
 * Finally, we could try to align %rdi at the
 * top of the unrolling. But unaligned stores
 * just aren't that common or expensive.
 */
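/*
 * Mechanically, each _ASM_EXTABLE_UA(from, to) entry below records a
 * (fault IP, fixup IP) pair in the __ex_table section: a fault on a
 * user address at 'from' resumes execution at 'to', with %rcx still
 * holding the count of bytes not yet cleared.
 */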
	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
EXPORT_SYMBOL(rep_stos_alternative)