/*
 * Copyright IBM Corporation, 2015
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/mm.h>
#include <asm/machdep.h>
#include <asm/mmu.h>

#include "internal.h"
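
/*
 * Hash-MMU fault handlers for the 64K Linux page size. __hash_page_64K()
 * maps a whole page with a single 64K HPTE; __hash_page_4K() instead
 * backs the page with up to sixteen 4K HPTEs (the H_PAGE_COMBO case),
 * which subpage protection and H_PAGE_4K_PFN mappings require. Both
 * return 0 on success or to retry the access, 1 to take a page fault,
 * and -1 on hypervisor failure.
 */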

/*
 * Return true if the entry has a slot value which
 * the software considers invalid.
 */
static inline bool hpte_soft_invalid(unsigned long hidx)
{
	return ((hidx & 0xfUL) == 0xfUL);
}
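
/*
 * Each 4-bit hidx nibble records where a subpage's HPTE lives: bits 0-2
 * are the slot within the 8-entry HPTE group, and bit 3 selects the
 * primary or secondary hash bucket. The value 0xf (secondary bucket,
 * slot 7) is sacrificed as the software "invalid" marker, which is why
 * __hash_page_4K() below must throw away a hardware insert that happens
 * to land in that slot. A sketch of what pte_get_hash_gslot() computes
 * from a usable hidx:
 *
 *	bucket = (hidx & _PTEIDX_SECONDARY) ? ~hash : hash;
 *	gslot  = (bucket & htab_hash_mask) * HPTES_PER_GROUP +
 *		 (hidx & _PTEIDX_GROUP_IX);
 */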

/*
 * index ranges from 0 to 15
 */
bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
{
	return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index)));
}

int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
		   pte_t *ptep, unsigned long trap, unsigned long flags,
		   int ssize, int subpg_prot)
{
	real_pte_t rpte;
	unsigned long hpte_group;
	unsigned int subpg_index;
	unsigned long rflags, pa;
	unsigned long old_pte, new_pte, subpg_pte;
	unsigned long vpn, hash, slot, gslot;
	unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;

	/*
	 * Atomically mark the Linux large-page PTE busy and dirty.
	 */
	do {
		pte_t pte = READ_ONCE(*ptep);

		old_pte = pte_val(pte);
		/* If PTE busy, retry the access */
		if (unlikely(old_pte & H_PAGE_BUSY))
			return 0;
		/* If PTE permissions don't match, take page fault */
		if (unlikely(!check_pte_access(access, old_pte)))
			return 1;
		/*
		 * Try to lock the PTE; add ACCESSED, and DIRTY if it was
		 * a write access. Since this is a 4K insert of a 64K page
		 * size PTE, also add H_PAGE_COMBO.
		 */
		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED | H_PAGE_COMBO;
		if (access & _PAGE_WRITE)
			new_pte |= _PAGE_DIRTY;
	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
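
	/*
	 * The PTE is now locked: H_PAGE_BUSY acts as a per-PTE lock, and
	 * every return path below must publish a PTE with it cleared (or
	 * restore the unmodified old_pte on failure).
	 */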

	/*
	 * Handle the subpage protection bits.
	 */
	subpg_pte = new_pte & ~subpg_prot;
	rflags = htab_convert_pte_flags(subpg_pte, flags);

	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {

		/*
		 * No CPU has hugepages but lacks no-execute, so we
		 * don't need to worry about that case.
		 */
		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
	}

	subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
	vpn = hpt_vpn(ea, vsid, ssize);
	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
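	/*
	 * subpg_index selects one of the sixteen 4K subpages (EA bits
	 * 12-15 for a 64K PAGE_SIZE), and rpte now also carries the
	 * per-subpage hidx nibbles, read from the second half of the
	 * PTE page.
	 */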
	/*
	 * None of the sub-4K pages are hashed yet.
	 */
	if (!(old_pte & H_PAGE_HASHPTE))
		goto htab_insert_hpte;
	/*
	 * Check if the pte was already inserted into the hash table
	 * as a 64k HW page, and invalidate the 64k HPTE if so.
	 */
	if (!(old_pte & H_PAGE_COMBO)) {
		flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
		/*
		 * Clear the old slot details from the old and new PTE.
		 * On hash insert failure we use the old PTE value, and
		 * we don't want slot information there if we have an
		 * insert failure.
		 */
		old_pte &= ~H_PAGE_HASHPTE;
		new_pte &= ~H_PAGE_HASHPTE;
		goto htab_insert_hpte;
	}
	/*
	 * If the subpage is already valid, just update its protection.
	 */
	if (__rpte_sub_valid(rpte, subpg_index)) {
		int ret;

		gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
					   subpg_index);
		ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
						 MMU_PAGE_4K, MMU_PAGE_4K,
						 ssize, flags);

		/*
		 * If we failed, typically because the HPTE wasn't really
		 * there, we try an insertion.
		 */
		if (ret == -1)
			goto htab_insert_hpte;

		*ptep = __pte(new_pte & ~H_PAGE_BUSY);
		return 0;
	}

htab_insert_hpte:

	/*
	 * Initialize all hidx entries to an invalid value the first
	 * time the PTE is about to allocate a 4K HPTE.
	 */
	if (!(old_pte & H_PAGE_COMBO))
		rpte.hidx = INVALID_RPTE_HIDX;

	/*
	 * Handle the H_PAGE_4K_PFN case.
	 */
	if (old_pte & H_PAGE_4K_PFN) {
		/*
		 * All the sub-4K pages have the same
		 * physical address.
		 */
		pa = pte_pfn(__pte(old_pte)) << HW_PAGE_SHIFT;
	} else {
		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
		pa += (subpg_index << shift);
	}
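	/*
	 * Illustrative arithmetic: subpg_index 3 places the 4K HPTE
	 * 3 * 4K = 0x3000 past the 64K page's base physical address;
	 * with H_PAGE_4K_PFN every subpage maps the same 4K frame.
	 */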
	hash = hpt_hash(vpn, shift, ssize);
repeat:
	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
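	/*
	 * hpte_group is the base slot of the 8-entry primary bucket
	 * selected by the hash.
	 */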

	/* Insert into the hash table, primary slot */
	slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
					MMU_PAGE_4K, MMU_PAGE_4K, ssize);
	/*
	 * Primary bucket is full, try the secondary (indexed by the
	 * one's complement of the hash).
	 */
	if (unlikely(slot == -1)) {
		bool soft_invalid;

		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
						rflags, HPTE_V_SECONDARY,
						MMU_PAGE_4K, MMU_PAGE_4K,
						ssize);

		soft_invalid = hpte_soft_invalid(slot);
		if (unlikely(soft_invalid)) {
			/*
			 * We got a valid slot from the hardware's point of
			 * view, but we cannot use it, because the software
			 * reserves this special value, as defined by
			 * hpte_soft_invalid(), to track invalid slots. So
			 * invalidate it.
			 */
			gslot = slot & _PTEIDX_GROUP_IX;
			mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
						     MMU_PAGE_4K, MMU_PAGE_4K,
						     ssize, 0);
		}

		if (unlikely(slot == -1 || soft_invalid)) {
			/*
			 * For a soft-invalid slot, make sure we release a
			 * slot from the primary group, in the hope that we
			 * will acquire that slot the next time we try. This
			 * ensures that we do not get the same soft-invalid
			 * slot again.
			 */
			if (soft_invalid || (mftb() & 0x1))
				hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;

			mmu_hash_ops.hpte_remove(hpte_group);
			/*
			 * FIXME!! Should we retry the group from which we
			 * just removed an entry?
			 */
			goto repeat;
		}
	}
	/*
	 * Hypervisor failure. Restore the old PTE and return -1,
	 * similar to __hash_page_*().
	 */
	if (unlikely(slot == -2)) {
		*ptep = __pte(old_pte);
		hash_failure_debug(ea, access, vsid, trap, ssize,
				   MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
		return -1;
	}

	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
	new_pte |= H_PAGE_HASHPTE;
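	/*
	 * pte_set_hidx() recorded the slot nibble for this subpage in the
	 * second half of the PTE page (and returned any PTE flag bits that
	 * must accompany it); H_PAGE_HASHPTE marks that at least one
	 * subpage now has a valid HPTE.
	 */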

	if (stress_hpt())
		hpt_do_stress(ea, hpte_group);

	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
	return 0;
}

int __hash_page_64K(unsigned long ea, unsigned long access,
		    unsigned long vsid, pte_t *ptep, unsigned long trap,
		    unsigned long flags, int ssize)
{
	real_pte_t rpte;
	unsigned long hpte_group;
	unsigned long rflags, pa;
	unsigned long old_pte, new_pte;
	unsigned long vpn, hash, slot;
	unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift;

	/*
	 * Atomically mark the Linux large-page PTE busy and dirty.
	 */
	do {
		pte_t pte = READ_ONCE(*ptep);

		old_pte = pte_val(pte);
		/* If PTE busy, retry the access */
		if (unlikely(old_pte & H_PAGE_BUSY))
			return 0;
		/* If PTE permissions don't match, take page fault */
		if (unlikely(!check_pte_access(access, old_pte)))
			return 1;
		/*
		 * Check if the PTE has the cache-inhibited bit set.
		 * If so, bail out and refault as a 4K page.
		 */
		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
		    unlikely(pte_ci(pte)))
			return 0;
		/*
		 * Try to lock the PTE; add ACCESSED, and DIRTY if it was
		 * a write access.
		 */
		new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
		if (access & _PAGE_WRITE)
			new_pte |= _PAGE_DIRTY;
	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
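
	/*
	 * As in __hash_page_4K(), H_PAGE_BUSY now serves as the per-PTE
	 * lock; every exit below publishes a PTE without it (or restores
	 * the unmodified old_pte).
	 */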

	rflags = htab_convert_pte_flags(new_pte, flags);
	rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);

	if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);

	vpn = hpt_vpn(ea, vsid, ssize);
	if (unlikely(old_pte & H_PAGE_HASHPTE)) {
		unsigned long gslot;

		/*
		 * There MIGHT be an HPTE for this pte: if updatepp cannot
		 * find it, clear the stale slot bits so that we insert a
		 * fresh HPTE below.
		 */
		gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
		if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K,
					       MMU_PAGE_64K, ssize,
					       flags) == -1)
			old_pte &= ~_PAGE_HPTEFLAGS;
	}

	if (likely(!(old_pte & H_PAGE_HASHPTE))) {

		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
		hash = hpt_hash(vpn, shift, ssize);

repeat:
		hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;

		/* Insert into the hash table, primary slot */
		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
						MMU_PAGE_64K, MMU_PAGE_64K,
						ssize);
		/*
		 * Primary bucket is full, try the secondary (indexed by
		 * the one's complement of the hash).
		 */
		if (unlikely(slot == -1)) {
			hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
							rflags,
							HPTE_V_SECONDARY,
							MMU_PAGE_64K,
							MMU_PAGE_64K, ssize);
			if (slot == -1) {
				if (mftb() & 0x1)
					hpte_group = (hash & htab_hash_mask) *
							HPTES_PER_GROUP;
				mmu_hash_ops.hpte_remove(hpte_group);
				/*
				 * FIXME!! Should we retry the group from
				 * which we just removed an entry?
				 */
				goto repeat;
			}
		}
		/*
		 * Hypervisor failure. Restore the old PTE and return -1,
		 * similar to __hash_page_*().
		 */
		if (unlikely(slot == -2)) {
			*ptep = __pte(old_pte);
			hash_failure_debug(ea, access, vsid, trap, ssize,
					   MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
			return -1;
		}

		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);

		if (stress_hpt())
			hpt_do_stress(ea, hpte_group);
	}

	*ptep = __pte(new_pte & ~H_PAGE_BUSY);

	return 0;
}