1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Page Deallocation Table (PDT) support |
4 | * |
5 | * The Page Deallocation Table (PDT) is maintained by firmware and holds a |
6 | * list of memory addresses in which memory errors were detected. |
7 | * The list contains both single-bit (correctable) and double-bit |
8 | * (uncorrectable) errors. |
9 | * |
10 | * Copyright 2017 by Helge Deller <deller@gmx.de> |
11 | * |
12 | * possible future enhancements: |
13 | * - add userspace interface via procfs or sysfs to clear PDT |
14 | */ |
15 | |
16 | #include <linux/memblock.h> |
17 | #include <linux/seq_file.h> |
18 | #include <linux/kthread.h> |
19 | #include <linux/proc_fs.h> |
20 | #include <linux/initrd.h> |
21 | #include <linux/pgtable.h> |
22 | #include <linux/mm.h> |
23 | |
24 | #include <asm/pdc.h> |
25 | #include <asm/pdcpat.h> |
26 | #include <asm/sections.h> |
27 | #include <asm/pgtable.h> |
28 | |
/*
 * Firmware interface through which the PDT is accessed.  The value is
 * chosen once by pdc_pdt_init().
 */
enum pdt_access_type {
	PDT_NONE = 0,	/* no page deallocation information available */
	PDT_PDC,	/* standard PDC call (non-PAT machines) */
	PDT_PAT_NEW,	/* PAT call reporting info for all cells */
	PDT_PAT_CELL,	/* PAT call reporting info for one cell only */
};

/* Interface currently in use; zero-initialized, i.e. PDT_NONE, until probed. */
static enum pdt_access_type pdt_type;
37 | |
38 | /* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */ |
39 | #define PDT_POLL_INTERVAL_DEFAULT (5*60*HZ) |
40 | #define PDT_POLL_INTERVAL_SHORT (1*60*HZ) |
41 | static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT; |
42 | |
43 | /* global PDT status information */ |
44 | static struct pdc_mem_retinfo pdt_status; |
45 | |
46 | #define MAX_PDT_TABLE_SIZE PAGE_SIZE |
47 | #define MAX_PDT_ENTRIES (MAX_PDT_TABLE_SIZE / sizeof(unsigned long)) |
48 | static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss; |
49 | |
/*
 * Constants for the pdt_entry format:
 * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
 * reserved, bit 62 is the perm bit and bit 63 is the error_type bit
 * (bit 63 being the least significant bit, as the masks below show).
 * The perm bit indicates whether the error has been verified as a permanent
 * error (value of 1) or has not been verified, and may be transient (value
 * of 0). The error_type bit indicates whether the error is a single bit error
 * (value of 1) or a multiple bit error.
 * On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
 * 63. Those machines don't provide the perm bit.
 *
 * All three macros evaluate to unsigned long so that they can be combined
 * with a pdt_entry without relying on sign extension of an int constant.
 * PDT_ADDR_PHYS_MASK and PDT_ADDR_PERM_ERR depend on the current pdt_type.
 */

#define PDT_ADDR_PHYS_MASK	(pdt_type != PDT_PDC ? ~0x3fUL : ~0x0fUL)
#define PDT_ADDR_PERM_ERR	(pdt_type != PDT_PDC ? 2UL : 0UL)
#define PDT_ADDR_SINGLE_ERR	1UL
65 | |
66 | /* report PDT entries via /proc/meminfo */ |
67 | void arch_report_meminfo(struct seq_file *m) |
68 | { |
69 | if (pdt_type == PDT_NONE) |
70 | return; |
71 | |
72 | seq_printf(m, fmt: "PDT_max_entries: %7lu\n" , |
73 | pdt_status.pdt_size); |
74 | seq_printf(m, fmt: "PDT_cur_entries: %7lu\n" , |
75 | pdt_status.pdt_entries); |
76 | } |
77 | |
78 | static int get_info_pat_new(void) |
79 | { |
80 | struct pdc_pat_mem_retinfo pat_rinfo; |
81 | int ret; |
82 | |
83 | /* newer PAT machines like C8000 report info for all cells */ |
84 | if (is_pdc_pat()) |
85 | ret = pdc_pat_mem_pdt_info(&pat_rinfo); |
86 | else |
87 | return PDC_BAD_PROC; |
88 | |
89 | pdt_status.pdt_size = pat_rinfo.max_pdt_entries; |
90 | pdt_status.pdt_entries = pat_rinfo.current_pdt_entries; |
91 | pdt_status.pdt_status = 0; |
92 | pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc; |
93 | pdt_status.good_mem = pat_rinfo.good_mem; |
94 | |
95 | return ret; |
96 | } |
97 | |
98 | static int get_info_pat_cell(void) |
99 | { |
100 | struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo; |
101 | int ret; |
102 | |
103 | /* older PAT machines like rp5470 report cell info only */ |
104 | if (is_pdc_pat()) |
105 | ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num); |
106 | else |
107 | return PDC_BAD_PROC; |
108 | |
109 | pdt_status.pdt_size = cell_rinfo.max_pdt_entries; |
110 | pdt_status.pdt_entries = cell_rinfo.current_pdt_entries; |
111 | pdt_status.pdt_status = 0; |
112 | pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc; |
113 | pdt_status.good_mem = cell_rinfo.good_mem; |
114 | |
115 | return ret; |
116 | } |
117 | |
118 | static void report_mem_err(unsigned long pde) |
119 | { |
120 | struct pdc_pat_mem_phys_mem_location loc; |
121 | unsigned long addr; |
122 | char dimm_txt[32]; |
123 | |
124 | addr = pde & PDT_ADDR_PHYS_MASK; |
125 | |
126 | /* show DIMM slot description on PAT machines */ |
127 | if (is_pdc_pat()) { |
128 | pdc_pat_mem_get_dimm_phys_location(&loc, addr); |
129 | sprintf(buf: dimm_txt, fmt: "DIMM slot %02x, " , loc.dimm_slot); |
130 | } else |
131 | dimm_txt[0] = 0; |
132 | |
133 | pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n" , |
134 | addr, dimm_txt, |
135 | pde & PDT_ADDR_PERM_ERR ? "permanent " :"" , |
136 | pde & PDT_ADDR_SINGLE_ERR ? "single" :"multi" ); |
137 | } |
138 | |
139 | |
140 | /* |
141 | * pdc_pdt_init() |
142 | * |
143 | * Initialize kernel PDT structures, read initial PDT table from firmware, |
144 | * report all current PDT entries and mark bad memory with memblock_reserve() |
145 | * to avoid that the kernel will use broken memory areas. |
146 | * |
147 | */ |
148 | void __init pdc_pdt_init(void) |
149 | { |
150 | int ret, i; |
151 | unsigned long entries; |
152 | struct pdc_mem_read_pdt pdt_read_ret; |
153 | |
154 | pdt_type = PDT_PAT_NEW; |
155 | ret = get_info_pat_new(); |
156 | |
157 | if (ret != PDC_OK) { |
158 | pdt_type = PDT_PAT_CELL; |
159 | ret = get_info_pat_cell(); |
160 | } |
161 | |
162 | if (ret != PDC_OK) { |
163 | pdt_type = PDT_PDC; |
164 | /* non-PAT machines provide the standard PDC call */ |
165 | ret = pdc_mem_pdt_info(&pdt_status); |
166 | } |
167 | |
168 | if (ret != PDC_OK) { |
169 | pdt_type = PDT_NONE; |
170 | pr_info("PDT: Firmware does not provide any page deallocation" |
171 | " information.\n" ); |
172 | return; |
173 | } |
174 | |
175 | entries = pdt_status.pdt_entries; |
176 | if (WARN_ON(entries > MAX_PDT_ENTRIES)) |
177 | entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES; |
178 | |
179 | pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx," |
180 | " good_mem %lu MB\n" , |
181 | pdt_type == PDT_PDC ? __stringify(PDT_PDC) : |
182 | pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL) |
183 | : __stringify(PDT_PAT_NEW), |
184 | pdt_status.pdt_size, pdt_status.pdt_entries, |
185 | pdt_status.pdt_status, pdt_status.first_dbe_loc, |
186 | pdt_status.good_mem / 1024 / 1024); |
187 | |
188 | if (entries == 0) { |
189 | pr_info("PDT: Firmware reports all memory OK.\n" ); |
190 | return; |
191 | } |
192 | |
193 | if (pdt_status.first_dbe_loc && |
194 | pdt_status.first_dbe_loc <= __pa((unsigned long)&_end)) |
195 | pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n" ); |
196 | |
197 | pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n" , |
198 | entries); |
199 | |
200 | if (pdt_type == PDT_PDC) |
201 | ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry); |
202 | else { |
203 | #ifdef CONFIG_64BIT |
204 | struct pdc_pat_mem_read_pd_retinfo pat_pret; |
205 | |
206 | if (pdt_type == PDT_PAT_CELL) |
207 | ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry, |
208 | MAX_PDT_ENTRIES); |
209 | else |
210 | ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry, |
211 | MAX_PDT_TABLE_SIZE, 0); |
212 | #else |
213 | ret = PDC_BAD_PROC; |
214 | #endif |
215 | } |
216 | |
217 | if (ret != PDC_OK) { |
218 | pdt_type = PDT_NONE; |
219 | pr_warn("PDT: Get PDT entries failed with %d\n" , ret); |
220 | return; |
221 | } |
222 | |
223 | for (i = 0; i < pdt_status.pdt_entries; i++) { |
224 | unsigned long addr; |
225 | |
226 | report_mem_err(pde: pdt_entry[i]); |
227 | |
228 | addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK; |
229 | if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && |
230 | addr >= initrd_start && addr < initrd_end) |
231 | pr_crit("CRITICAL: initrd possibly broken " |
232 | "due to bad memory!\n" ); |
233 | |
234 | /* mark memory page bad */ |
235 | memblock_reserve(base: pdt_entry[i] & PAGE_MASK, PAGE_SIZE); |
236 | num_poisoned_pages_inc(pfn: addr >> PAGE_SHIFT); |
237 | } |
238 | } |
239 | |
240 | |
241 | /* |
242 | * This is the PDT kernel thread main loop. |
243 | */ |
244 | |
245 | static int pdt_mainloop(void *unused) |
246 | { |
247 | struct pdc_mem_read_pdt pdt_read_ret; |
248 | struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused; |
249 | unsigned long old_num_entries; |
250 | unsigned long *bad_mem_ptr; |
251 | int num, ret; |
252 | |
253 | for (;;) { |
254 | set_current_state(TASK_INTERRUPTIBLE); |
255 | |
256 | old_num_entries = pdt_status.pdt_entries; |
257 | |
258 | schedule_timeout(timeout: pdt_poll_interval); |
259 | if (kthread_should_stop()) |
260 | break; |
261 | |
262 | /* Do we have new PDT entries? */ |
263 | switch (pdt_type) { |
264 | case PDT_PAT_NEW: |
265 | ret = get_info_pat_new(); |
266 | break; |
267 | case PDT_PAT_CELL: |
268 | ret = get_info_pat_cell(); |
269 | break; |
270 | default: |
271 | ret = pdc_mem_pdt_info(&pdt_status); |
272 | break; |
273 | } |
274 | |
275 | if (ret != PDC_OK) { |
276 | pr_warn("PDT: unexpected failure %d\n" , ret); |
277 | return -EINVAL; |
278 | } |
279 | |
280 | /* if no new PDT entries, just wait again */ |
281 | num = pdt_status.pdt_entries - old_num_entries; |
282 | if (num <= 0) |
283 | continue; |
284 | |
285 | /* decrease poll interval in case we found memory errors */ |
286 | if (pdt_status.pdt_entries && |
287 | pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT) |
288 | pdt_poll_interval = PDT_POLL_INTERVAL_SHORT; |
289 | |
290 | /* limit entries to get */ |
291 | if (num > MAX_PDT_ENTRIES) { |
292 | num = MAX_PDT_ENTRIES; |
293 | pdt_status.pdt_entries = old_num_entries + num; |
294 | } |
295 | |
296 | /* get new entries */ |
297 | switch (pdt_type) { |
298 | #ifdef CONFIG_64BIT |
299 | case PDT_PAT_CELL: |
300 | if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) { |
301 | pr_crit("PDT: too many entries.\n" ); |
302 | return -ENOMEM; |
303 | } |
304 | ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry, |
305 | MAX_PDT_ENTRIES); |
306 | bad_mem_ptr = &pdt_entry[old_num_entries]; |
307 | break; |
308 | case PDT_PAT_NEW: |
309 | ret = pdc_pat_mem_read_pd_pdt(&pat_pret, |
310 | pdt_entry, |
311 | num * sizeof(unsigned long), |
312 | old_num_entries * sizeof(unsigned long)); |
313 | bad_mem_ptr = &pdt_entry[0]; |
314 | break; |
315 | #endif |
316 | default: |
317 | ret = pdc_mem_pdt_read_entries(&pdt_read_ret, |
318 | pdt_entry); |
319 | bad_mem_ptr = &pdt_entry[old_num_entries]; |
320 | break; |
321 | } |
322 | |
323 | /* report and mark memory broken */ |
324 | while (num--) { |
325 | unsigned long pde = *bad_mem_ptr++; |
326 | |
327 | report_mem_err(pde); |
328 | |
329 | #ifdef CONFIG_MEMORY_FAILURE |
330 | if ((pde & PDT_ADDR_PERM_ERR) || |
331 | ((pde & PDT_ADDR_SINGLE_ERR) == 0)) |
332 | memory_failure(pfn: pde >> PAGE_SHIFT, flags: 0); |
333 | else |
334 | soft_offline_page(pfn: pde >> PAGE_SHIFT, flags: 0); |
335 | #else |
336 | pr_crit("PDT: memory error at 0x%lx ignored.\n" |
337 | "Rebuild kernel with CONFIG_MEMORY_FAILURE=y " |
338 | "for real handling.\n" , |
339 | pde & PDT_ADDR_PHYS_MASK); |
340 | #endif |
341 | |
342 | } |
343 | } |
344 | |
345 | return 0; |
346 | } |
347 | |
348 | |
349 | static int __init pdt_initcall(void) |
350 | { |
351 | struct task_struct *kpdtd_task; |
352 | |
353 | if (pdt_type == PDT_NONE) |
354 | return -ENODEV; |
355 | |
356 | kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd" ); |
357 | |
358 | return PTR_ERR_OR_ZERO(ptr: kpdtd_task); |
359 | } |
360 | |
361 | late_initcall(pdt_initcall); |
362 | |