1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * |
4 | * Procedures for interfacing to the RTAS on CHRP machines. |
5 | * |
6 | * Peter Bergner, IBM March 2001. |
7 | * Copyright (C) 2001 IBM. |
8 | */ |
9 | |
10 | #define pr_fmt(fmt) "rtas: " fmt |
11 | |
12 | #include <linux/bsearch.h> |
13 | #include <linux/capability.h> |
14 | #include <linux/delay.h> |
15 | #include <linux/export.h> |
16 | #include <linux/init.h> |
17 | #include <linux/kconfig.h> |
18 | #include <linux/kernel.h> |
19 | #include <linux/lockdep.h> |
20 | #include <linux/memblock.h> |
21 | #include <linux/mutex.h> |
22 | #include <linux/of.h> |
23 | #include <linux/of_fdt.h> |
24 | #include <linux/reboot.h> |
25 | #include <linux/sched.h> |
26 | #include <linux/security.h> |
27 | #include <linux/slab.h> |
28 | #include <linux/spinlock.h> |
29 | #include <linux/stdarg.h> |
30 | #include <linux/syscalls.h> |
31 | #include <linux/types.h> |
32 | #include <linux/uaccess.h> |
33 | #include <linux/xarray.h> |
34 | |
35 | #include <asm/delay.h> |
36 | #include <asm/firmware.h> |
37 | #include <asm/interrupt.h> |
38 | #include <asm/machdep.h> |
39 | #include <asm/mmu.h> |
40 | #include <asm/page.h> |
41 | #include <asm/rtas-work-area.h> |
42 | #include <asm/rtas.h> |
43 | #include <asm/time.h> |
44 | #include <asm/trace.h> |
45 | #include <asm/udbg.h> |
46 | |
/*
 * Argument constraints attached to an RTAS function descriptor (see
 * the @filter member of struct rtas_function).
 */
struct rtas_filter {
	/*
	 * Positions in the args buffer of the buffer/size arguments.
	 * A value of -1 means the slot is not used.
	 */
	const int buf_idx1;
	const int size_idx1;
	const int buf_idx2;
	const int size_idx2;
	/*
	 * Buffer size to assume, per the spec, when the function takes
	 * no size parameter (ibm,errinjct is one example). 0 if unused.
	 */
	const int fixed_size;
};
59 | |
60 | /** |
61 | * struct rtas_function - Descriptor for RTAS functions. |
62 | * |
63 | * @token: Value of @name if it exists under the /rtas node. |
64 | * @name: Function name. |
65 | * @filter: If non-NULL, invoking this function via the rtas syscall is |
66 | * generally allowed, and @filter describes constraints on the |
67 | * arguments. See also @banned_for_syscall_on_le. |
68 | * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed |
69 | * but specifically restricted on ppc64le. Such |
70 | * functions are believed to have no users on |
71 | * ppc64le, and we want to keep it that way. It does |
72 | * not make sense for this to be set when @filter |
73 | * is NULL. |
74 | * @lock: Pointer to an optional dedicated per-function mutex. This |
75 | * should be set for functions that require multiple calls in |
76 | * sequence to complete a single operation, and such sequences |
77 | * will disrupt each other if allowed to interleave. Users of |
78 | * this function are required to hold the associated lock for |
79 | * the duration of the call sequence. Add an explanatory |
80 | * comment to the function table entry if setting this member. |
81 | */ |
82 | struct rtas_function { |
83 | s32 token; |
84 | const bool banned_for_syscall_on_le:1; |
85 | const char * const name; |
86 | const struct rtas_filter *filter; |
87 | struct mutex *lock; |
88 | }; |
89 | |
/*
 * Mutexes for sequence-based RTAS functions; each one is referenced by
 * the .lock member of the matching rtas_function_table entry.
 */
static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
static DEFINE_MUTEX(rtas_ibm_get_indices_lock);
static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
/* Not static: presumably taken from outside this file (verify users). */
DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
100 | |
101 | static struct rtas_function rtas_function_table[] __ro_after_init = { |
102 | [RTAS_FNIDX__CHECK_EXCEPTION] = { |
103 | .name = "check-exception" , |
104 | }, |
105 | [RTAS_FNIDX__DISPLAY_CHARACTER] = { |
106 | .name = "display-character" , |
107 | .filter = &(const struct rtas_filter) { |
108 | .buf_idx1 = -1, .size_idx1 = -1, |
109 | .buf_idx2 = -1, .size_idx2 = -1, |
110 | }, |
111 | }, |
112 | [RTAS_FNIDX__EVENT_SCAN] = { |
113 | .name = "event-scan" , |
114 | }, |
115 | [RTAS_FNIDX__FREEZE_TIME_BASE] = { |
116 | .name = "freeze-time-base" , |
117 | }, |
118 | [RTAS_FNIDX__GET_POWER_LEVEL] = { |
119 | .name = "get-power-level" , |
120 | .filter = &(const struct rtas_filter) { |
121 | .buf_idx1 = -1, .size_idx1 = -1, |
122 | .buf_idx2 = -1, .size_idx2 = -1, |
123 | }, |
124 | }, |
125 | [RTAS_FNIDX__GET_SENSOR_STATE] = { |
126 | .name = "get-sensor-state" , |
127 | .filter = &(const struct rtas_filter) { |
128 | .buf_idx1 = -1, .size_idx1 = -1, |
129 | .buf_idx2 = -1, .size_idx2 = -1, |
130 | }, |
131 | }, |
132 | [RTAS_FNIDX__GET_TERM_CHAR] = { |
133 | .name = "get-term-char" , |
134 | }, |
135 | [RTAS_FNIDX__GET_TIME_OF_DAY] = { |
136 | .name = "get-time-of-day" , |
137 | .filter = &(const struct rtas_filter) { |
138 | .buf_idx1 = -1, .size_idx1 = -1, |
139 | .buf_idx2 = -1, .size_idx2 = -1, |
140 | }, |
141 | }, |
142 | [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = { |
143 | .name = "ibm,activate-firmware" , |
144 | .filter = &(const struct rtas_filter) { |
145 | .buf_idx1 = -1, .size_idx1 = -1, |
146 | .buf_idx2 = -1, .size_idx2 = -1, |
147 | }, |
148 | /* |
149 | * PAPR+ as of v2.13 doesn't explicitly impose any |
150 | * restriction, but this typically requires multiple |
151 | * calls before success, and there's no reason to |
152 | * allow sequences to interleave. |
153 | */ |
154 | .lock = &rtas_ibm_activate_firmware_lock, |
155 | }, |
156 | [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { |
157 | .name = "ibm,cbe-start-ptcal" , |
158 | }, |
159 | [RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = { |
160 | .name = "ibm,cbe-stop-ptcal" , |
161 | }, |
162 | [RTAS_FNIDX__IBM_CHANGE_MSI] = { |
163 | .name = "ibm,change-msi" , |
164 | }, |
165 | [RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = { |
166 | .name = "ibm,close-errinjct" , |
167 | .filter = &(const struct rtas_filter) { |
168 | .buf_idx1 = -1, .size_idx1 = -1, |
169 | .buf_idx2 = -1, .size_idx2 = -1, |
170 | }, |
171 | }, |
172 | [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = { |
173 | .name = "ibm,configure-bridge" , |
174 | }, |
175 | [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = { |
176 | .name = "ibm,configure-connector" , |
177 | .filter = &(const struct rtas_filter) { |
178 | .buf_idx1 = 0, .size_idx1 = -1, |
179 | .buf_idx2 = 1, .size_idx2 = -1, |
180 | .fixed_size = 4096, |
181 | }, |
182 | }, |
183 | [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = { |
184 | .name = "ibm,configure-kernel-dump" , |
185 | }, |
186 | [RTAS_FNIDX__IBM_CONFIGURE_PE] = { |
187 | .name = "ibm,configure-pe" , |
188 | }, |
189 | [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = { |
190 | .name = "ibm,create-pe-dma-window" , |
191 | }, |
192 | [RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = { |
193 | .name = "ibm,display-message" , |
194 | .filter = &(const struct rtas_filter) { |
195 | .buf_idx1 = 0, .size_idx1 = -1, |
196 | .buf_idx2 = -1, .size_idx2 = -1, |
197 | }, |
198 | }, |
199 | [RTAS_FNIDX__IBM_ERRINJCT] = { |
200 | .name = "ibm,errinjct" , |
201 | .filter = &(const struct rtas_filter) { |
202 | .buf_idx1 = 2, .size_idx1 = -1, |
203 | .buf_idx2 = -1, .size_idx2 = -1, |
204 | .fixed_size = 1024, |
205 | }, |
206 | }, |
207 | [RTAS_FNIDX__IBM_EXTI2C] = { |
208 | .name = "ibm,exti2c" , |
209 | }, |
210 | [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = { |
211 | .name = "ibm,get-config-addr-info" , |
212 | }, |
213 | [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = { |
214 | .name = "ibm,get-config-addr-info2" , |
215 | .filter = &(const struct rtas_filter) { |
216 | .buf_idx1 = -1, .size_idx1 = -1, |
217 | .buf_idx2 = -1, .size_idx2 = -1, |
218 | }, |
219 | }, |
220 | [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = { |
221 | .name = "ibm,get-dynamic-sensor-state" , |
222 | .filter = &(const struct rtas_filter) { |
223 | .buf_idx1 = 1, .size_idx1 = -1, |
224 | .buf_idx2 = -1, .size_idx2 = -1, |
225 | }, |
226 | /* |
227 | * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS |
228 | * must not call ibm,get-dynamic-sensor-state with |
229 | * different inputs until a non-retry status has been |
230 | * returned. |
231 | */ |
232 | .lock = &rtas_ibm_get_dynamic_sensor_state_lock, |
233 | }, |
234 | [RTAS_FNIDX__IBM_GET_INDICES] = { |
235 | .name = "ibm,get-indices" , |
236 | .filter = &(const struct rtas_filter) { |
237 | .buf_idx1 = 2, .size_idx1 = 3, |
238 | .buf_idx2 = -1, .size_idx2 = -1, |
239 | }, |
240 | /* |
241 | * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not |
242 | * interleave ibm,get-indices call sequences with |
243 | * different inputs. |
244 | */ |
245 | .lock = &rtas_ibm_get_indices_lock, |
246 | }, |
247 | [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { |
248 | .name = "ibm,get-rio-topology" , |
249 | }, |
250 | [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = { |
251 | .name = "ibm,get-system-parameter" , |
252 | .filter = &(const struct rtas_filter) { |
253 | .buf_idx1 = 1, .size_idx1 = 2, |
254 | .buf_idx2 = -1, .size_idx2 = -1, |
255 | }, |
256 | }, |
257 | [RTAS_FNIDX__IBM_GET_VPD] = { |
258 | .name = "ibm,get-vpd" , |
259 | .filter = &(const struct rtas_filter) { |
260 | .buf_idx1 = 0, .size_idx1 = -1, |
261 | .buf_idx2 = 1, .size_idx2 = 2, |
262 | }, |
263 | /* |
264 | * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences |
265 | * should not be allowed to interleave. |
266 | */ |
267 | .lock = &rtas_ibm_get_vpd_lock, |
268 | }, |
269 | [RTAS_FNIDX__IBM_GET_XIVE] = { |
270 | .name = "ibm,get-xive" , |
271 | }, |
272 | [RTAS_FNIDX__IBM_INT_OFF] = { |
273 | .name = "ibm,int-off" , |
274 | }, |
275 | [RTAS_FNIDX__IBM_INT_ON] = { |
276 | .name = "ibm,int-on" , |
277 | }, |
278 | [RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = { |
279 | .name = "ibm,io-quiesce-ack" , |
280 | }, |
281 | [RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = { |
282 | .name = "ibm,lpar-perftools" , |
283 | .filter = &(const struct rtas_filter) { |
284 | .buf_idx1 = 2, .size_idx1 = 3, |
285 | .buf_idx2 = -1, .size_idx2 = -1, |
286 | }, |
287 | /* |
288 | * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow |
289 | * only one call sequence in progress at a time. |
290 | */ |
291 | .lock = &rtas_ibm_lpar_perftools_lock, |
292 | }, |
293 | [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { |
294 | .name = "ibm,manage-flash-image" , |
295 | }, |
296 | [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = { |
297 | .name = "ibm,manage-storage-preservation" , |
298 | }, |
299 | [RTAS_FNIDX__IBM_NMI_INTERLOCK] = { |
300 | .name = "ibm,nmi-interlock" , |
301 | }, |
302 | [RTAS_FNIDX__IBM_NMI_REGISTER] = { |
303 | .name = "ibm,nmi-register" , |
304 | }, |
305 | [RTAS_FNIDX__IBM_OPEN_ERRINJCT] = { |
306 | .name = "ibm,open-errinjct" , |
307 | .filter = &(const struct rtas_filter) { |
308 | .buf_idx1 = -1, .size_idx1 = -1, |
309 | .buf_idx2 = -1, .size_idx2 = -1, |
310 | }, |
311 | }, |
312 | [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = { |
313 | .name = "ibm,open-sriov-allow-unfreeze" , |
314 | }, |
315 | [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = { |
316 | .name = "ibm,open-sriov-map-pe-number" , |
317 | }, |
318 | [RTAS_FNIDX__IBM_OS_TERM] = { |
319 | .name = "ibm,os-term" , |
320 | }, |
321 | [RTAS_FNIDX__IBM_PARTNER_CONTROL] = { |
322 | .name = "ibm,partner-control" , |
323 | }, |
324 | [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = { |
325 | .name = "ibm,physical-attestation" , |
326 | .filter = &(const struct rtas_filter) { |
327 | .buf_idx1 = 0, .size_idx1 = 1, |
328 | .buf_idx2 = -1, .size_idx2 = -1, |
329 | }, |
330 | /* |
331 | * This follows a sequence-based pattern similar to |
332 | * ibm,get-vpd et al. Since PAPR+ restricts |
333 | * interleaving call sequences for other functions of |
334 | * this style, assume the restriction applies here, |
335 | * even though it's not explicit in the spec. |
336 | */ |
337 | .lock = &rtas_ibm_physical_attestation_lock, |
338 | }, |
339 | [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { |
340 | .name = "ibm,platform-dump" , |
341 | .filter = &(const struct rtas_filter) { |
342 | .buf_idx1 = 4, .size_idx1 = 5, |
343 | .buf_idx2 = -1, .size_idx2 = -1, |
344 | }, |
345 | /* |
346 | * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent |
347 | * sequences of ibm,platform-dump are allowed if they |
348 | * are operating on different dump tags. So leave the |
349 | * lock pointer unset for now. This may need |
350 | * reconsideration if kernel-internal users appear. |
351 | */ |
352 | }, |
353 | [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { |
354 | .name = "ibm,power-off-ups" , |
355 | }, |
356 | [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = { |
357 | .name = "ibm,query-interrupt-source-number" , |
358 | }, |
359 | [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = { |
360 | .name = "ibm,query-pe-dma-window" , |
361 | }, |
362 | [RTAS_FNIDX__IBM_READ_PCI_CONFIG] = { |
363 | .name = "ibm,read-pci-config" , |
364 | }, |
365 | [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = { |
366 | .name = "ibm,read-slot-reset-state" , |
367 | .filter = &(const struct rtas_filter) { |
368 | .buf_idx1 = -1, .size_idx1 = -1, |
369 | .buf_idx2 = -1, .size_idx2 = -1, |
370 | }, |
371 | }, |
372 | [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = { |
373 | .name = "ibm,read-slot-reset-state2" , |
374 | }, |
375 | [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { |
376 | .name = "ibm,remove-pe-dma-window" , |
377 | }, |
378 | [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = { |
379 | /* |
380 | * Note: PAPR+ v2.13 7.3.31.4.1 spells this as |
381 | * "ibm,reset-pe-dma-windows" (plural), but RTAS |
382 | * implementations use the singular form in practice. |
383 | */ |
384 | .name = "ibm,reset-pe-dma-window" , |
385 | }, |
386 | [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { |
387 | .name = "ibm,scan-log-dump" , |
388 | .filter = &(const struct rtas_filter) { |
389 | .buf_idx1 = 0, .size_idx1 = 1, |
390 | .buf_idx2 = -1, .size_idx2 = -1, |
391 | }, |
392 | }, |
393 | [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = { |
394 | .name = "ibm,set-dynamic-indicator" , |
395 | .filter = &(const struct rtas_filter) { |
396 | .buf_idx1 = 2, .size_idx1 = -1, |
397 | .buf_idx2 = -1, .size_idx2 = -1, |
398 | }, |
399 | /* |
400 | * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call |
401 | * this function with different inputs until a |
402 | * non-retry status has been returned. |
403 | */ |
404 | .lock = &rtas_ibm_set_dynamic_indicator_lock, |
405 | }, |
406 | [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { |
407 | .name = "ibm,set-eeh-option" , |
408 | .filter = &(const struct rtas_filter) { |
409 | .buf_idx1 = -1, .size_idx1 = -1, |
410 | .buf_idx2 = -1, .size_idx2 = -1, |
411 | }, |
412 | }, |
413 | [RTAS_FNIDX__IBM_SET_SLOT_RESET] = { |
414 | .name = "ibm,set-slot-reset" , |
415 | }, |
416 | [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = { |
417 | .name = "ibm,set-system-parameter" , |
418 | .filter = &(const struct rtas_filter) { |
419 | .buf_idx1 = 1, .size_idx1 = -1, |
420 | .buf_idx2 = -1, .size_idx2 = -1, |
421 | }, |
422 | }, |
423 | [RTAS_FNIDX__IBM_SET_XIVE] = { |
424 | .name = "ibm,set-xive" , |
425 | }, |
426 | [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = { |
427 | .name = "ibm,slot-error-detail" , |
428 | }, |
429 | [RTAS_FNIDX__IBM_SUSPEND_ME] = { |
430 | .name = "ibm,suspend-me" , |
431 | .banned_for_syscall_on_le = true, |
432 | .filter = &(const struct rtas_filter) { |
433 | .buf_idx1 = -1, .size_idx1 = -1, |
434 | .buf_idx2 = -1, .size_idx2 = -1, |
435 | }, |
436 | }, |
437 | [RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = { |
438 | .name = "ibm,tune-dma-parms" , |
439 | }, |
440 | [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = { |
441 | .name = "ibm,update-flash-64-and-reboot" , |
442 | }, |
443 | [RTAS_FNIDX__IBM_UPDATE_NODES] = { |
444 | .name = "ibm,update-nodes" , |
445 | .banned_for_syscall_on_le = true, |
446 | .filter = &(const struct rtas_filter) { |
447 | .buf_idx1 = 0, .size_idx1 = -1, |
448 | .buf_idx2 = -1, .size_idx2 = -1, |
449 | .fixed_size = 4096, |
450 | }, |
451 | }, |
452 | [RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = { |
453 | .name = "ibm,update-properties" , |
454 | .banned_for_syscall_on_le = true, |
455 | .filter = &(const struct rtas_filter) { |
456 | .buf_idx1 = 0, .size_idx1 = -1, |
457 | .buf_idx2 = -1, .size_idx2 = -1, |
458 | .fixed_size = 4096, |
459 | }, |
460 | }, |
461 | [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = { |
462 | .name = "ibm,validate-flash-image" , |
463 | }, |
464 | [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = { |
465 | .name = "ibm,write-pci-config" , |
466 | }, |
467 | [RTAS_FNIDX__NVRAM_FETCH] = { |
468 | .name = "nvram-fetch" , |
469 | }, |
470 | [RTAS_FNIDX__NVRAM_STORE] = { |
471 | .name = "nvram-store" , |
472 | }, |
473 | [RTAS_FNIDX__POWER_OFF] = { |
474 | .name = "power-off" , |
475 | }, |
476 | [RTAS_FNIDX__PUT_TERM_CHAR] = { |
477 | .name = "put-term-char" , |
478 | }, |
479 | [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = { |
480 | .name = "query-cpu-stopped-state" , |
481 | }, |
482 | [RTAS_FNIDX__READ_PCI_CONFIG] = { |
483 | .name = "read-pci-config" , |
484 | }, |
485 | [RTAS_FNIDX__RTAS_LAST_ERROR] = { |
486 | .name = "rtas-last-error" , |
487 | }, |
488 | [RTAS_FNIDX__SET_INDICATOR] = { |
489 | .name = "set-indicator" , |
490 | .filter = &(const struct rtas_filter) { |
491 | .buf_idx1 = -1, .size_idx1 = -1, |
492 | .buf_idx2 = -1, .size_idx2 = -1, |
493 | }, |
494 | }, |
495 | [RTAS_FNIDX__SET_POWER_LEVEL] = { |
496 | .name = "set-power-level" , |
497 | .filter = &(const struct rtas_filter) { |
498 | .buf_idx1 = -1, .size_idx1 = -1, |
499 | .buf_idx2 = -1, .size_idx2 = -1, |
500 | }, |
501 | }, |
502 | [RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = { |
503 | .name = "set-time-for-power-on" , |
504 | .filter = &(const struct rtas_filter) { |
505 | .buf_idx1 = -1, .size_idx1 = -1, |
506 | .buf_idx2 = -1, .size_idx2 = -1, |
507 | }, |
508 | }, |
509 | [RTAS_FNIDX__SET_TIME_OF_DAY] = { |
510 | .name = "set-time-of-day" , |
511 | .filter = &(const struct rtas_filter) { |
512 | .buf_idx1 = -1, .size_idx1 = -1, |
513 | .buf_idx2 = -1, .size_idx2 = -1, |
514 | }, |
515 | }, |
516 | [RTAS_FNIDX__START_CPU] = { |
517 | .name = "start-cpu" , |
518 | }, |
519 | [RTAS_FNIDX__STOP_SELF] = { |
520 | .name = "stop-self" , |
521 | }, |
522 | [RTAS_FNIDX__SYSTEM_REBOOT] = { |
523 | .name = "system-reboot" , |
524 | }, |
525 | [RTAS_FNIDX__THAW_TIME_BASE] = { |
526 | .name = "thaw-time-base" , |
527 | }, |
528 | [RTAS_FNIDX__WRITE_PCI_CONFIG] = { |
529 | .name = "write-pci-config" , |
530 | }, |
531 | }; |
532 | |
/*
 * Walk every entry of rtas_function_table in index order; @funcp is a
 * pointer lvalue that refers to the current entry inside the loop body.
 */
#define for_each_rtas_function(funcp)						\
	for ((funcp) = rtas_function_table;					\
	     (funcp) < rtas_function_table + ARRAY_SIZE(rtas_function_table);	\
	     ++(funcp))
537 | |
538 | /* |
539 | * Nearly all RTAS calls need to be serialized. All uses of the |
540 | * default rtas_args block must hold rtas_lock. |
541 | * |
542 | * Exceptions to the RTAS serialization requirement (e.g. stop-self) |
543 | * must use a separate rtas_args structure. |
544 | */ |
545 | static DEFINE_RAW_SPINLOCK(rtas_lock); |
546 | static struct rtas_args rtas_args; |
547 | |
548 | /** |
549 | * rtas_function_token() - RTAS function token lookup. |
550 | * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN. |
551 | * |
552 | * Context: Any context. |
553 | * Return: the token value for the function if implemented by this platform, |
554 | * otherwise RTAS_UNKNOWN_SERVICE. |
555 | */ |
556 | s32 rtas_function_token(const rtas_fn_handle_t handle) |
557 | { |
558 | const size_t index = handle.index; |
559 | const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table); |
560 | |
561 | if (WARN_ONCE(out_of_bounds, "invalid function index %zu" , index)) |
562 | return RTAS_UNKNOWN_SERVICE; |
563 | /* |
564 | * Various drivers attempt token lookups on non-RTAS |
565 | * platforms. |
566 | */ |
567 | if (!rtas.dev) |
568 | return RTAS_UNKNOWN_SERVICE; |
569 | |
570 | return rtas_function_table[index].token; |
571 | } |
572 | EXPORT_SYMBOL_GPL(rtas_function_token); |
573 | |
574 | static int rtas_function_cmp(const void *a, const void *b) |
575 | { |
576 | const struct rtas_function *f1 = a; |
577 | const struct rtas_function *f2 = b; |
578 | |
579 | return strcmp(f1->name, f2->name); |
580 | } |
581 | |
582 | /* |
583 | * Boot-time initialization of the function table needs the lookup to |
584 | * return a non-const-qualified object. Use rtas_name_to_function() |
585 | * in all other contexts. |
586 | */ |
587 | static struct rtas_function *__rtas_name_to_function(const char *name) |
588 | { |
589 | const struct rtas_function key = { |
590 | .name = name, |
591 | }; |
592 | struct rtas_function *found; |
593 | |
594 | found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table), |
595 | sizeof(rtas_function_table[0]), rtas_function_cmp); |
596 | |
597 | return found; |
598 | } |
599 | |
/*
 * Const-qualified front end to __rtas_name_to_function(): returns the
 * table entry whose name equals @name, or NULL when none does.
 */
static const struct rtas_function *rtas_name_to_function(const char *name)
{
	const struct rtas_function *entry = __rtas_name_to_function(name);

	return entry;
}
604 | |
605 | static DEFINE_XARRAY(rtas_token_to_function_xarray); |
606 | |
607 | static int __init rtas_token_to_function_xarray_init(void) |
608 | { |
609 | const struct rtas_function *func; |
610 | int err = 0; |
611 | |
612 | for_each_rtas_function(func) { |
613 | const s32 token = func->token; |
614 | |
615 | if (token == RTAS_UNKNOWN_SERVICE) |
616 | continue; |
617 | |
618 | err = xa_err(xa_store(&rtas_token_to_function_xarray, |
619 | token, (void *)func, GFP_KERNEL)); |
620 | if (err) |
621 | break; |
622 | } |
623 | |
624 | return err; |
625 | } |
626 | arch_initcall(rtas_token_to_function_xarray_init); |
627 | |
628 | /* |
629 | * For use by sys_rtas(), where the token value is provided by user |
630 | * space and we don't want to warn on failed lookups. |
631 | */ |
632 | static const struct rtas_function *rtas_token_to_function_untrusted(s32 token) |
633 | { |
634 | return xa_load(&rtas_token_to_function_xarray, index: token); |
635 | } |
636 | |
637 | /* |
638 | * Reverse lookup for deriving the function descriptor from a |
639 | * known-good token value in contexts where the former is not already |
640 | * available. @token must be valid, e.g. derived from the result of a |
641 | * prior lookup against the function table. |
642 | */ |
643 | static const struct rtas_function *rtas_token_to_function(s32 token) |
644 | { |
645 | const struct rtas_function *func; |
646 | |
647 | if (WARN_ONCE(token < 0, "invalid token %d" , token)) |
648 | return NULL; |
649 | |
650 | func = rtas_token_to_function_untrusted(token); |
651 | if (func) |
652 | return func; |
653 | /* |
654 | * Fall back to linear scan in case the reverse mapping hasn't |
655 | * been initialized yet. |
656 | */ |
657 | if (xa_empty(xa: &rtas_token_to_function_xarray)) { |
658 | for_each_rtas_function(func) { |
659 | if (func->token == token) |
660 | return func; |
661 | } |
662 | } |
663 | |
664 | WARN_ONCE(true, "unexpected failed lookup for token %d" , token); |
665 | return NULL; |
666 | } |
667 | |
/* Declared here on purpose, so that only this file can call it. */
void enter_rtas(unsigned long);

/*
 * Bare entry into RTAS without tracing: passes __pa(@args) to
 * enter_rtas() and afterwards marks the SRR registers as clobbered.
 */
static void __do_enter_rtas(struct rtas_args *args)
{
	const unsigned long args_pa = __pa(args);

	enter_rtas(args_pa);
	/* RTAS uses the SRRs, so whatever they held is no longer valid. */
	srr_regs_clobbered();
}
676 | |
677 | static void __do_enter_rtas_trace(struct rtas_args *args) |
678 | { |
679 | const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token)); |
680 | |
681 | /* |
682 | * If there is a per-function lock, it must be held by the |
683 | * caller. |
684 | */ |
685 | if (func->lock) |
686 | lockdep_assert_held(func->lock); |
687 | |
688 | if (args == &rtas_args) |
689 | lockdep_assert_held(&rtas_lock); |
690 | |
691 | trace_rtas_input(args, func->name); |
692 | trace_rtas_ll_entry(args); |
693 | |
694 | __do_enter_rtas(args); |
695 | |
696 | trace_rtas_ll_exit(args); |
697 | trace_rtas_output(args, func->name); |
698 | } |
699 | |
700 | static void do_enter_rtas(struct rtas_args *args) |
701 | { |
702 | const unsigned long msr = mfmsr(); |
703 | /* |
704 | * Situations where we want to skip any active tracepoints for |
705 | * safety reasons: |
706 | * |
707 | * 1. The last code executed on an offline CPU as it stops, |
708 | * i.e. we're about to call stop-self. The tracepoints' |
709 | * function name lookup uses xarray, which uses RCU, which |
710 | * isn't valid to call on an offline CPU. Any events |
711 | * emitted on an offline CPU will be discarded anyway. |
712 | * |
713 | * 2. In real mode, as when invoking ibm,nmi-interlock from |
714 | * the pseries MCE handler. We cannot count on trace |
715 | * buffers or the entries in rtas_token_to_function_xarray |
716 | * to be contained in the RMO. |
717 | */ |
718 | const unsigned long mask = MSR_IR | MSR_DR; |
719 | const bool can_trace = likely(cpu_online(raw_smp_processor_id()) && |
720 | (msr & mask) == mask); |
721 | /* |
722 | * Make sure MSR[RI] is currently enabled as it will be forced later |
723 | * in enter_rtas. |
724 | */ |
725 | BUG_ON(!(msr & MSR_RI)); |
726 | |
727 | BUG_ON(!irqs_disabled()); |
728 | |
729 | hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */ |
730 | |
731 | if (can_trace) |
732 | __do_enter_rtas_trace(args); |
733 | else |
734 | __do_enter_rtas(args); |
735 | } |
736 | |
737 | struct rtas_t rtas; |
738 | |
739 | DEFINE_SPINLOCK(rtas_data_buf_lock); |
740 | EXPORT_SYMBOL_GPL(rtas_data_buf_lock); |
741 | |
742 | char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K); |
743 | EXPORT_SYMBOL_GPL(rtas_data_buf); |
744 | |
745 | unsigned long rtas_rmo_buf; |
746 | |
747 | /* |
748 | * If non-NULL, this gets called when the kernel terminates. |
749 | * This is done like this so rtas_flash can be a module. |
750 | */ |
751 | void (*rtas_flash_term_hook)(int); |
752 | EXPORT_SYMBOL_GPL(rtas_flash_term_hook); |
753 | |
754 | /* |
755 | * call_rtas_display_status and call_rtas_display_status_delay |
756 | * are designed only for very early low-level debugging, which |
757 | * is why the token is hard-coded to 10. |
758 | */ |
759 | static void call_rtas_display_status(unsigned char c) |
760 | { |
761 | unsigned long flags; |
762 | |
763 | if (!rtas.base) |
764 | return; |
765 | |
766 | raw_spin_lock_irqsave(&rtas_lock, flags); |
767 | rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c); |
768 | raw_spin_unlock_irqrestore(&rtas_lock, flags); |
769 | } |
770 | |
/*
 * Output one character through call_rtas_display_status(), treating the
 * display as a single 16-column line.
 *
 * A newline pads the rest of the line with spaces, waits 500ms and is
 * itself deferred: the CR/LF pair is only emitted once the next
 * non-newline character arrives. Ordinary characters are emitted while
 * columns remain on the current line and are dropped once it is full.
 *
 * The column counter is never allowed to go negative. The previous
 * "if (width--)" test dropped only the 17th character of an over-long
 * line and then resumed printing, because the counter became non-zero
 * again after passing through 0.
 */
static void call_rtas_display_status_delay(char c)
{
	static int pending_newline = 0;  /* did last write end with unprinted newline? */
	static int width = 16;           /* columns left on the current line */

	if (c == '\n') {
		/* Blank out whatever is left of the line. */
		while (width-- > 0)
			call_rtas_display_status(' ');
		width = 16;
		mdelay(500);
		pending_newline = 1;
	} else {
		if (pending_newline) {
			call_rtas_display_status('\r');
			call_rtas_display_status('\n');
		}
		pending_newline = 0;
		if (width > 0) {
			width--;
			call_rtas_display_status(c);
			udelay(10000);
		}
	}
}
794 | |
795 | void __init udbg_init_rtas_panel(void) |
796 | { |
797 | udbg_putc = call_rtas_display_status_delay; |
798 | } |
799 | |
800 | #ifdef CONFIG_UDBG_RTAS_CONSOLE |
801 | |
802 | /* If you think you're dying before early_init_dt_scan_rtas() does its |
803 | * work, you can hard code the token values for your firmware here and |
804 | * hardcode rtas.base/entry etc. |
805 | */ |
806 | static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE; |
807 | static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE; |
808 | |
809 | static void udbg_rtascon_putc(char c) |
810 | { |
811 | int tries; |
812 | |
813 | if (!rtas.base) |
814 | return; |
815 | |
816 | /* Add CRs before LFs */ |
817 | if (c == '\n') |
818 | udbg_rtascon_putc('\r'); |
819 | |
820 | /* if there is more than one character to be displayed, wait a bit */ |
821 | for (tries = 0; tries < 16; tries++) { |
822 | if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0) |
823 | break; |
824 | udelay(1000); |
825 | } |
826 | } |
827 | |
828 | static int udbg_rtascon_getc_poll(void) |
829 | { |
830 | int c; |
831 | |
832 | if (!rtas.base) |
833 | return -1; |
834 | |
835 | if (rtas_call(rtas_getchar_token, 0, 2, &c)) |
836 | return -1; |
837 | |
838 | return c; |
839 | } |
840 | |
/*
 * Blocking read for the RTAS console: spins on
 * udbg_rtascon_getc_poll() until it yields something other than -1.
 */
static int udbg_rtascon_getc(void)
{
	int ch;

	do {
		ch = udbg_rtascon_getc_poll();
	} while (ch == -1);

	return ch;
}
850 | |
851 | |
852 | void __init udbg_init_rtas_console(void) |
853 | { |
854 | udbg_putc = udbg_rtascon_putc; |
855 | udbg_getc = udbg_rtascon_getc; |
856 | udbg_getc_poll = udbg_rtascon_getc_poll; |
857 | } |
858 | #endif /* CONFIG_UDBG_RTAS_CONSOLE */ |
859 | |
860 | void rtas_progress(char *s, unsigned short hex) |
861 | { |
862 | struct device_node *root; |
863 | int width; |
864 | const __be32 *p; |
865 | char *os; |
866 | static int display_character, set_indicator; |
867 | static int display_width, display_lines, form_feed; |
868 | static const int *row_width; |
869 | static DEFINE_SPINLOCK(progress_lock); |
870 | static int current_line; |
871 | static int pending_newline = 0; /* did last write end with unprinted newline? */ |
872 | |
873 | if (!rtas.base) |
874 | return; |
875 | |
876 | if (display_width == 0) { |
877 | display_width = 0x10; |
878 | if ((root = of_find_node_by_path(path: "/rtas" ))) { |
879 | if ((p = of_get_property(node: root, |
880 | name: "ibm,display-line-length" , NULL))) |
881 | display_width = be32_to_cpu(*p); |
882 | if ((p = of_get_property(node: root, |
883 | name: "ibm,form-feed" , NULL))) |
884 | form_feed = be32_to_cpu(*p); |
885 | if ((p = of_get_property(node: root, |
886 | name: "ibm,display-number-of-lines" , NULL))) |
887 | display_lines = be32_to_cpu(*p); |
888 | row_width = of_get_property(node: root, |
889 | name: "ibm,display-truncation-length" , NULL); |
890 | of_node_put(node: root); |
891 | } |
892 | display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER); |
893 | set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR); |
894 | } |
895 | |
896 | if (display_character == RTAS_UNKNOWN_SERVICE) { |
897 | /* use hex display if available */ |
898 | if (set_indicator != RTAS_UNKNOWN_SERVICE) |
899 | rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex); |
900 | return; |
901 | } |
902 | |
903 | spin_lock(lock: &progress_lock); |
904 | |
905 | /* |
906 | * Last write ended with newline, but we didn't print it since |
907 | * it would just clear the bottom line of output. Print it now |
908 | * instead. |
909 | * |
910 | * If no newline is pending and form feed is supported, clear the |
911 | * display with a form feed; otherwise, print a CR to start output |
912 | * at the beginning of the line. |
913 | */ |
914 | if (pending_newline) { |
915 | rtas_call(display_character, 1, 1, NULL, '\r'); |
916 | rtas_call(display_character, 1, 1, NULL, '\n'); |
917 | pending_newline = 0; |
918 | } else { |
919 | current_line = 0; |
920 | if (form_feed) |
921 | rtas_call(display_character, 1, 1, NULL, |
922 | (char)form_feed); |
923 | else |
924 | rtas_call(display_character, 1, 1, NULL, '\r'); |
925 | } |
926 | |
927 | if (row_width) |
928 | width = row_width[current_line]; |
929 | else |
930 | width = display_width; |
931 | os = s; |
932 | while (*os) { |
933 | if (*os == '\n' || *os == '\r') { |
934 | /* If newline is the last character, save it |
935 | * until next call to avoid bumping up the |
936 | * display output. |
937 | */ |
938 | if (*os == '\n' && !os[1]) { |
939 | pending_newline = 1; |
940 | current_line++; |
941 | if (current_line > display_lines-1) |
942 | current_line = display_lines-1; |
943 | spin_unlock(lock: &progress_lock); |
944 | return; |
945 | } |
946 | |
947 | /* RTAS wants CR-LF, not just LF */ |
948 | |
949 | if (*os == '\n') { |
950 | rtas_call(display_character, 1, 1, NULL, '\r'); |
951 | rtas_call(display_character, 1, 1, NULL, '\n'); |
952 | } else { |
953 | /* CR might be used to re-draw a line, so we'll |
954 | * leave it alone and not add LF. |
955 | */ |
956 | rtas_call(display_character, 1, 1, NULL, *os); |
957 | } |
958 | |
959 | if (row_width) |
960 | width = row_width[current_line]; |
961 | else |
962 | width = display_width; |
963 | } else { |
964 | width--; |
965 | rtas_call(display_character, 1, 1, NULL, *os); |
966 | } |
967 | |
968 | os++; |
969 | |
970 | /* if we overwrite the screen length */ |
971 | if (width <= 0) |
972 | while ((*os != 0) && (*os != '\n') && (*os != '\r')) |
973 | os++; |
974 | } |
975 | |
976 | spin_unlock(lock: &progress_lock); |
977 | } |
978 | EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ |
979 | |
980 | int rtas_token(const char *service) |
981 | { |
982 | const struct rtas_function *func; |
983 | const __be32 *tokp; |
984 | |
985 | if (rtas.dev == NULL) |
986 | return RTAS_UNKNOWN_SERVICE; |
987 | |
988 | func = rtas_name_to_function(name: service); |
989 | if (func) |
990 | return func->token; |
991 | /* |
992 | * The caller is looking up a name that is not known to be an |
993 | * RTAS function. Either it's a function that needs to be |
994 | * added to the table, or they're misusing rtas_token() to |
995 | * access non-function properties of the /rtas node. Warn and |
996 | * fall back to the legacy behavior. |
997 | */ |
998 | WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n" , |
999 | service); |
1000 | |
1001 | tokp = of_get_property(node: rtas.dev, name: service, NULL); |
1002 | return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE; |
1003 | } |
1004 | EXPORT_SYMBOL_GPL(rtas_token); |
1005 | |
1006 | #ifdef CONFIG_RTAS_ERROR_LOGGING |
1007 | |
1008 | static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX; |
1009 | |
1010 | /* |
1011 | * Return the firmware-specified size of the error log buffer |
1012 | * for all rtas calls that require an error buffer argument. |
1013 | * This includes 'check-exception' and 'rtas-last-error'. |
1014 | */ |
1015 | int rtas_get_error_log_max(void) |
1016 | { |
1017 | return rtas_error_log_max; |
1018 | } |
1019 | |
1020 | static void __init init_error_log_max(void) |
1021 | { |
1022 | static const char propname[] __initconst = "rtas-error-log-max" ; |
1023 | u32 max; |
1024 | |
1025 | if (of_property_read_u32(rtas.dev, propname, &max)) { |
1026 | pr_warn("%s not found, using default of %u\n" , |
1027 | propname, RTAS_ERROR_LOG_MAX); |
1028 | max = RTAS_ERROR_LOG_MAX; |
1029 | } |
1030 | |
1031 | if (max > RTAS_ERROR_LOG_MAX) { |
1032 | pr_warn("%s = %u, clamping max error log size to %u\n" , |
1033 | propname, max, RTAS_ERROR_LOG_MAX); |
1034 | max = RTAS_ERROR_LOG_MAX; |
1035 | } |
1036 | |
1037 | rtas_error_log_max = max; |
1038 | } |
1039 | |
1040 | |
1041 | static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; |
1042 | |
1043 | /** Return a copy of the detailed error text associated with the |
1044 | * most recent failed call to rtas. Because the error text |
1045 | * might go stale if there are any other intervening rtas calls, |
1046 | * this routine must be called atomically with whatever produced |
1047 | * the error (i.e. with rtas_lock still held from the previous call). |
1048 | */ |
1049 | static char *__fetch_rtas_last_error(char *altbuf) |
1050 | { |
1051 | const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR); |
1052 | struct rtas_args err_args, save_args; |
1053 | u32 bufsz; |
1054 | char *buf = NULL; |
1055 | |
1056 | lockdep_assert_held(&rtas_lock); |
1057 | |
1058 | if (token == -1) |
1059 | return NULL; |
1060 | |
1061 | bufsz = rtas_get_error_log_max(); |
1062 | |
1063 | err_args.token = cpu_to_be32(token); |
1064 | err_args.nargs = cpu_to_be32(2); |
1065 | err_args.nret = cpu_to_be32(1); |
1066 | err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf)); |
1067 | err_args.args[1] = cpu_to_be32(bufsz); |
1068 | err_args.args[2] = 0; |
1069 | |
1070 | save_args = rtas_args; |
1071 | rtas_args = err_args; |
1072 | |
1073 | do_enter_rtas(&rtas_args); |
1074 | |
1075 | err_args = rtas_args; |
1076 | rtas_args = save_args; |
1077 | |
1078 | /* Log the error in the unlikely case that there was one. */ |
1079 | if (unlikely(err_args.args[2] == 0)) { |
1080 | if (altbuf) { |
1081 | buf = altbuf; |
1082 | } else { |
1083 | buf = rtas_err_buf; |
1084 | if (slab_is_available()) |
1085 | buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); |
1086 | } |
1087 | if (buf) |
1088 | memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); |
1089 | } |
1090 | |
1091 | return buf; |
1092 | } |
1093 | |
1094 | #define get_errorlog_buffer() kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL) |
1095 | |
1096 | #else /* CONFIG_RTAS_ERROR_LOGGING */ |
1097 | #define __fetch_rtas_last_error(x) NULL |
1098 | #define get_errorlog_buffer() NULL |
1099 | static void __init init_error_log_max(void) {} |
1100 | #endif |
1101 | |
1102 | |
1103 | static void |
1104 | va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, |
1105 | va_list list) |
1106 | { |
1107 | int i; |
1108 | |
1109 | args->token = cpu_to_be32(token); |
1110 | args->nargs = cpu_to_be32(nargs); |
1111 | args->nret = cpu_to_be32(nret); |
1112 | args->rets = &(args->args[nargs]); |
1113 | |
1114 | for (i = 0; i < nargs; ++i) |
1115 | args->args[i] = cpu_to_be32(va_arg(list, __u32)); |
1116 | |
1117 | for (i = 0; i < nret; ++i) |
1118 | args->rets[i] = 0; |
1119 | |
1120 | do_enter_rtas(args); |
1121 | } |
1122 | |
1123 | /** |
1124 | * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization. |
1125 | * @args: RTAS parameter block to be used for the call, must obey RTAS addressing |
1126 | * constraints. |
1127 | * @token: Identifies the function being invoked. |
1128 | * @nargs: Number of input parameters. Does not include token. |
1129 | * @nret: Number of output parameters, including the call status. |
1130 | * @....: List of @nargs input parameters. |
1131 | * |
1132 | * Invokes the RTAS function indicated by @token, which the caller |
1133 | * should obtain via rtas_function_token(). |
1134 | * |
1135 | * This function is similar to rtas_call(), but must be used with a |
1136 | * limited set of RTAS calls specifically exempted from the general |
1137 | * requirement that only one RTAS call may be in progress at any |
1138 | * time. Examples include stop-self and ibm,nmi-interlock. |
1139 | */ |
1140 | void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) |
1141 | { |
1142 | va_list list; |
1143 | |
1144 | va_start(list, nret); |
1145 | va_rtas_call_unlocked(args, token, nargs, nret, list); |
1146 | va_end(list); |
1147 | } |
1148 | |
1149 | static bool token_is_restricted_errinjct(s32 token) |
1150 | { |
1151 | return token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) || |
1152 | token == rtas_function_token(RTAS_FN_IBM_ERRINJCT); |
1153 | } |
1154 | |
1155 | /** |
1156 | * rtas_call() - Invoke an RTAS firmware function. |
1157 | * @token: Identifies the function being invoked. |
1158 | * @nargs: Number of input parameters. Does not include token. |
1159 | * @nret: Number of output parameters, including the call status. |
1160 | * @outputs: Array of @nret output words. |
1161 | * @....: List of @nargs input parameters. |
1162 | * |
1163 | * Invokes the RTAS function indicated by @token, which the caller |
1164 | * should obtain via rtas_function_token(). |
1165 | * |
1166 | * The @nargs and @nret arguments must match the number of input and |
1167 | * output parameters specified for the RTAS function. |
1168 | * |
1169 | * rtas_call() returns RTAS status codes, not conventional Linux errno |
1170 | * values. Callers must translate any failure to an appropriate errno |
1171 | * in syscall context. Most callers of RTAS functions that can return |
1172 | * -2 or 990x should use rtas_busy_delay() to correctly handle those |
1173 | * statuses before calling again. |
1174 | * |
1175 | * The return value descriptions are adapted from 7.2.8 [RTAS] Return |
1176 | * Codes of the PAPR and CHRP specifications. |
1177 | * |
1178 | * Context: Process context preferably, interrupt context if |
1179 | * necessary. Acquires an internal spinlock and may perform |
1180 | * GFP_ATOMIC slab allocation in error path. Unsafe for NMI |
1181 | * context. |
1182 | * Return: |
1183 | * * 0 - RTAS function call succeeded. |
1184 | * * -1 - RTAS function encountered a hardware or |
1185 | * platform error, or the token is invalid, |
1186 | * or the function is restricted by kernel policy. |
1187 | * * -2 - Specs say "A necessary hardware device was busy, |
1188 | * and the requested function could not be |
1189 | * performed. The operation should be retried at |
1190 | * a later time." This is misleading, at least with |
1191 | * respect to current RTAS implementations. What it |
1192 | * usually means in practice is that the function |
1193 | * could not be completed while meeting RTAS's |
1194 | * deadline for returning control to the OS (250us |
1195 | * for PAPR/PowerVM, typically), but the call may be |
1196 | * immediately reattempted to resume work on it. |
1197 | * * -3 - Parameter error. |
1198 | * * -7 - Unexpected state change. |
1199 | * * 9000...9899 - Vendor-specific success codes. |
1200 | * * 9900...9905 - Advisory extended delay. Caller should try |
1201 | * again after ~10^x ms has elapsed, where x is |
1202 | * the last digit of the status [0-5]. Again going |
1203 | * beyond the PAPR text, 990x on PowerVM indicates |
1204 | * contention for RTAS-internal resources. Other |
1205 | * RTAS call sequences in progress should be |
1206 | * allowed to complete before reattempting the |
1207 | * call. |
1208 | * * -9000 - Multi-level isolation error. |
1209 | * * -9999...-9004 - Vendor-specific error codes. |
1210 | * * Additional negative values - Function-specific error. |
1211 | * * Additional positive values - Function-specific success. |
1212 | */ |
1213 | int rtas_call(int token, int nargs, int nret, int *outputs, ...) |
1214 | { |
1215 | struct pin_cookie cookie; |
1216 | va_list list; |
1217 | int i; |
1218 | unsigned long flags; |
1219 | struct rtas_args *args; |
1220 | char *buff_copy = NULL; |
1221 | int ret; |
1222 | |
1223 | if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) |
1224 | return -1; |
1225 | |
1226 | if (token_is_restricted_errinjct(token)) { |
1227 | /* |
1228 | * It would be nicer to not discard the error value |
1229 | * from security_locked_down(), but callers expect an |
1230 | * RTAS status, not an errno. |
1231 | */ |
1232 | if (security_locked_down(what: LOCKDOWN_RTAS_ERROR_INJECTION)) |
1233 | return -1; |
1234 | } |
1235 | |
1236 | if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) { |
1237 | WARN_ON_ONCE(1); |
1238 | return -1; |
1239 | } |
1240 | |
1241 | raw_spin_lock_irqsave(&rtas_lock, flags); |
1242 | cookie = lockdep_pin_lock(&rtas_lock); |
1243 | |
1244 | /* We use the global rtas args buffer */ |
1245 | args = &rtas_args; |
1246 | |
1247 | va_start(list, outputs); |
1248 | va_rtas_call_unlocked(args, token, nargs, nret, list); |
1249 | va_end(list); |
1250 | |
1251 | /* A -1 return code indicates that the last command couldn't |
1252 | be completed due to a hardware error. */ |
1253 | if (be32_to_cpu(args->rets[0]) == -1) |
1254 | buff_copy = __fetch_rtas_last_error(NULL); |
1255 | |
1256 | if (nret > 1 && outputs != NULL) |
1257 | for (i = 0; i < nret-1; ++i) |
1258 | outputs[i] = be32_to_cpu(args->rets[i + 1]); |
1259 | ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0; |
1260 | |
1261 | lockdep_unpin_lock(&rtas_lock, cookie); |
1262 | raw_spin_unlock_irqrestore(&rtas_lock, flags); |
1263 | |
1264 | if (buff_copy) { |
1265 | log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); |
1266 | if (slab_is_available()) |
1267 | kfree(objp: buff_copy); |
1268 | } |
1269 | return ret; |
1270 | } |
1271 | EXPORT_SYMBOL_GPL(rtas_call); |
1272 | |
1273 | /** |
1274 | * rtas_busy_delay_time() - From an RTAS status value, calculate the |
1275 | * suggested delay time in milliseconds. |
1276 | * |
1277 | * @status: a value returned from rtas_call() or similar APIs which return |
1278 | * the status of a RTAS function call. |
1279 | * |
1280 | * Context: Any context. |
1281 | * |
1282 | * Return: |
1283 | * * 100000 - If @status is 9905. |
1284 | * * 10000 - If @status is 9904. |
1285 | * * 1000 - If @status is 9903. |
1286 | * * 100 - If @status is 9902. |
1287 | * * 10 - If @status is 9901. |
1288 | * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but |
1289 | * some callers depend on this behavior, and the worst outcome |
1290 | * is that they will delay for longer than necessary. |
1291 | * * 0 - If @status is not a busy or extended delay value. |
1292 | */ |
1293 | unsigned int rtas_busy_delay_time(int status) |
1294 | { |
1295 | int order; |
1296 | unsigned int ms = 0; |
1297 | |
1298 | if (status == RTAS_BUSY) { |
1299 | ms = 1; |
1300 | } else if (status >= RTAS_EXTENDED_DELAY_MIN && |
1301 | status <= RTAS_EXTENDED_DELAY_MAX) { |
1302 | order = status - RTAS_EXTENDED_DELAY_MIN; |
1303 | for (ms = 1; order > 0; order--) |
1304 | ms *= 10; |
1305 | } |
1306 | |
1307 | return ms; |
1308 | } |
1309 | |
1310 | /* |
1311 | * Early boot fallback for rtas_busy_delay(). |
1312 | */ |
1313 | static bool __init rtas_busy_delay_early(int status) |
1314 | { |
1315 | static size_t successive_ext_delays __initdata; |
1316 | bool retry; |
1317 | |
1318 | switch (status) { |
1319 | case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: |
1320 | /* |
1321 | * In the unlikely case that we receive an extended |
1322 | * delay status in early boot, the OS is probably not |
1323 | * the cause, and there's nothing we can do to clear |
1324 | * the condition. Best we can do is delay for a bit |
1325 | * and hope it's transient. Lie to the caller if it |
1326 | * seems like we're stuck in a retry loop. |
1327 | */ |
1328 | mdelay(1); |
1329 | retry = true; |
1330 | successive_ext_delays += 1; |
1331 | if (successive_ext_delays > 1000) { |
1332 | pr_err("too many extended delays, giving up\n" ); |
1333 | dump_stack(); |
1334 | retry = false; |
1335 | successive_ext_delays = 0; |
1336 | } |
1337 | break; |
1338 | case RTAS_BUSY: |
1339 | retry = true; |
1340 | successive_ext_delays = 0; |
1341 | break; |
1342 | default: |
1343 | retry = false; |
1344 | successive_ext_delays = 0; |
1345 | break; |
1346 | } |
1347 | |
1348 | return retry; |
1349 | } |
1350 | |
1351 | /** |
1352 | * rtas_busy_delay() - helper for RTAS busy and extended delay statuses |
1353 | * |
1354 | * @status: a value returned from rtas_call() or similar APIs which return |
1355 | * the status of a RTAS function call. |
1356 | * |
1357 | * Context: Process context. May sleep or schedule. |
1358 | * |
1359 | * Return: |
1360 | * * true - @status is RTAS_BUSY or an extended delay hint. The |
1361 | * caller may assume that the CPU has been yielded if necessary, |
1362 | * and that an appropriate delay for @status has elapsed. |
1363 | * Generally the caller should reattempt the RTAS call which |
1364 | * yielded @status. |
1365 | * |
1366 | * * false - @status is not @RTAS_BUSY nor an extended delay hint. The |
1367 | * caller is responsible for handling @status. |
1368 | */ |
1369 | bool __ref rtas_busy_delay(int status) |
1370 | { |
1371 | unsigned int ms; |
1372 | bool ret; |
1373 | |
1374 | /* |
1375 | * Can't do timed sleeps before timekeeping is up. |
1376 | */ |
1377 | if (system_state < SYSTEM_SCHEDULING) |
1378 | return rtas_busy_delay_early(status); |
1379 | |
1380 | switch (status) { |
1381 | case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: |
1382 | ret = true; |
1383 | ms = rtas_busy_delay_time(status); |
1384 | /* |
1385 | * The extended delay hint can be as high as 100 seconds. |
1386 | * Surely any function returning such a status is either |
1387 | * buggy or isn't going to be significantly slowed by us |
1388 | * polling at 1HZ. Clamp the sleep time to one second. |
1389 | */ |
1390 | ms = clamp(ms, 1U, 1000U); |
1391 | /* |
1392 | * The delay hint is an order-of-magnitude suggestion, not |
1393 | * a minimum. It is fine, possibly even advantageous, for |
1394 | * us to pause for less time than hinted. For small values, |
1395 | * use usleep_range() to ensure we don't sleep much longer |
1396 | * than actually needed. |
1397 | * |
1398 | * See Documentation/timers/timers-howto.rst for |
1399 | * explanation of the threshold used here. In effect we use |
1400 | * usleep_range() for 9900 and 9901, msleep() for |
1401 | * 9902-9905. |
1402 | */ |
1403 | if (ms <= 20) |
1404 | usleep_range(min: ms * 100, max: ms * 1000); |
1405 | else |
1406 | msleep(msecs: ms); |
1407 | break; |
1408 | case RTAS_BUSY: |
1409 | ret = true; |
1410 | /* |
1411 | * We should call again immediately if there's no other |
1412 | * work to do. |
1413 | */ |
1414 | cond_resched(); |
1415 | break; |
1416 | default: |
1417 | ret = false; |
1418 | /* |
1419 | * Not a busy or extended delay status; the caller should |
1420 | * handle @status itself. Ensure we warn on misuses in |
1421 | * atomic context regardless. |
1422 | */ |
1423 | might_sleep(); |
1424 | break; |
1425 | } |
1426 | |
1427 | return ret; |
1428 | } |
1429 | EXPORT_SYMBOL_GPL(rtas_busy_delay); |
1430 | |
1431 | int rtas_error_rc(int rtas_rc) |
1432 | { |
1433 | int rc; |
1434 | |
1435 | switch (rtas_rc) { |
1436 | case RTAS_HARDWARE_ERROR: /* Hardware Error */ |
1437 | rc = -EIO; |
1438 | break; |
1439 | case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */ |
1440 | rc = -EINVAL; |
1441 | break; |
1442 | case -9000: /* Isolation error */ |
1443 | rc = -EFAULT; |
1444 | break; |
1445 | case -9001: /* Outstanding TCE/PTE */ |
1446 | rc = -EEXIST; |
1447 | break; |
1448 | case -9002: /* No usable slot */ |
1449 | rc = -ENODEV; |
1450 | break; |
1451 | default: |
1452 | pr_err("%s: unexpected error %d\n" , __func__, rtas_rc); |
1453 | rc = -ERANGE; |
1454 | break; |
1455 | } |
1456 | return rc; |
1457 | } |
1458 | EXPORT_SYMBOL_GPL(rtas_error_rc); |
1459 | |
1460 | int rtas_get_power_level(int powerdomain, int *level) |
1461 | { |
1462 | int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL); |
1463 | int rc; |
1464 | |
1465 | if (token == RTAS_UNKNOWN_SERVICE) |
1466 | return -ENOENT; |
1467 | |
1468 | while ((rc = rtas_call(token, 1, 2, level, powerdomain)) == RTAS_BUSY) |
1469 | udelay(1); |
1470 | |
1471 | if (rc < 0) |
1472 | return rtas_error_rc(rc); |
1473 | return rc; |
1474 | } |
1475 | EXPORT_SYMBOL_GPL(rtas_get_power_level); |
1476 | |
1477 | int rtas_set_power_level(int powerdomain, int level, int *setlevel) |
1478 | { |
1479 | int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL); |
1480 | int rc; |
1481 | |
1482 | if (token == RTAS_UNKNOWN_SERVICE) |
1483 | return -ENOENT; |
1484 | |
1485 | do { |
1486 | rc = rtas_call(token, 2, 2, setlevel, powerdomain, level); |
1487 | } while (rtas_busy_delay(rc)); |
1488 | |
1489 | if (rc < 0) |
1490 | return rtas_error_rc(rc); |
1491 | return rc; |
1492 | } |
1493 | EXPORT_SYMBOL_GPL(rtas_set_power_level); |
1494 | |
1495 | int rtas_get_sensor(int sensor, int index, int *state) |
1496 | { |
1497 | int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); |
1498 | int rc; |
1499 | |
1500 | if (token == RTAS_UNKNOWN_SERVICE) |
1501 | return -ENOENT; |
1502 | |
1503 | do { |
1504 | rc = rtas_call(token, 2, 2, state, sensor, index); |
1505 | } while (rtas_busy_delay(rc)); |
1506 | |
1507 | if (rc < 0) |
1508 | return rtas_error_rc(rc); |
1509 | return rc; |
1510 | } |
1511 | EXPORT_SYMBOL_GPL(rtas_get_sensor); |
1512 | |
1513 | int rtas_get_sensor_fast(int sensor, int index, int *state) |
1514 | { |
1515 | int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); |
1516 | int rc; |
1517 | |
1518 | if (token == RTAS_UNKNOWN_SERVICE) |
1519 | return -ENOENT; |
1520 | |
1521 | rc = rtas_call(token, 2, 2, state, sensor, index); |
1522 | WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && |
1523 | rc <= RTAS_EXTENDED_DELAY_MAX)); |
1524 | |
1525 | if (rc < 0) |
1526 | return rtas_error_rc(rc); |
1527 | return rc; |
1528 | } |
1529 | |
1530 | bool rtas_indicator_present(int token, int *maxindex) |
1531 | { |
1532 | int proplen, count, i; |
1533 | const struct indicator_elem { |
1534 | __be32 token; |
1535 | __be32 maxindex; |
1536 | } *indicators; |
1537 | |
1538 | indicators = of_get_property(node: rtas.dev, name: "rtas-indicators" , lenp: &proplen); |
1539 | if (!indicators) |
1540 | return false; |
1541 | |
1542 | count = proplen / sizeof(struct indicator_elem); |
1543 | |
1544 | for (i = 0; i < count; i++) { |
1545 | if (__be32_to_cpu(indicators[i].token) != token) |
1546 | continue; |
1547 | if (maxindex) |
1548 | *maxindex = __be32_to_cpu(indicators[i].maxindex); |
1549 | return true; |
1550 | } |
1551 | |
1552 | return false; |
1553 | } |
1554 | |
1555 | int rtas_set_indicator(int indicator, int index, int new_value) |
1556 | { |
1557 | int token = rtas_function_token(RTAS_FN_SET_INDICATOR); |
1558 | int rc; |
1559 | |
1560 | if (token == RTAS_UNKNOWN_SERVICE) |
1561 | return -ENOENT; |
1562 | |
1563 | do { |
1564 | rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value); |
1565 | } while (rtas_busy_delay(rc)); |
1566 | |
1567 | if (rc < 0) |
1568 | return rtas_error_rc(rc); |
1569 | return rc; |
1570 | } |
1571 | EXPORT_SYMBOL_GPL(rtas_set_indicator); |
1572 | |
1573 | /* |
1574 | * Ignoring RTAS extended delay |
1575 | */ |
1576 | int rtas_set_indicator_fast(int indicator, int index, int new_value) |
1577 | { |
1578 | int token = rtas_function_token(RTAS_FN_SET_INDICATOR); |
1579 | int rc; |
1580 | |
1581 | if (token == RTAS_UNKNOWN_SERVICE) |
1582 | return -ENOENT; |
1583 | |
1584 | rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value); |
1585 | |
1586 | WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && |
1587 | rc <= RTAS_EXTENDED_DELAY_MAX)); |
1588 | |
1589 | if (rc < 0) |
1590 | return rtas_error_rc(rc); |
1591 | |
1592 | return rc; |
1593 | } |
1594 | |
1595 | /** |
1596 | * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR. |
1597 | * |
1598 | * @fw_status: RTAS call status will be placed here if not NULL. |
1599 | * |
1600 | * rtas_ibm_suspend_me() should be called only on a CPU which has |
1601 | * received H_CONTINUE from the H_JOIN hcall. All other active CPUs |
1602 | * should be waiting to return from H_JOIN. |
1603 | * |
1604 | * rtas_ibm_suspend_me() may suspend execution of the OS |
1605 | * indefinitely. Callers should take appropriate measures upon return, such as |
1606 | * resetting watchdog facilities. |
1607 | * |
1608 | * Callers may choose to retry this call if @fw_status is |
1609 | * %RTAS_THREADS_ACTIVE. |
1610 | * |
1611 | * Return: |
1612 | * 0 - The partition has resumed from suspend, possibly after |
1613 | * migration to a different host. |
1614 | * -ECANCELED - The operation was aborted. |
1615 | * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call. |
1616 | * -EBUSY - Some other condition prevented the suspend from succeeding. |
1617 | * -EIO - Hardware/platform error. |
1618 | */ |
1619 | int rtas_ibm_suspend_me(int *fw_status) |
1620 | { |
1621 | int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME); |
1622 | int fwrc; |
1623 | int ret; |
1624 | |
1625 | fwrc = rtas_call(token, 0, 1, NULL); |
1626 | |
1627 | switch (fwrc) { |
1628 | case 0: |
1629 | ret = 0; |
1630 | break; |
1631 | case RTAS_SUSPEND_ABORTED: |
1632 | ret = -ECANCELED; |
1633 | break; |
1634 | case RTAS_THREADS_ACTIVE: |
1635 | ret = -EAGAIN; |
1636 | break; |
1637 | case RTAS_NOT_SUSPENDABLE: |
1638 | case RTAS_OUTSTANDING_COPROC: |
1639 | ret = -EBUSY; |
1640 | break; |
1641 | case -1: |
1642 | default: |
1643 | ret = -EIO; |
1644 | break; |
1645 | } |
1646 | |
1647 | if (fw_status) |
1648 | *fw_status = fwrc; |
1649 | |
1650 | return ret; |
1651 | } |
1652 | |
1653 | void __noreturn rtas_restart(char *cmd) |
1654 | { |
1655 | if (rtas_flash_term_hook) |
1656 | rtas_flash_term_hook(SYS_RESTART); |
1657 | pr_emerg("system-reboot returned %d\n" , |
1658 | rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL)); |
1659 | for (;;); |
1660 | } |
1661 | |
1662 | void rtas_power_off(void) |
1663 | { |
1664 | if (rtas_flash_term_hook) |
1665 | rtas_flash_term_hook(SYS_POWER_OFF); |
1666 | /* allow power on only with power button press */ |
1667 | pr_emerg("power-off returned %d\n" , |
1668 | rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); |
1669 | for (;;); |
1670 | } |
1671 | |
1672 | void __noreturn rtas_halt(void) |
1673 | { |
1674 | if (rtas_flash_term_hook) |
1675 | rtas_flash_term_hook(SYS_HALT); |
1676 | /* allow power on only with power button press */ |
1677 | pr_emerg("power-off returned %d\n" , |
1678 | rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); |
1679 | for (;;); |
1680 | } |
1681 | |
1682 | /* Must be in the RMO region, so we place it here */ |
1683 | static char rtas_os_term_buf[2048]; |
1684 | static bool ibm_extended_os_term; |
1685 | |
1686 | void rtas_os_term(char *str) |
1687 | { |
1688 | s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM); |
1689 | static struct rtas_args args; |
1690 | int status; |
1691 | |
1692 | /* |
1693 | * Firmware with the ibm,extended-os-term property is guaranteed |
1694 | * to always return from an ibm,os-term call. Earlier versions without |
1695 | * this property may terminate the partition which we want to avoid |
1696 | * since it interferes with panic_timeout. |
1697 | */ |
1698 | |
1699 | if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term) |
1700 | return; |
1701 | |
1702 | snprintf(buf: rtas_os_term_buf, size: 2048, fmt: "OS panic: %s" , str); |
1703 | |
1704 | /* |
1705 | * Keep calling as long as RTAS returns a "try again" status, |
1706 | * but don't use rtas_busy_delay(), which potentially |
1707 | * schedules. |
1708 | */ |
1709 | do { |
1710 | rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf)); |
1711 | status = be32_to_cpu(args.rets[0]); |
1712 | } while (rtas_busy_delay_time(status)); |
1713 | |
1714 | if (status != 0) |
1715 | pr_emerg("ibm,os-term call failed %d\n" , status); |
1716 | } |
1717 | |
1718 | /** |
1719 | * rtas_activate_firmware() - Activate a new version of firmware. |
1720 | * |
1721 | * Context: This function may sleep. |
1722 | * |
1723 | * Activate a new version of partition firmware. The OS must call this |
1724 | * after resuming from a partition hibernation or migration in order |
1725 | * to maintain the ability to perform live firmware updates. It's not |
1726 | * catastrophic for this method to be absent or to fail; just log the |
1727 | * condition in that case. |
1728 | */ |
1729 | void rtas_activate_firmware(void) |
1730 | { |
1731 | int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE); |
1732 | int fwrc; |
1733 | |
1734 | if (token == RTAS_UNKNOWN_SERVICE) { |
1735 | pr_notice("ibm,activate-firmware method unavailable\n" ); |
1736 | return; |
1737 | } |
1738 | |
1739 | mutex_lock(&rtas_ibm_activate_firmware_lock); |
1740 | |
1741 | do { |
1742 | fwrc = rtas_call(token, 0, 1, NULL); |
1743 | } while (rtas_busy_delay(fwrc)); |
1744 | |
1745 | mutex_unlock(lock: &rtas_ibm_activate_firmware_lock); |
1746 | |
1747 | if (fwrc) |
1748 | pr_err("ibm,activate-firmware failed (%i)\n" , fwrc); |
1749 | } |
1750 | |
1751 | /** |
1752 | * get_pseries_errorlog() - Find a specific pseries error log in an RTAS |
1753 | * extended event log. |
1754 | * @log: RTAS error/event log |
1755 | * @section_id: two character section identifier |
1756 | * |
1757 | * Return: A pointer to the specified errorlog or NULL if not found. |
1758 | */ |
1759 | noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, |
1760 | uint16_t section_id) |
1761 | { |
1762 | struct rtas_ext_event_log_v6 *ext_log = |
1763 | (struct rtas_ext_event_log_v6 *)log->buffer; |
1764 | struct pseries_errorlog *sect; |
1765 | unsigned char *p, *log_end; |
1766 | uint32_t ext_log_length = rtas_error_extended_log_length(log); |
1767 | uint8_t log_format = rtas_ext_event_log_format(ext_log); |
1768 | uint32_t company_id = rtas_ext_event_company_id(ext_log); |
1769 | |
1770 | /* Check that we understand the format */ |
1771 | if (ext_log_length < sizeof(struct rtas_ext_event_log_v6) || |
1772 | log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || |
1773 | company_id != RTAS_V6EXT_COMPANY_ID_IBM) |
1774 | return NULL; |
1775 | |
1776 | log_end = log->buffer + ext_log_length; |
1777 | p = ext_log->vendor_log; |
1778 | |
1779 | while (p < log_end) { |
1780 | sect = (struct pseries_errorlog *)p; |
1781 | if (pseries_errorlog_id(sect) == section_id) |
1782 | return sect; |
1783 | p += pseries_errorlog_length(sect); |
1784 | } |
1785 | |
1786 | return NULL; |
1787 | } |
1788 | |
1789 | /* |
1790 | * The sys_rtas syscall, as originally designed, allows root to pass |
1791 | * arbitrary physical addresses to RTAS calls. A number of RTAS calls |
1792 | * can be abused to write to arbitrary memory and do other things that |
1793 | * are potentially harmful to system integrity, and thus should only |
1794 | * be used inside the kernel and not exposed to userspace. |
1795 | * |
1796 | * All known legitimate users of the sys_rtas syscall will only ever |
1797 | * pass addresses that fall within the RMO buffer, and use a known |
1798 | * subset of RTAS calls. |
1799 | * |
1800 | * Accordingly, we filter RTAS requests to check that the call is |
1801 | * permitted, and that provided pointers fall within the RMO buffer. |
1802 | * If a function is allowed to be invoked via the syscall, then its |
 * entry in rtas_function_table points to a rtas_filter that
1804 | * describes its constraints, with the indexes of the parameters which |
1805 | * are expected to contain addresses and sizes of buffers allocated |
1806 | * inside the RMO buffer. |
1807 | */ |
1808 | |
1809 | static bool in_rmo_buf(u32 base, u32 end) |
1810 | { |
1811 | return base >= rtas_rmo_buf && |
1812 | base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) && |
1813 | base <= end && |
1814 | end >= rtas_rmo_buf && |
1815 | end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); |
1816 | } |
1817 | |
1818 | static bool block_rtas_call(const struct rtas_function *func, int nargs, |
1819 | struct rtas_args *args) |
1820 | { |
1821 | const struct rtas_filter *f; |
1822 | const bool is_platform_dump = |
1823 | func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP]; |
1824 | const bool is_config_conn = |
1825 | func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR]; |
1826 | u32 base, size, end; |
1827 | |
1828 | /* |
1829 | * Only functions with filters attached are allowed. |
1830 | */ |
1831 | f = func->filter; |
1832 | if (!f) |
1833 | goto err; |
1834 | /* |
1835 | * And some functions aren't allowed on LE. |
1836 | */ |
1837 | if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le) |
1838 | goto err; |
1839 | |
1840 | if (f->buf_idx1 != -1) { |
1841 | base = be32_to_cpu(args->args[f->buf_idx1]); |
1842 | if (f->size_idx1 != -1) |
1843 | size = be32_to_cpu(args->args[f->size_idx1]); |
1844 | else if (f->fixed_size) |
1845 | size = f->fixed_size; |
1846 | else |
1847 | size = 1; |
1848 | |
1849 | end = base + size - 1; |
1850 | |
1851 | /* |
1852 | * Special case for ibm,platform-dump - NULL buffer |
1853 | * address is used to indicate end of dump processing |
1854 | */ |
1855 | if (is_platform_dump && base == 0) |
1856 | return false; |
1857 | |
1858 | if (!in_rmo_buf(base, end)) |
1859 | goto err; |
1860 | } |
1861 | |
1862 | if (f->buf_idx2 != -1) { |
1863 | base = be32_to_cpu(args->args[f->buf_idx2]); |
1864 | if (f->size_idx2 != -1) |
1865 | size = be32_to_cpu(args->args[f->size_idx2]); |
1866 | else if (f->fixed_size) |
1867 | size = f->fixed_size; |
1868 | else |
1869 | size = 1; |
1870 | end = base + size - 1; |
1871 | |
1872 | /* |
1873 | * Special case for ibm,configure-connector where the |
1874 | * address can be 0 |
1875 | */ |
1876 | if (is_config_conn && base == 0) |
1877 | return false; |
1878 | |
1879 | if (!in_rmo_buf(base, end)) |
1880 | goto err; |
1881 | } |
1882 | |
1883 | return false; |
1884 | err: |
1885 | pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n" ); |
1886 | pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n" , |
1887 | func->name, nargs, current->comm); |
1888 | return true; |
1889 | } |
1890 | |
1891 | /* We assume to be passed big endian arguments */ |
1892 | SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) |
1893 | { |
1894 | const struct rtas_function *func; |
1895 | struct pin_cookie cookie; |
1896 | struct rtas_args args; |
1897 | unsigned long flags; |
1898 | char *buff_copy, *errbuf = NULL; |
1899 | int nargs, nret, token; |
1900 | |
1901 | if (!capable(CAP_SYS_ADMIN)) |
1902 | return -EPERM; |
1903 | |
1904 | if (!rtas.entry) |
1905 | return -EINVAL; |
1906 | |
1907 | if (copy_from_user(to: &args, from: uargs, n: 3 * sizeof(u32)) != 0) |
1908 | return -EFAULT; |
1909 | |
1910 | nargs = be32_to_cpu(args.nargs); |
1911 | nret = be32_to_cpu(args.nret); |
1912 | token = be32_to_cpu(args.token); |
1913 | |
1914 | if (nargs >= ARRAY_SIZE(args.args) |
1915 | || nret > ARRAY_SIZE(args.args) |
1916 | || nargs + nret > ARRAY_SIZE(args.args)) |
1917 | return -EINVAL; |
1918 | |
1919 | /* Copy in args. */ |
1920 | if (copy_from_user(args.args, uargs->args, |
1921 | nargs * sizeof(rtas_arg_t)) != 0) |
1922 | return -EFAULT; |
1923 | |
1924 | /* |
1925 | * If this token doesn't correspond to a function the kernel |
1926 | * understands, you're not allowed to call it. |
1927 | */ |
1928 | func = rtas_token_to_function_untrusted(token); |
1929 | if (!func) |
1930 | return -EINVAL; |
1931 | |
1932 | args.rets = &args.args[nargs]; |
1933 | memset(args.rets, 0, nret * sizeof(rtas_arg_t)); |
1934 | |
1935 | if (block_rtas_call(func, nargs, args: &args)) |
1936 | return -EINVAL; |
1937 | |
1938 | if (token_is_restricted_errinjct(token)) { |
1939 | int err; |
1940 | |
1941 | err = security_locked_down(what: LOCKDOWN_RTAS_ERROR_INJECTION); |
1942 | if (err) |
1943 | return err; |
1944 | } |
1945 | |
1946 | /* Need to handle ibm,suspend_me call specially */ |
1947 | if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) { |
1948 | |
1949 | /* |
1950 | * rtas_ibm_suspend_me assumes the streamid handle is in cpu |
1951 | * endian, or at least the hcall within it requires it. |
1952 | */ |
1953 | int rc = 0; |
1954 | u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) |
1955 | | be32_to_cpu(args.args[1]); |
1956 | rc = rtas_syscall_dispatch_ibm_suspend_me(handle); |
1957 | if (rc == -EAGAIN) |
1958 | args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); |
1959 | else if (rc == -EIO) |
1960 | args.rets[0] = cpu_to_be32(-1); |
1961 | else if (rc) |
1962 | return rc; |
1963 | goto copy_return; |
1964 | } |
1965 | |
1966 | buff_copy = get_errorlog_buffer(); |
1967 | |
1968 | /* |
1969 | * If this function has a mutex assigned to it, we must |
1970 | * acquire it to avoid interleaving with any kernel-based uses |
1971 | * of the same function. Kernel-based sequences acquire the |
1972 | * appropriate mutex explicitly. |
1973 | */ |
1974 | if (func->lock) |
1975 | mutex_lock(func->lock); |
1976 | |
1977 | raw_spin_lock_irqsave(&rtas_lock, flags); |
1978 | cookie = lockdep_pin_lock(&rtas_lock); |
1979 | |
1980 | rtas_args = args; |
1981 | do_enter_rtas(args: &rtas_args); |
1982 | args = rtas_args; |
1983 | |
1984 | /* A -1 return code indicates that the last command couldn't |
1985 | be completed due to a hardware error. */ |
1986 | if (be32_to_cpu(args.rets[0]) == -1) |
1987 | errbuf = __fetch_rtas_last_error(buff_copy); |
1988 | |
1989 | lockdep_unpin_lock(&rtas_lock, cookie); |
1990 | raw_spin_unlock_irqrestore(&rtas_lock, flags); |
1991 | |
1992 | if (func->lock) |
1993 | mutex_unlock(lock: func->lock); |
1994 | |
1995 | if (buff_copy) { |
1996 | if (errbuf) |
1997 | log_error(errbuf, ERR_TYPE_RTAS_LOG, 0); |
1998 | kfree(objp: buff_copy); |
1999 | } |
2000 | |
2001 | copy_return: |
2002 | /* Copy out args. */ |
2003 | if (copy_to_user(uargs->args + nargs, |
2004 | args.args + nargs, |
2005 | nret * sizeof(rtas_arg_t)) != 0) |
2006 | return -EFAULT; |
2007 | |
2008 | return 0; |
2009 | } |
2010 | |
2011 | static void __init rtas_function_table_init(void) |
2012 | { |
2013 | struct property *prop; |
2014 | |
2015 | for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { |
2016 | struct rtas_function *curr = &rtas_function_table[i]; |
2017 | struct rtas_function *prior; |
2018 | int cmp; |
2019 | |
2020 | curr->token = RTAS_UNKNOWN_SERVICE; |
2021 | |
2022 | if (i == 0) |
2023 | continue; |
2024 | /* |
2025 | * Ensure table is sorted correctly for binary search |
2026 | * on function names. |
2027 | */ |
2028 | prior = &rtas_function_table[i - 1]; |
2029 | |
2030 | cmp = strcmp(prior->name, curr->name); |
2031 | if (cmp < 0) |
2032 | continue; |
2033 | |
2034 | if (cmp == 0) { |
2035 | pr_err("'%s' has duplicate function table entries\n" , |
2036 | curr->name); |
2037 | } else { |
2038 | pr_err("function table unsorted: '%s' wrongly precedes '%s'\n" , |
2039 | prior->name, curr->name); |
2040 | } |
2041 | } |
2042 | |
2043 | for_each_property_of_node(rtas.dev, prop) { |
2044 | struct rtas_function *func; |
2045 | |
2046 | if (prop->length != sizeof(u32)) |
2047 | continue; |
2048 | |
2049 | func = __rtas_name_to_function(name: prop->name); |
2050 | if (!func) |
2051 | continue; |
2052 | |
2053 | func->token = be32_to_cpup(p: (__be32 *)prop->value); |
2054 | |
2055 | pr_debug("function %s has token %u\n" , func->name, func->token); |
2056 | } |
2057 | } |
2058 | |
2059 | /* |
2060 | * Call early during boot, before mem init, to retrieve the RTAS |
2061 | * information from the device-tree and allocate the RMO buffer for userland |
2062 | * accesses. |
2063 | */ |
2064 | void __init rtas_initialize(void) |
2065 | { |
2066 | unsigned long rtas_region = RTAS_INSTANTIATE_MAX; |
2067 | u32 base, size, entry; |
2068 | int no_base, no_size, no_entry; |
2069 | |
2070 | /* Get RTAS dev node and fill up our "rtas" structure with infos |
2071 | * about it. |
2072 | */ |
2073 | rtas.dev = of_find_node_by_name(NULL, name: "rtas" ); |
2074 | if (!rtas.dev) |
2075 | return; |
2076 | |
2077 | no_base = of_property_read_u32(np: rtas.dev, propname: "linux,rtas-base" , out_value: &base); |
2078 | no_size = of_property_read_u32(np: rtas.dev, propname: "rtas-size" , out_value: &size); |
2079 | if (no_base || no_size) { |
2080 | of_node_put(node: rtas.dev); |
2081 | rtas.dev = NULL; |
2082 | return; |
2083 | } |
2084 | |
2085 | rtas.base = base; |
2086 | rtas.size = size; |
2087 | no_entry = of_property_read_u32(np: rtas.dev, propname: "linux,rtas-entry" , out_value: &entry); |
2088 | rtas.entry = no_entry ? rtas.base : entry; |
2089 | |
2090 | init_error_log_max(); |
2091 | |
2092 | /* Must be called before any function token lookups */ |
2093 | rtas_function_table_init(); |
2094 | |
2095 | /* |
2096 | * Discover this now to avoid a device tree lookup in the |
2097 | * panic path. |
2098 | */ |
2099 | ibm_extended_os_term = of_property_read_bool(np: rtas.dev, propname: "ibm,extended-os-term" ); |
2100 | |
2101 | /* If RTAS was found, allocate the RMO buffer for it and look for |
2102 | * the stop-self token if any |
2103 | */ |
2104 | #ifdef CONFIG_PPC64 |
2105 | if (firmware_has_feature(FW_FEATURE_LPAR)) |
2106 | rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); |
2107 | #endif |
2108 | rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE, |
2109 | 0, rtas_region); |
2110 | if (!rtas_rmo_buf) |
2111 | panic(fmt: "ERROR: RTAS: Failed to allocate %lx bytes below %pa\n" , |
2112 | PAGE_SIZE, &rtas_region); |
2113 | |
2114 | rtas_work_area_reserve_arena(rtas_region); |
2115 | } |
2116 | |
2117 | int __init early_init_dt_scan_rtas(unsigned long node, |
2118 | const char *uname, int depth, void *data) |
2119 | { |
2120 | const u32 *basep, *entryp, *sizep; |
2121 | |
2122 | if (depth != 1 || strcmp(uname, "rtas" ) != 0) |
2123 | return 0; |
2124 | |
2125 | basep = of_get_flat_dt_prop(node, name: "linux,rtas-base" , NULL); |
2126 | entryp = of_get_flat_dt_prop(node, name: "linux,rtas-entry" , NULL); |
2127 | sizep = of_get_flat_dt_prop(node, name: "rtas-size" , NULL); |
2128 | |
2129 | #ifdef CONFIG_PPC64 |
2130 | /* need this feature to decide the crashkernel offset */ |
2131 | if (of_get_flat_dt_prop(node, "ibm,hypertas-functions" , NULL)) |
2132 | powerpc_firmware_features |= FW_FEATURE_LPAR; |
2133 | #endif |
2134 | |
2135 | if (basep && entryp && sizep) { |
2136 | rtas.base = *basep; |
2137 | rtas.entry = *entryp; |
2138 | rtas.size = *sizep; |
2139 | } |
2140 | |
2141 | #ifdef CONFIG_UDBG_RTAS_CONSOLE |
2142 | basep = of_get_flat_dt_prop(node, "put-term-char" , NULL); |
2143 | if (basep) |
2144 | rtas_putchar_token = *basep; |
2145 | |
2146 | basep = of_get_flat_dt_prop(node, "get-term-char" , NULL); |
2147 | if (basep) |
2148 | rtas_getchar_token = *basep; |
2149 | |
2150 | if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE && |
2151 | rtas_getchar_token != RTAS_UNKNOWN_SERVICE) |
2152 | udbg_init_rtas_console(); |
2153 | |
2154 | #endif |
2155 | |
2156 | /* break now */ |
2157 | return 1; |
2158 | } |
2159 | |
2160 | static DEFINE_RAW_SPINLOCK(timebase_lock); |
2161 | static u64 timebase = 0; |
2162 | |
2163 | void rtas_give_timebase(void) |
2164 | { |
2165 | unsigned long flags; |
2166 | |
2167 | raw_spin_lock_irqsave(&timebase_lock, flags); |
2168 | hard_irq_disable(); |
2169 | rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL); |
2170 | timebase = get_tb(); |
2171 | raw_spin_unlock(&timebase_lock); |
2172 | |
2173 | while (timebase) |
2174 | barrier(); |
2175 | rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL); |
2176 | local_irq_restore(flags); |
2177 | } |
2178 | |
2179 | void rtas_take_timebase(void) |
2180 | { |
2181 | while (!timebase) |
2182 | barrier(); |
2183 | raw_spin_lock(&timebase_lock); |
2184 | set_tb(timebase >> 32, timebase & 0xffffffff); |
2185 | timebase = 0; |
2186 | raw_spin_unlock(&timebase_lock); |
2187 | } |
2188 | |